Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Address the feedback
  • Loading branch information
tarekgh committed Jul 13, 2021
commit e44e3f3021ef37cc87dff754c0e4803a1575a0a5
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public CategoryCasingInfo(CodePoint codePoint)
break;
}

// For compatibility reasons, we do not map the Turkish I's or the Latin small letter long S with invariant casing.
if (Program.IncludeCasingData && codePoint.Value != 0x0130 && codePoint.Value != 0x0131 && codePoint.Value != 0x017f)
{
_data.offsetToSimpleUppercase = (ushort)(codePoint.SimpleUppercaseMapping - codePoint.Value);
Expand Down
2,424 changes: 0 additions & 2,424 deletions src/coreclr/System.Private.CoreLib/Tools/GenUnicodeProp/CharUnicodeInfoData.cs

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ public static IEnumerable<object[]> ToUpper_TestData()

yield return new object[] { "embedded\0NuLL\0Byte\0", "EMBEDDED\0NULL\0BYTE\0", true };

// LATIN SMALL LETTER O WITH ACUTE, which has an upper case variant.
// LATIN SMALL LETTER O WITH ACUTE, mapped to LATIN CAPITAL LETTER O WITH ACUTE.
yield return new object[] { "\u00F3", "\u00D3", true };

// SNOWMAN, which does not have an upper case variant.
Expand Down Expand Up @@ -1167,8 +1167,9 @@ public void TestHashing()
[InlineData('A', 'A', 'a')]
[InlineData('i', 'I', 'i')] // to verify that we don't special-case the Turkish I in the invariant globalization mode
[InlineData('I', 'I', 'i')]
[InlineData(0x00C1, 0x00C1, 0x00C1)] // U+00C1 LATIN CAPITAL LETTER A WITH ACUTE
[InlineData(0x00E1, 0x00E1, 0x00E1)] // U+00E1 LATIN SMALL LETTER A WITH ACUTE
[InlineData('\u017f', '\u017f', '\u017f')] // Latin small letter long S shouldn't be case mapped in the invariant mode.
[InlineData(0x00C1, 0x00C1, 0x00E1)] // U+00C1 LATIN CAPITAL LETTER A WITH ACUTE
[InlineData(0x00E1, 0x00C1, 0x00E1)] // U+00E1 LATIN SMALL LETTER A WITH ACUTE
[InlineData(0x00D7, 0x00D7, 0x00D7)] // U+00D7 MULTIPLICATION SIGN
public void TestRune(int original, int expectedToUpper, int expectedToLower)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,31 +251,81 @@ private static double GetNumericValueNoBoundsCheck(uint codePoint)
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static char ToUpper(uint codePoint)
internal static char ToUpper(char codePoint)
{
nuint offset = GetCategoryCasingTableOffsetNoBoundsChecks((uint)codePoint);

// The offset is specified in shorts:
// Get the 'ref short' corresponding to where the addend is, read it as a signed 16-bit value, then add it to the code point.

ref short rsStart = ref Unsafe.As<byte, short>(ref MemoryMarshal.GetReference(UppercaseValues));
ref short rsDelta = ref Unsafe.Add(ref rsStart, (nint)offset);
nint delta = (BitConverter.IsLittleEndian) ? rsDelta : BinaryPrimitives.ReverseEndianness(rsDelta);
return (char)(delta + (nint)codePoint);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static uint ToUpper(uint codePoint)
{
if (UnicodeUtility.IsBmpCodePoint(codePoint))
{
return ToUpper((char) codePoint);
}

if (!UnicodeUtility.IsValidCodePoint(codePoint))
{
// We don't throw here even when given an invalid code point; we simply return the input value unchanged.
return codePoint;
}

nuint offset = GetCategoryCasingTableOffsetNoBoundsChecks(codePoint);

// If the offset is specified in shorts:
// The offset is specified in shorts:
// Get the 'ref short' corresponding to where the addend is, read it as a signed 16-bit value, then add it to the code point.

ref short rsStart = ref Unsafe.As<byte, short>(ref MemoryMarshal.GetReference(UppercaseValues));
ref short rsDelta = ref Unsafe.Add(ref rsStart, (int)offset);
int delta = (BitConverter.IsLittleEndian) ? rsDelta : BinaryPrimitives.ReverseEndianness(rsDelta);
return (char)(delta + (int)codePoint);
ref short rsDelta = ref Unsafe.Add(ref rsStart, (nint)offset);
nint delta = (BitConverter.IsLittleEndian) ? rsDelta : BinaryPrimitives.ReverseEndianness(rsDelta);
return (uint)(delta + (nint)codePoint);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static char ToLower(uint codePoint)
internal static char ToLower(char codePoint)
{
nuint offset = GetCategoryCasingTableOffsetNoBoundsChecks((uint)codePoint);

// The offset is specified in shorts:
// Get the 'ref short' corresponding to where the addend is, read it as a signed 16-bit value, then add it to the code point.

ref short rsStart = ref Unsafe.As<byte, short>(ref MemoryMarshal.GetReference(LowercaseValues));
ref short rsDelta = ref Unsafe.Add(ref rsStart, (nint)offset);
nint delta = (BitConverter.IsLittleEndian) ? rsDelta : BinaryPrimitives.ReverseEndianness(rsDelta);
return (char)(delta + (nint)codePoint);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static uint ToLower(uint codePoint)
{
if (UnicodeUtility.IsBmpCodePoint(codePoint))
{
return ToLower((char) codePoint);
}

if (!UnicodeUtility.IsValidCodePoint(codePoint))
{
// We don't throw here even when given an invalid code point; we simply return the input value unchanged.
return codePoint;
}

nuint offset = GetCategoryCasingTableOffsetNoBoundsChecks(codePoint);

// The offset is specified in shorts:
// Get the 'ref short' corresponding to where the addend is, read it as a signed 16-bit value, then add it to the code point.

ref short rsStart = ref Unsafe.As<byte, short>(ref MemoryMarshal.GetReference(LowercaseValues));
ref short rsDelta = ref Unsafe.Add(ref rsStart, (int)offset);
int delta = (BitConverter.IsLittleEndian) ? rsDelta : BinaryPrimitives.ReverseEndianness(rsDelta);
return (char)(delta + (int)codePoint);
ref short rsDelta = ref Unsafe.Add(ref rsStart, (nint)offset);
nint delta = (BitConverter.IsLittleEndian) ? rsDelta : BinaryPrimitives.ReverseEndianness(rsDelta);
return (uint)(delta + (int)codePoint);
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,18 +60,22 @@ private static void InvariantCreateSortKeyOrdinalIgnoreCase(ReadOnlySpan<char> s
for (int i = 0; i < source.Length; i++)
{
char c = source[i];
if (char.IsHighSurrogate(c) && i < source.Length - 1 && char.IsLowSurrogate(source[i + 1]))
if (char.IsHighSurrogate(c) && i < source.Length - 1)
{
SurrogateCasing.ToUpper(c, source[i + 1], out ushort hr, out ushort lr);
BinaryPrimitives.WriteUInt16BigEndian(sortKey, hr);
BinaryPrimitives.WriteUInt16BigEndian(sortKey, lr);
i++;
sortKey = sortKey.Slice(2 * sizeof(ushort));
continue;
char cl = source[i + 1];
if (char.IsLowSurrogate(cl))
{
SurrogateCasing.ToUpper(c, cl, out char hr, out char lr);
BinaryPrimitives.WriteUInt16BigEndian(sortKey, hr);
BinaryPrimitives.WriteUInt16BigEndian(sortKey, lr);
i++;
sortKey = sortKey.Slice(2 * sizeof(ushort));
continue;
}
}

// convert machine-endian to big-endian
BinaryPrimitives.WriteUInt16BigEndian(sortKey, (ushort)InvariantModeCasing.ToUpper(source[i]));
BinaryPrimitives.WriteUInt16BigEndian(sortKey, (ushort)InvariantModeCasing.ToUpper(c));
sortKey = sortKey.Slice(sizeof(ushort));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ internal static string ToLower(string s)
{
if (char.IsHighSurrogate(source[i]) && i < s.Length - 1 && char.IsLowSurrogate(source[i + 1]))
{
SurrogateCasing.ToLower(source[i], source[i + 1], out ushort h, out ushort l);
SurrogateCasing.ToLower(source[i], source[i + 1], out char h, out char l);
if (source[i] != h || source[i + 1] != l)
{
break;
Expand Down Expand Up @@ -76,7 +76,7 @@ internal static string ToUpper(string s)
{
if (char.IsHighSurrogate(source[i]) && i < s.Length - 1 && char.IsLowSurrogate(source[i + 1]))
{
SurrogateCasing.ToUpper(source[i], source[i + 1], out ushort h, out ushort l);
SurrogateCasing.ToUpper(source[i], source[i + 1], out char h, out char l);
if (source[i] != h || source[i + 1] != l)
{
break;
Expand Down Expand Up @@ -115,14 +115,18 @@ internal static void ToUpper(ReadOnlySpan<char> source, Span<char> destination)
for (int i = 0; i < source.Length; i++)
{
char c = source[i];
if (char.IsHighSurrogate(c) && i < source.Length - 1 && char.IsLowSurrogate(source[i + 1]))
if (char.IsHighSurrogate(c) && i < source.Length - 1)
{
// well formed surrogates
SurrogateCasing.ToUpper(c, source[i + 1], out ushort h, out ushort l);
destination[i] = (char)h;
destination[i+1] = (char)l;
i++; // skip the low surrogate
continue;
char cl = source[i + 1];
if (char.IsLowSurrogate(cl))
{
// well formed surrogates
SurrogateCasing.ToUpper(c, cl, out char h, out char l);
destination[i] = h;
destination[i+1] = l;
i++; // skip the low surrogate
continue;
}
}

destination[i] = ToUpper(c);
Expand All @@ -137,14 +141,18 @@ internal static void ToLower(ReadOnlySpan<char> source, Span<char> destination)
for (int i = 0; i < source.Length; i++)
{
char c = source[i];
if (char.IsHighSurrogate(c) && i < source.Length - 1 && char.IsLowSurrogate(source[i + 1]))
if (char.IsHighSurrogate(c) && i < source.Length - 1 )
{
// well formed surrogates
SurrogateCasing.ToLower(c, source[i + 1], out ushort h, out ushort l);
destination[i] = (char)h;
destination[i+1] = (char)l;
i++; // skip the low surrogate
continue;
char cl = source[i + 1];
if (char.IsLowSurrogate(cl))
{
// well formed surrogates
SurrogateCasing.ToLower(c, cl, out char h, out char l);
destination[i] = h;
destination[i+1] = l;
i++; // skip the low surrogate
continue;
}
}

destination[i] = ToLower(c);
Expand Down Expand Up @@ -214,8 +222,8 @@ internal static int CompareStringIgnoreCase(ref char strA, int lengthA, ref char
}

// we come here only if we have valid full surrogates
SurrogateCasing.ToUpper(a, charA, out ushort h1, out ushort l1);
SurrogateCasing.ToUpper(b, charB, out ushort h2, out ushort l2);
SurrogateCasing.ToUpper(a, charA, out char h1, out char l1);
SurrogateCasing.ToUpper(b, charB, out char h2, out char l2);

if (h1 != h2)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,27 +241,11 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
return OrdinalCasing.IndexOf(source, value);
}

internal static int LastIndexOfNoIgnoreCase(string source, string value, int startIndex, int count)
internal static int LastIndexOf(string source, string value, int startIndex, int count)
{
// startIndex is the index into source where we start search backwards from.
// leftStartIndex is the index into source of the start of the string that is
// count characters away from startIndex.
int leftStartIndex = startIndex - count + 1;

for (int i = startIndex - value.Length + 1; i >= leftStartIndex; i--)
{
int valueIndex, sourceIndex;

for (valueIndex = 0, sourceIndex = i;
valueIndex < value.Length && source[sourceIndex] == value[valueIndex];
valueIndex++, sourceIndex++) ;

if (valueIndex == value.Length) {
return i;
}
}

return -1;
int result = source.AsSpan(startIndex, count).LastIndexOf(value);
if (result >= 0) { result += startIndex; } // if match found, adjust 'result' by the actual start position
return result;
}

internal static unsafe int LastIndexOf(string source, string value, int startIndex, int count, bool ignoreCase)
Expand All @@ -288,7 +272,7 @@ internal static unsafe int LastIndexOf(string source, string value, int startInd

if (GlobalizationMode.Invariant)
{
return ignoreCase ? InvariantModeCasing.LastIndexOfIgnoreCase(source.AsSpan().Slice(startIndex, count), value) : LastIndexOfNoIgnoreCase(source, value, startIndex, count);
return ignoreCase ? InvariantModeCasing.LastIndexOfIgnoreCase(source.AsSpan().Slice(startIndex, count), value) : LastIndexOf(source, value, startIndex, count);
}

if (GlobalizationMode.UseNls)
Expand All @@ -298,7 +282,7 @@ internal static unsafe int LastIndexOf(string source, string value, int startInd

if (!ignoreCase)
{
LastIndexOfNoIgnoreCase(source, value, startIndex, count);
LastIndexOf(source, value, startIndex, count);
}

if (!source.TryGetSpan(startIndex, count, out ReadOnlySpan<char> sourceSpan))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,14 +170,18 @@ internal static void ToUpperOrdinal(ReadOnlySpan<char> source, Span<char> destin
continue;
}

if (char.IsHighSurrogate(c) && i < source.Length - 1 && char.IsLowSurrogate(source[i + 1]))
if (char.IsHighSurrogate(c) && i < source.Length - 1)
{
// well formed surrogates
SurrogateCasing.ToUpper(c, source[i + 1], out ushort h, out ushort l);
destination[i] = (char)h;
destination[i+1] = (char)l;
i++; // skip the low surrogate
continue;
char cl = source[i + 1];
if (char.IsLowSurrogate(cl))
{
// well formed surrogates
SurrogateCasing.ToUpper(c, cl, out char h, out char l);
destination[i] = h;
destination[i+1] = l;
i++; // skip the low surrogate
continue;
}
}

destination[i] = ToUpper(c);
Expand Down Expand Up @@ -244,8 +248,8 @@ internal static int CompareStringIgnoreCase(ref char strA, int lengthA, ref char
}

// we come here only if we have valid full surrogates
SurrogateCasing.ToUpper(a, charA, out ushort h1, out ushort l1);
SurrogateCasing.ToUpper(b, charB, out ushort h2, out ushort l2);
SurrogateCasing.ToUpper(a, charA, out char h1, out char l1);
SurrogateCasing.ToUpper(b, charB, out char h2, out char l2);

if (h1 != h2)
{
Expand Down
Loading