From 49bbd2ef63b19bebf247fb201bceae4ec8218da6 Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Mon, 20 Jun 2022 22:38:10 -0700 Subject: [PATCH 1/6] Fix Native Digits Support --- .../NumberFormatInfo/NumberFormatInfoTests.cs | 16 ++++ .../src/System/Globalization/CultureData.cs | 46 ++++++++---- .../pal_localeStringData.c | 75 +++++++++++++------ 3 files changed, 99 insertions(+), 38 deletions(-) diff --git a/src/libraries/System.Globalization/tests/NumberFormatInfo/NumberFormatInfoTests.cs b/src/libraries/System.Globalization/tests/NumberFormatInfo/NumberFormatInfoTests.cs index 9ff172194fd786..ad697e4835bd90 100644 --- a/src/libraries/System.Globalization/tests/NumberFormatInfo/NumberFormatInfoTests.cs +++ b/src/libraries/System.Globalization/tests/NumberFormatInfo/NumberFormatInfoTests.cs @@ -115,5 +115,21 @@ public void DigitSubstitutionListTest(string cultureName, DigitShapes shape) } } + public static IEnumerable NativeDigitTestData() + { + yield return new object[] { "ccp-Cakm-BD", new string[] { "\U0001E950", "\U0001E951", "\U0001E952", "\U0001E953", "\U0001E954", "\U0001E955", "\U0001E956", "\U0001E957", "\U0001E958", "\U0001E959" }}; + yield return new object[] { "ar-SA", new string[] {"\u0660", "\u0661", "\u0662", "\u0663", "\u0664", "\u0665", "\u0666", "\u0667", "\u0668", "\u0669" }}; + yield return new object[] { "en-US", new string[] { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" }}; + yield return new object[] { "ur-IN", new string[] { "\u06F0", "\u06F1", "\u06F2", "\u06F3", "\u06F4", "\u06F5", "\u06F6", "\u06F7", "\u06F8", "\u06F9" }}; + } + + public static bool FullICUPlatform => PlatformDetection.IsIcuGlobalization && PlatformDetection.IsNotBrowser; + + [ConditionalTheory(nameof(FullICUPlatform))] + [MemberData(nameof(NativeDigitTestData))] + public void TestNativeDigits(string cultureName, string[] nativeDigits) + { + Assert.Equal(nativeDigits, CultureInfo.GetCultureInfo(cultureName).NumberFormat.NativeDigits); + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs index 8212deead438cb..23550d80ccdfba 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs @@ -2148,31 +2148,45 @@ private string[] GetNativeDigits() return result; } - // Try to check if the digits are all ASCII so we can avoid the array allocation and use the static array NumberFormatInfo.s_asciiDigits instead. - // If we have non-ASCII digits, we should exit the loop very quickly. - int i = 0; - while (i < NumberFormatInfo.s_asciiDigits.Length) - { - if (digits[i] != NumberFormatInfo.s_asciiDigits[i][0]) - { - break; - } - i++; - } + // In ICU we separate the digits with the '\uFFFF' character - if (i >= NumberFormatInfo.s_asciiDigits.Length) + if (digits.StartsWith("0\uFFFF1\uFFFF2\uFFFF3\uFFFF4\uFFFF5\uFFFF6\uFFFF7\uFFFF8\uFFFF9\uFFFF", StringComparison.Ordinal) || // ICU common cases + digits.StartsWith("0123456789", StringComparison.Ordinal)) // NLS common cases { return result; } - // we have non-ASCII digits + // None ASCII digits + + // Check if values coming from ICU separated by 0xFFFF + int ffffPos = digits.IndexOf('\uFFFF'); + result = new string[10]; - for (i = 0; i < result.Length; i++) + if (ffffPos < 0) // NLS case { - result[i] = char.ToString(digits[i]); + for (int i = 0; i < result.Length; i++) + { + result[i] = char.ToString(digits[i]); + } + + return result; } - return result; + int start = 0; + int index = 0; + + do + { + result[index++] = digits.Substring(start, ffffPos - start); + start = ++ffffPos; + while (ffffPos < digits.Length && digits[ffffPos] != '\uFFFF') + { + ffffPos++; + } + + } while (ffffPos < digits.Length && index < 10); + + return index < 10 ? NumberFormatInfo.s_asciiDigits : result; } internal void GetNFIValues(NumberFormatInfo nfi) diff --git a/src/native/libs/System.Globalization.Native/pal_localeStringData.c b/src/native/libs/System.Globalization.Native/pal_localeStringData.c index f4f927f5801881..dfc8f370d2693e 100644 --- a/src/native/libs/System.Globalization.Native/pal_localeStringData.c +++ b/src/native/libs/System.Globalization.Native/pal_localeStringData.c @@ -19,11 +19,17 @@ Obtains the value of a DecimalFormatSymbols static UErrorCode GetLocaleInfoDecimalFormatSymbol(const char* locale, UNumberFormatSymbol symbol, UChar* value, - int32_t valueLength) + int32_t valueLength, + int32_t* symbolLength) { UErrorCode status = U_ZERO_ERROR; UNumberFormat* pFormat = unum_open(UNUM_DECIMAL, NULL, 0, locale, NULL, &status); - unum_getSymbol(pFormat, symbol, value, valueLength, &status); + + int32_t l = unum_getSymbol(pFormat, symbol, value, valueLength, &status); + if (symbolLength != NULL) + { + *symbolLength = l; + } unum_close(pFormat); return status; } @@ -39,14 +45,15 @@ static UErrorCode GetDigitSymbol(const char* locale, UNumberFormatSymbol symbol, int digit, UChar* value, - int32_t valueLength) + int32_t valueLength, + int32_t* symbolLength) { if (U_FAILURE(previousStatus)) { return previousStatus; } - return GetLocaleInfoDecimalFormatSymbol(locale, symbol, value + digit, valueLength - digit); + return GetLocaleInfoDecimalFormatSymbol(locale, symbol, value + digit, valueLength - digit, symbolLength); } /* @@ -279,26 +286,50 @@ int32_t GlobalizationNative_GetLocaleInfoString(const UChar* localeName, } break; case LocaleString_ThousandSeparator: - status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_GROUPING_SEPARATOR_SYMBOL, value, valueLength); + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_GROUPING_SEPARATOR_SYMBOL, value, valueLength, NULL); break; case LocaleString_DecimalSeparator: - status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_DECIMAL_SEPARATOR_SYMBOL, value, valueLength); + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_DECIMAL_SEPARATOR_SYMBOL, value, valueLength, NULL); break; case LocaleString_Digits: - status = GetDigitSymbol(locale, status, UNUM_ZERO_DIGIT_SYMBOL, 0, value, valueLength); - // symbols UNUM_ONE_DIGIT to UNUM_NINE_DIGIT are contiguous - for (int32_t symbol = UNUM_ONE_DIGIT_SYMBOL; symbol <= UNUM_NINE_DIGIT_SYMBOL; symbol++) { - int charIndex = symbol - UNUM_ONE_DIGIT_SYMBOL + 1; - status = GetDigitSymbol( - locale, status, (UNumberFormatSymbol)symbol, charIndex, value, valueLength); + // Native digit can be more than one character (e.g. ccp-Cakm-BD locale which using surrogate pairs to represent the native digit). + // We'll separate the native digits in the returned buffer by the character '\uFFFF'. + int charIndex = 0; + int symbolLength = 0; + + status = GetDigitSymbol(locale, status, UNUM_ZERO_DIGIT_SYMBOL, 0, value, valueLength, &symbolLength); + charIndex += symbolLength; + + if (U_SUCCESS(status) && (uint32_t)charIndex < (uint32_t)valueLength) + { + value[charIndex++] = 0xFFFF; + + // symbols UNUM_ONE_DIGIT to UNUM_NINE_DIGIT are contiguous + for (int32_t symbol = UNUM_ONE_DIGIT_SYMBOL; symbol <= UNUM_NINE_DIGIT_SYMBOL && charIndex < valueLength - 3; symbol++) + { + status = GetDigitSymbol(locale, status, (UNumberFormatSymbol)symbol, charIndex, value, valueLength, &symbolLength); + charIndex += symbolLength; + if (!U_SUCCESS(status) || (uint32_t)charIndex >= (uint32_t)valueLength) + { + break; + } + + value[charIndex++] = 0xFFFF; + } + + if ((uint32_t)charIndex < (uint32_t)valueLength) + { + value[charIndex] = 0; + } + } } break; case LocaleString_MonetarySymbol: - status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_CURRENCY_SYMBOL, value, valueLength); + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_CURRENCY_SYMBOL, value, valueLength, NULL); break; case LocaleString_Iso4217MonetarySymbol: - status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INTL_CURRENCY_SYMBOL, value, valueLength); + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INTL_CURRENCY_SYMBOL, value, valueLength, NULL); break; case LocaleString_CurrencyEnglishName: status = GetLocaleCurrencyName(locale, false, value, valueLength); @@ -307,11 +338,11 @@ int32_t GlobalizationNative_GetLocaleInfoString(const UChar* localeName, status = GetLocaleCurrencyName(locale, true, value, valueLength); break; case LocaleString_MonetaryDecimalSeparator: - status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_SEPARATOR_SYMBOL, value, valueLength); + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_SEPARATOR_SYMBOL, value, valueLength, NULL); break; case LocaleString_MonetaryThousandSeparator: status = - GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL, value, valueLength); + GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL, value, valueLength, NULL); break; case LocaleString_AMDesignator: status = GetLocaleInfoAmPm(locale, true, value, valueLength); @@ -320,10 +351,10 @@ int32_t GlobalizationNative_GetLocaleInfoString(const UChar* localeName, status = GetLocaleInfoAmPm(locale, false, value, valueLength); break; case LocaleString_PositiveSign: - status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PLUS_SIGN_SYMBOL, value, valueLength); + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PLUS_SIGN_SYMBOL, value, valueLength, NULL); break; case LocaleString_NegativeSign: - status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MINUS_SIGN_SYMBOL, value, valueLength); + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MINUS_SIGN_SYMBOL, value, valueLength, NULL); break; case LocaleString_Iso639LanguageTwoLetterName: status = GetLocaleIso639LanguageTwoLetterName(locale, value, valueLength); @@ -338,10 +369,10 @@ int32_t GlobalizationNative_GetLocaleInfoString(const UChar* localeName, status = GetLocaleIso3166CountryCode(locale, value, valueLength); break; case LocaleString_NaNSymbol: - status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_NAN_SYMBOL, value, valueLength); + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_NAN_SYMBOL, value, valueLength, NULL); break; case LocaleString_PositiveInfinitySymbol: - status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INFINITY_SYMBOL, value, valueLength); + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INFINITY_SYMBOL, value, valueLength, NULL); break; case LocaleString_ParentName: { @@ -358,10 +389,10 @@ int32_t GlobalizationNative_GetLocaleInfoString(const UChar* localeName, break; } case LocaleString_PercentSymbol: - status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERCENT_SYMBOL, value, valueLength); + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERCENT_SYMBOL, value, valueLength, NULL); break; case LocaleString_PerMilleSymbol: - status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERMILL_SYMBOL, value, valueLength); + status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERMILL_SYMBOL, value, valueLength, NULL); break; default: status = U_UNSUPPORTED_ERROR; From d311ddea2c93c3c1db82cfa050f8f6ab568cd3ef Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Tue, 21 Jun 2022 09:47:48 -0700 Subject: [PATCH 2/6] Fix running the test on older ICU versions --- .../tests/NumberFormatInfo/NumberFormatInfoTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Globalization/tests/NumberFormatInfo/NumberFormatInfoTests.cs b/src/libraries/System.Globalization/tests/NumberFormatInfo/NumberFormatInfoTests.cs index ad697e4835bd90..0fe17597a799ac 100644 --- a/src/libraries/System.Globalization/tests/NumberFormatInfo/NumberFormatInfoTests.cs +++ b/src/libraries/System.Globalization/tests/NumberFormatInfo/NumberFormatInfoTests.cs @@ -123,7 +123,7 @@ public static IEnumerable NativeDigitTestData() yield return new object[] { "ur-IN", new string[] { "\u06F0", "\u06F1", "\u06F2", "\u06F3", "\u06F4", "\u06F5", "\u06F6", "\u06F7", "\u06F8", "\u06F9" }}; } - public static bool FullICUPlatform => PlatformDetection.IsIcuGlobalization && PlatformDetection.IsNotBrowser; + public static bool FullICUPlatform => PlatformDetection.ICUVersion.Major >= 66 && PlatformDetection.IsNotBrowser; [ConditionalTheory(nameof(FullICUPlatform))] [MemberData(nameof(NativeDigitTestData))] From 1604af39993113a0984ed10cc6cdab56356adcab Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Tue, 21 Jun 2022 10:11:34 -0700 Subject: [PATCH 3/6] Fix/add the comments --- .../src/System/Globalization/CultureData.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs index 23550d80ccdfba..921dcf26a88d74 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs @@ -2138,7 +2138,7 @@ private string[] GetNativeDigits() { string[] result = NumberFormatInfo.s_asciiDigits; - // LOCALE_SNATIVEDIGITS (array of 10 single character strings). + // NLS LOCALE_SNATIVEDIGITS (array of 10 single character strings). In case of ICU, the buffer can be longer. string digits = GetLocaleInfoCoreUserOverride(LocaleStringData.Digits); // if digits.Length < NumberFormatInfo.s_asciiDigits.Length means the native digits setting is messed up in the host machine. @@ -2156,7 +2156,7 @@ private string[] GetNativeDigits() return result; } - // None ASCII digits + // Non-ASCII digits // Check if values coming from ICU separated by 0xFFFF int ffffPos = digits.IndexOf('\uFFFF'); @@ -2172,6 +2172,8 @@ private string[] GetNativeDigits() return result; } + // ICU case + int start = 0; int index = 0; From e5a83d081ffb0463022fc966c56f5b9616607a91 Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Tue, 21 Jun 2022 13:04:17 -0700 Subject: [PATCH 4/6] Address the feedback --- .../pal_localeStringData.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/native/libs/System.Globalization.Native/pal_localeStringData.c b/src/native/libs/System.Globalization.Native/pal_localeStringData.c index dfc8f370d2693e..c2e2a02802fae9 100644 --- a/src/native/libs/System.Globalization.Native/pal_localeStringData.c +++ b/src/native/libs/System.Globalization.Native/pal_localeStringData.c @@ -25,10 +25,10 @@ static UErrorCode GetLocaleInfoDecimalFormatSymbol(const char* locale, UErrorCode status = U_ZERO_ERROR; UNumberFormat* pFormat = unum_open(UNUM_DECIMAL, NULL, 0, locale, NULL, &status); - int32_t l = unum_getSymbol(pFormat, symbol, value, valueLength, &status); + int32_t lengthResult = unum_getSymbol(pFormat, symbol, value, valueLength, &status); if (symbolLength != NULL) { - *symbolLength = l; + *symbolLength = lengthResult; } unum_close(pFormat); return status; @@ -293,13 +293,12 @@ int32_t GlobalizationNative_GetLocaleInfoString(const UChar* localeName, break; case LocaleString_Digits: { - // Native digit can be more than one character (e.g. ccp-Cakm-BD locale which using surrogate pairs to represent the native digit). + // Native digit can be more than one 16-bit character (e.g. ccp-Cakm-BD locale which using surrogate pairs to represent the native digit). // We'll separate the native digits in the returned buffer by the character '\uFFFF'. - int charIndex = 0; - int symbolLength = 0; - + int32_t symbolLength = 0; status = GetDigitSymbol(locale, status, UNUM_ZERO_DIGIT_SYMBOL, 0, value, valueLength, &symbolLength); - charIndex += symbolLength; + + int32_t charIndex = symbolLength; if (U_SUCCESS(status) && (uint32_t)charIndex < (uint32_t)valueLength) { From 9a13ddfbb32a689966d88ac17c3ccc32387d5987 Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Wed, 22 Jun 2022 09:00:51 -0700 Subject: [PATCH 5/6] Feedback --- .../src/System/Globalization/CultureData.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs index 921dcf26a88d74..68a7e5e955d014 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs @@ -2181,7 +2181,7 @@ private string[] GetNativeDigits() { result[index++] = digits.Substring(start, ffffPos - start); start = ++ffffPos; - while (ffffPos < digits.Length && digits[ffffPos] != '\uFFFF') + while ((uint)ffffPos < (uint)digits.Length && digits[ffffPos] != '\uFFFF') { ffffPos++; } From 3878055d1a94e708ed1d8969e337002bd11fa99c Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Wed, 22 Jun 2022 09:29:00 -0700 Subject: [PATCH 6/6] Adding assert --- .../src/System/Globalization/CultureData.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs index 68a7e5e955d014..079441b08d2e99 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs @@ -2188,6 +2188,8 @@ private string[] GetNativeDigits() } while (ffffPos < digits.Length && index < 10); + Debug.Assert(index >= 10, $"Couldn't read native digits for '{_sWindowsName}' successfully."); + return index < 10 ? NumberFormatInfo.s_asciiDigits : result; }