Skip to content

Commit d0ef0b7

Browse files
committed
Fix regressions in PR microsoft#251
1 parent 38d916a commit d0ef0b7

File tree

1 file changed

+28
-47
lines changed

1 file changed

+28
-47
lines changed

Release/src/utilities/asyncrt_utils.cpp

Lines changed: 28 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,6 @@
2525
#endif
2626
#endif
2727

28-
// Could use C++ standard library if not __GLIBCXX__,
29-
// For testing purposes we just use the handwritten one on all platforms.
30-
#if defined(CPPREST_STDLIB_UNICODE_CONVERSIONS)
31-
#include <codecvt>
32-
#endif
33-
3428
using namespace web;
3529
using namespace utility;
3630
using namespace utility::conversions;
@@ -346,10 +340,6 @@ inline size_t count_utf8_to_utf16(const std::string& s)
346340

347341
utf16string __cdecl conversions::utf8_to_utf16(const std::string &s)
348342
{
349-
#if defined(CPPREST_STDLIB_UNICODE_CONVERSIONS)
350-
std::wstring_convert<std::codecvt_utf8_utf16<utf16char>, utf16char> conversion;
351-
return conversion.from_bytes(s);
352-
#else
353343
// Save repeated heap allocations, use the length of resulting sequence.
354344
const size_t srcSize = s.size();
355345
const std::string::value_type* const srcData = &s[0];
@@ -391,7 +381,7 @@ utf16string __cdecl conversions::utf8_to_utf16(const std::string &s)
391381
{
392382
const char c2{ srcData[++index] };
393383
const char c3{ srcData[++index] };
394-
destData[destIndex++] = ((src & LOW_4BITS) << 12) | ((c2 & LOW_6BITS) << 6) | (c3 & LOW_6BITS);
384+
destData[destIndex++] = static_cast<utf16string::value_type>(((src & LOW_4BITS) << 12) | ((c2 & LOW_6BITS) << 6) | (c3 & LOW_6BITS));
395385
}
396386
break;
397387
case 0xD0: // 2 byte character, 0x80 to 0x7FF
@@ -406,7 +396,6 @@ utf16string __cdecl conversions::utf8_to_utf16(const std::string &s)
406396
}
407397
}
408398
return dest;
409-
#endif
410399
}
411400

412401

@@ -453,10 +442,6 @@ inline size_t count_utf16_to_utf8(const utf16string &w)
453442

454443
std::string __cdecl conversions::utf16_to_utf8(const utf16string &w)
455444
{
456-
#if defined(CPPREST_STDLIB_UNICODE_CONVERSIONS)
457-
std::wstring_convert<std::codecvt_utf8_utf16<utf16char>, utf16char> conversion;
458-
return conversion.to_bytes(w);
459-
#else
460445
const size_t srcSize = w.size();
461446
const utf16string::value_type* const srcData = &w[0];
462447
std::string dest(count_utf16_to_utf8(w), '\0');
@@ -465,7 +450,7 @@ std::string __cdecl conversions::utf16_to_utf8(const utf16string &w)
465450

466451
for (size_t index = 0; index < srcSize; ++index)
467452
{
468-
const utf16string::value_type src{ srcData[index] };
453+
const utf16string::value_type src = srcData[index];
469454
if (src <= 0x7FF)
470455
{
471456
if (src <= 0x7F) // single byte character
@@ -478,41 +463,37 @@ std::string __cdecl conversions::utf16_to_utf8(const utf16string &w)
478463
destData[destIndex++] = static_cast<char>(char((src & LOW_6BITS) | BIT8)); // trailing 6 bits
479464
}
480465
}
481-
else
466+
// Check for high surrogate.
467+
else if (src >= H_SURROGATE_START && src <= H_SURROGATE_END)
482468
{
483-
// Check for high surrogate.
484-
if (src >= H_SURROGATE_START && src <= H_SURROGATE_END)
485-
{
486-
const auto highSurrogate{ src };
487-
const auto lowSurrogate{ srcData[++index] };
488-
489-
// To get from surrogate pair to Unicode code point:
490-
// - subtract 0xD800 from high surrogate, this forms top ten bits
491-
// - subtract 0xDC00 from low surrogate, this forms low ten bits
492-
// - add 0x10000
493-
// Leaves a code point in U+10000 to U+10FFFF range.
494-
uint32_t codePoint = highSurrogate - H_SURROGATE_START;
495-
codePoint <<= 10;
496-
codePoint |= lowSurrogate - L_SURROGATE_START;
497-
codePoint += SURROGATE_PAIR_START;
498-
499-
// 4 bytes needed, using 21 bits
500-
destData[destIndex++] = static_cast<char>((codePoint >> 18) | 0xF0); // leading 3 bits
501-
destData[destIndex++] = static_cast<char>(((codePoint >> 12) & LOW_6BITS) | BIT8); // next 6 bits
502-
destData[destIndex++] = static_cast<char>(((codePoint >> 6) & LOW_6BITS) | BIT8); // next 6 bits
503-
destData[destIndex++] = static_cast<char>((codePoint & LOW_6BITS) | BIT8); // trailing 6 bits
504-
}
505-
else // 3 bytes needed (16 bits used)
506-
{
507-
destData[destIndex++] = static_cast<char>((src >> 12) | 0xE0); // leading 4 bits
508-
destData[destIndex++] = static_cast<char>(((src >> 6) & LOW_6BITS) | BIT8); // middle 6 bits
509-
destData[destIndex++] = static_cast<char>((src & LOW_6BITS) | BIT8); // trailing 6 bits
510-
}
469+
const auto highSurrogate = src;
470+
const auto lowSurrogate = srcData[++index];
471+
472+
// To get from surrogate pair to Unicode code point:
473+
// - subtract 0xD800 from high surrogate, this forms top ten bits
474+
// - subtract 0xDC00 from low surrogate, this forms low ten bits
475+
// - add 0x10000
476+
// Leaves a code point in U+10000 to U+10FFFF range.
477+
uint32_t codePoint = highSurrogate - H_SURROGATE_START;
478+
codePoint <<= 10;
479+
codePoint |= lowSurrogate - L_SURROGATE_START;
480+
codePoint += SURROGATE_PAIR_START;
481+
482+
// 4 bytes needed, using 21 bits
483+
destData[destIndex++] = static_cast<char>((codePoint >> 18) | 0xF0); // leading 3 bits
484+
destData[destIndex++] = static_cast<char>(((codePoint >> 12) & LOW_6BITS) | BIT8); // next 6 bits
485+
destData[destIndex++] = static_cast<char>(((codePoint >> 6) & LOW_6BITS) | BIT8); // next 6 bits
486+
destData[destIndex++] = static_cast<char>((codePoint & LOW_6BITS) | BIT8); // trailing 6 bits
487+
}
488+
else // 3 bytes needed (16 bits used)
489+
{
490+
destData[destIndex++] = static_cast<char>((src >> 12) | 0xE0); // leading 4 bits
491+
destData[destIndex++] = static_cast<char>(((src >> 6) & LOW_6BITS) | BIT8); // middle 6 bits
492+
destData[destIndex++] = static_cast<char>((src & LOW_6BITS) | BIT8); // trailing 6 bits
511493
}
512494
}
513495

514496
return dest;
515-
#endif
516497
}
517498

518499
utf16string __cdecl conversions::usascii_to_utf16(const std::string &s)

0 commit comments

Comments
 (0)