2525#endif
2626#endif
2727
28- // Could use C++ standard library if not __GLIBCXX__,
29- // For testing purposes we just the handwritten on all platforms.
30- #if defined(CPPREST_STDLIB_UNICODE_CONVERSIONS)
31- #include < codecvt>
32- #endif
33-
3428using namespace web ;
3529using namespace utility ;
3630using namespace utility ::conversions;
@@ -346,10 +340,6 @@ inline size_t count_utf8_to_utf16(const std::string& s)
346340
347341utf16string __cdecl conversions::utf8_to_utf16 (const std::string &s)
348342{
349- #if defined(CPPREST_STDLIB_UNICODE_CONVERSIONS)
350- std::wstring_convert<std::codecvt_utf8_utf16<utf16char>, utf16char> conversion;
351- return conversion.from_bytes (s);
352- #else
353343 // Save repeated heap allocations, use the length of resulting sequence.
354344 const size_t srcSize = s.size ();
355345 const std::string::value_type* const srcData = &s[0 ];
@@ -391,7 +381,7 @@ utf16string __cdecl conversions::utf8_to_utf16(const std::string &s)
391381 {
392382 const char c2{ srcData[++index] };
393383 const char c3{ srcData[++index] };
394- destData[destIndex++] = (( src & LOW_4BITS) << 12 ) | ((c2 & LOW_6BITS) << 6 ) | (c3 & LOW_6BITS);
384+ destData[destIndex++] = static_cast <utf16string::value_type>((( src & LOW_4BITS) << 12 ) | ((c2 & LOW_6BITS) << 6 ) | (c3 & LOW_6BITS) );
395385 }
396386 break ;
397387 case 0xD0 : // 2 byte character, 0x80 to 0x7FF
@@ -406,7 +396,6 @@ utf16string __cdecl conversions::utf8_to_utf16(const std::string &s)
406396 }
407397 }
408398 return dest;
409- #endif
410399}
411400
412401
@@ -453,10 +442,6 @@ inline size_t count_utf16_to_utf8(const utf16string &w)
453442
454443std::string __cdecl conversions::utf16_to_utf8 (const utf16string &w)
455444{
456- #if defined(CPPREST_STDLIB_UNICODE_CONVERSIONS)
457- std::wstring_convert<std::codecvt_utf8_utf16<utf16char>, utf16char> conversion;
458- return conversion.to_bytes (w);
459- #else
460445 const size_t srcSize = w.size ();
461446 const utf16string::value_type* const srcData = &w[0 ];
462447 std::string dest (count_utf16_to_utf8 (w), ' \0 ' );
@@ -465,7 +450,7 @@ std::string __cdecl conversions::utf16_to_utf8(const utf16string &w)
465450
466451 for (size_t index = 0 ; index < srcSize; ++index)
467452 {
468- const utf16string::value_type src{ srcData[index] } ;
453+ const utf16string::value_type src = srcData[index];
469454 if (src <= 0x7FF )
470455 {
471456 if (src <= 0x7F ) // single byte character
@@ -478,41 +463,37 @@ std::string __cdecl conversions::utf16_to_utf8(const utf16string &w)
478463 destData[destIndex++] = static_cast <char >(char ((src & LOW_6BITS) | BIT8)); // trailing 6 bits
479464 }
480465 }
481- else
466+ // Check for high surrogate.
467+ else if (src >= H_SURROGATE_START && src <= H_SURROGATE_END)
482468 {
483- // Check for high surrogate.
484- if (src >= H_SURROGATE_START && src <= H_SURROGATE_END)
485- {
486- const auto highSurrogate{ src };
487- const auto lowSurrogate{ srcData[++index] };
488-
489- // To get from surrogate pair to Unicode code point:
490- // - subract 0xD800 from high surrogate, this forms top ten bits
491- // - subract 0xDC00 from low surrogate, this forms low ten bits
492- // - add 0x10000
493- // Leaves a code point in U+10000 to U+10FFFF range.
494- uint32_t codePoint = highSurrogate - H_SURROGATE_START;
495- codePoint <<= 10 ;
496- codePoint |= lowSurrogate - L_SURROGATE_START;
497- codePoint += SURROGATE_PAIR_START;
498-
499- // 4 bytes need using 21 bits
500- destData[destIndex++] = static_cast <char >((codePoint >> 18 ) | 0xF0 ); // leading 3 bits
501- destData[destIndex++] = static_cast <char >(((codePoint >> 12 ) & LOW_6BITS) | BIT8); // next 6 bits
502- destData[destIndex++] = static_cast <char >(((codePoint >> 6 ) & LOW_6BITS) | BIT8); // next 6 bits
503- destData[destIndex++] = static_cast <char >((codePoint & LOW_6BITS) | BIT8); // trailing 6 bits
504- }
505- else // 3 bytes needed (16 bits used)
506- {
507- destData[destIndex++] = static_cast <char >((src >> 12 ) | 0xE0 ); // leading 4 bits
508- destData[destIndex++] = static_cast <char >(((src >> 6 ) & LOW_6BITS) | BIT8); // middle 6 bits
509- destData[destIndex++] = static_cast <char >((src & LOW_6BITS) | BIT8); // trailing 6 bits
510- }
469+ const auto highSurrogate = src;
470+ const auto lowSurrogate = srcData[++index];
471+
472+ // To get from surrogate pair to Unicode code point:
473+ // - subtract 0xD800 from high surrogate, this forms top ten bits
474+ // - subtract 0xDC00 from low surrogate, this forms low ten bits
475+ // - add 0x10000
476+ // Leaves a code point in U+10000 to U+10FFFF range.
477+ uint32_t codePoint = highSurrogate - H_SURROGATE_START;
478+ codePoint <<= 10 ;
479+ codePoint |= lowSurrogate - L_SURROGATE_START;
480+ codePoint += SURROGATE_PAIR_START;
481+
482+ // 4 bytes needed, using 21 bits
483+ destData[destIndex++] = static_cast <char >((codePoint >> 18 ) | 0xF0 ); // leading 3 bits
484+ destData[destIndex++] = static_cast <char >(((codePoint >> 12 ) & LOW_6BITS) | BIT8); // next 6 bits
485+ destData[destIndex++] = static_cast <char >(((codePoint >> 6 ) & LOW_6BITS) | BIT8); // next 6 bits
486+ destData[destIndex++] = static_cast <char >((codePoint & LOW_6BITS) | BIT8); // trailing 6 bits
487+ }
488+ else // 3 bytes needed (16 bits used)
489+ {
490+ destData[destIndex++] = static_cast <char >((src >> 12 ) | 0xE0 ); // leading 4 bits
491+ destData[destIndex++] = static_cast <char >(((src >> 6 ) & LOW_6BITS) | BIT8); // middle 6 bits
492+ destData[destIndex++] = static_cast <char >((src & LOW_6BITS) | BIT8); // trailing 6 bits
511493 }
512494 }
513495
514496 return dest;
515- #endif
516497}
517498
518499utf16string __cdecl conversions::usascii_to_utf16 (const std::string &s)
0 commit comments