@@ -778,46 +778,77 @@ inline bool JSON_Parser::handle_unescape_char(Token &token)
778778 return true ;
779779 case ' u' :
780780 {
781- // A four-hexdigit Unicode character.
782- // Transform into a 16 bit code point.
783- int decoded = 0 ;
784- for (int i = 0 ; i < 4 ; ++i)
785- {
786- int ch_int = NextCharacter ();
787- if (ch_int < 0 || ch_int > 127 )
788- return false ;
781+ auto decode_utf16_unit = [](JSON_Parser& parser, json_error& ec) {
782+ // A four-hexdigit Unicode character.
783+ // Transform into a 16 bit code point.
784+ int decoded = 0 ;
785+ for (int i = 0 ; i < 4 ; ++i)
786+ {
787+ int ch_int = parser.NextCharacter ();
788+ if (ch_int < 0 || ch_int > 127 ) {
789+ ec = json_error::malformed_string_literal;
790+ return 0 ;
791+ }
789792#ifdef _WIN32
790- const int isxdigitResult = _isxdigit_l (ch_int, utility::details::scoped_c_thread_locale::c_locale ());
793+ const int isxdigitResult = _isxdigit_l (ch_int, utility::details::scoped_c_thread_locale::c_locale ());
791794#else
792- const int isxdigitResult = isxdigit (ch_int);
795+ const int isxdigitResult = isxdigit (ch_int);
793796#endif
794- if (!isxdigitResult)
795- return false ;
797+ if (!isxdigitResult)
798+ {
799+ ec = json_error::malformed_string_literal;
800+ return 0 ;
801+ }
796802
797- int val = _hexval[static_cast <size_t >(ch_int)];
798- _ASSERTE (val != -1 );
803+ int val = _hexval[static_cast <size_t >(ch_int)];
804+ _ASSERTE (val != -1 );
799805
800- // Add the input char to the decoded number
801- decoded |= (val << (4 * (3 - i)));
802- }
806+ // Add the input char to the decoded number
807+ decoded |= (val << (4 * (3 - i)));
808+ }
809+
810+ return decoded;
811+ };
803812
804813 // Construct the character based on the decoded number
805814 // Convert the code unit into a UTF-8 sequence
806- // TODO: Improve detection of surrogate pair + error handling
807815 utf16string utf16;
816+ auto decoded = decode_utf16_unit (*this , token.m_error );
817+ if (token.m_error )
818+ return false ;
808819 utf16.push_back (static_cast <utf16char>(decoded));
809- utf8string utf8;
820+
821+ if (decoded >= 0xD800 )
822+ {
823+ // Decoded a high surrogate. Attempt to grab low surrogate.
824+ if (NextCharacter () != ' \\ ' )
825+ {
826+ token.m_error = json_error::malformed_string_literal;
827+ return false ;
828+ }
829+ if (NextCharacter () != ' u' )
830+ {
831+ token.m_error = json_error::malformed_string_literal;
832+ return false ;
833+ }
834+ decoded = decode_utf16_unit (*this , token.m_error );
835+ if (token.m_error )
836+ return false ;
837+ utf16.push_back (static_cast <utf16char>(decoded));
838+ }
839+
810840 try
811841 {
842+ utf8string utf8;
812843 utf8 = ::utility::conversions::utf16_to_utf8 (utf16);
844+ token.string_val .append (utf8);
845+ return true ;
813846 }
814847 catch (...)
815848 {
816849 token.m_error = json_error::malformed_string_literal;
850+ return false ;
817851 }
818- token.string_val .append (utf8);
819-
820- return true ;
821852 }
822853 default :
823854 // BUG: This is incorrect behavior; all characters MAY be escaped, and should be added as-is.
0 commit comments