@@ -28,6 +28,11 @@ static token saved_token, prev_token, sent_token, empty_token;
2828static bool allow_dump_lines = false , strict_mode;
2929static size_t buffer_size = 0 ;
3030
31+ /*
32+ * FIXME:
33+ * jerry_api_char_t should not be used outside of the API implementation
34+ */
35+
3136/* Represents the contents of a script. */
3237static const jerry_api_char_t *buffer_start = NULL ;
3338static const jerry_api_char_t *token_start;
@@ -157,6 +162,34 @@ lexer_create_token_for_charset (token_type tt, /**< token type */
157162 return create_token_from_lit (tt, lit);
158163} /* lexer_create_token_for_charset */
159164
165+ /* *
166+ * Check if the character can begin an identifier token (IdentifierStart group, ECMA-262 v5, 7.6)
167+ * Accepted: characters for which lit_char_is_unicode_letter holds, LIT_CHAR_DOLLAR_SIGN, LIT_CHAR_UNDERSCORE, and LIT_CHAR_BACKSLASH (lexer_parse_identifier_or_keyword handles a backslash as the start of an escape sequence)
168+ * @return true - if the character is accepted as the first character of an identifier, false - otherwise
169+ */
170+ static bool
171+ lexer_is_char_can_be_identifier_start (ecma_char_t c) /* *< character to check */
172+ {
173+ return (lit_char_is_unicode_letter (c)
174+ || c == LIT_CHAR_DOLLAR_SIGN
175+ || c == LIT_CHAR_UNDERSCORE
176+ || c == LIT_CHAR_BACKSLASH);
177+ } /* lexer_is_char_can_be_identifier_start */
178+
179+ /* *
180+ * Check if the character is accepted inside an identifier (IdentifierPart group, ECMA-262 v5, 7.6): anything accepted by lexer_is_char_can_be_identifier_start, or by the combining mark, digit or connector punctuation predicates
181+ * NOTE(review): the start check also accepts LIT_CHAR_BACKSLASH, so this predicate returns true for a backslash too; it is applied to characters decoded from escape sequences in lexer_parse_identifier_or_keyword - confirm a decoded backslash should pass
182+ * @return true - if the character is accepted as a non-first character of an identifier, false - otherwise
183+ */
184+ static bool
185+ lexer_is_char_can_be_identifier_part (ecma_char_t c) /* *< character to check */
186+ {
187+ return (lexer_is_char_can_be_identifier_start (c)
188+ || lit_char_is_unicode_combining_mark (c)
189+ || lit_char_is_unicode_digit (c)
190+ || lit_char_is_unicode_connector_punctuation (c));
191+ } /* lexer_is_char_can_be_identifier_part */
192+
160193/* *
161194 * Try to decode specified character as SingleEscapeCharacter (ECMA-262, v5, 7.8.4)
162195 *
@@ -652,28 +685,29 @@ consume_char (void)
652685 * TOK_BOOL - for BooleanLiteral
653686 */
654687static token
655- parse_name (void )
688+ lexer_parse_identifier_or_keyword (void )
656689{
657- ecma_char_t c = ( ecma_char_t ) LA (0 );
690+ ecma_char_t c = LA (0 );
658691
659- JERRY_ASSERT (isalpha (c) || c == ' $ ' || c == ' _ ' || c == ' \\ ' );
692+ JERRY_ASSERT (lexer_is_char_can_be_identifier_start (c));
660693
661694 new_token ();
662695
696+ bool is_correct_identifier_name = true ;
663697 bool is_escape_sequence_occured = false ;
664698 bool is_all_chars_were_lowercase_ascii = true ;
665699
666700 while (true )
667701 {
668- c = ( ecma_char_t ) LA (0 );
702+ c = LA (0 );
669703
670- if (c == ' \\ ' )
704+ if (c == LIT_CHAR_BACKSLASH )
671705 {
672706 consume_char ();
673707
674708 is_escape_sequence_occured = true ;
675709
676- bool is_unicode_escape_sequence = (LA (0 ) == ' u ' );
710+ bool is_unicode_escape_sequence = (LA (0 ) == LIT_CHAR_LOWERCASE_U );
677711 consume_char ();
678712
679713 if (is_unicode_escape_sequence)
@@ -684,36 +718,35 @@ parse_name (void)
684718 true ,
685719 &c))
686720 {
687- PARSE_ERROR (" Malformed escape sequence" , token_start - buffer_start);
721+ is_correct_identifier_name = false ;
722+ break ;
688723 }
689724 else
690725 {
691726 /* c now contains character, encoded in the UnicodeEscapeSequence */
692- if (!isalpha (c)
693- && !isdigit (c)
694- && c != ' $'
695- && c != ' _' )
727+
728+ // Check character, converted from UnicodeEscapeSequence
729+ if (!lexer_is_char_can_be_identifier_part (c))
696730 {
697- PARSE_ERROR (" Invalid character in identifier" , token_start - buffer_start);
731+ is_correct_identifier_name = false ;
732+ break ;
698733 }
699734 }
700735 }
701736 else
702737 {
703- PARSE_ERROR ( " Only unicode escape sequences are allowed in identifiers " ,
704- token_start - buffer_start) ;
738+ is_correct_identifier_name = false ;
739+ break ;
705740 }
706741 }
707- else if (!isalpha (c)
708- && !isdigit (c)
709- && c != ' $'
710- && c != ' _' )
742+ else if (!lexer_is_char_can_be_identifier_part (c))
711743 {
712744 break ;
713745 }
714746 else
715747 {
716- if (!islower (c))
748+ if (!(c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN
749+ && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END))
717750 {
718751 is_all_chars_were_lowercase_ascii = false ;
719752 }
@@ -722,6 +755,11 @@ parse_name (void)
722755 }
723756 }
724757
758+ if (!is_correct_identifier_name)
759+ {
760+ PARSE_ERROR (" Illegal identifier name" , lit_utf8_iterator_get_offset (&src_iter));
761+ }
762+
725763 const lit_utf8_size_t charset_size = (lit_utf8_size_t ) (lit_utf8_iterator_get_ptr (&src_iter) - token_start);
726764
727765 token ret = empty_token;
@@ -754,7 +792,7 @@ parse_name (void)
754792 token_start = NULL ;
755793
756794 return ret;
757- } /* parse_name */
795+ } /* lexer_parse_identifier_or_keyword */
758796
759797/* In this function we cannot use strtol function
760798 since there is no octal literals in ECMAscript. */
@@ -1199,9 +1237,10 @@ lexer_next_token_private (void)
11991237
12001238 JERRY_ASSERT (token_start == NULL );
12011239
1202- if (isalpha (c) || c == ' $' || c == ' _' || c == ' \\ ' )
1240+ /* ECMA-262 v5, 7.6, Identifier */
1241+ if (lexer_is_char_can_be_identifier_start (c))
12031242 {
1204- return parse_name ();
1243+ return lexer_parse_identifier_or_keyword ();
12051244 }
12061245
12071246 if (isdigit (c) || (c == ' .' && isdigit (LA (1 ))))
0 commit comments