Skip to content

Commit 1711416

Browse files
Simplify lexer_parse_number (parse_number) and add unicode support to the function.
JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan [email protected]
1 parent baf3748 commit 1711416

File tree

1 file changed

+102
-138
lines changed

1 file changed

+102
-138
lines changed

jerry-core/parser/js/lexer.cpp

Lines changed: 102 additions & 138 deletions
Original file line number | Diff line number | Diff line change
@@ -794,34 +794,35 @@ lexer_parse_identifier_or_keyword (void)
794794
return ret;
795795
} /* lexer_parse_identifier_or_keyword */
796796

797-
/* In this function we cannot use strtol function
798-
since there is no octal literals in ECMAscript. */
797+
/**
798+
* Parse numeric literal (ECMA-262, v5, 7.8.3)
799+
*
800+
* @return token of TOK_SMALL_INT or TOK_NUMBER types
801+
*/
799802
static token
800-
parse_number (void)
803+
lexer_parse_number (void)
801804
{
802805
ecma_char_t c = LA (0);
803806
bool is_hex = false;
804807
bool is_fp = false;
805-
bool is_exp = false;
806-
bool is_overflow = false;
807808
ecma_number_t fp_res = .0;
808809
size_t tok_length = 0, i;
809-
uint32_t res = 0;
810810
token known_token;
811811

812-
JERRY_ASSERT (isdigit (c) || c == '.');
812+
JERRY_ASSERT (lit_char_is_decimal_digit (c)
813+
|| c == LIT_CHAR_DOT);
813814

814-
if (c == '0')
815+
if (c == LIT_CHAR_0)
815816
{
816-
if (LA (1) == 'x' || LA (1) == 'X')
817+
if (LA (1) == LIT_CHAR_LOWERCASE_X
818+
|| LA (1) == LIT_CHAR_UPPERCASE_X)
817819
{
818820
is_hex = true;
819821
}
820822
}
821-
822-
if (c == '.')
823+
else if (c == LIT_CHAR_DOT)
823824
{
824-
JERRY_ASSERT (!isalpha (LA (1)));
825+
JERRY_ASSERT (lit_char_is_decimal_digit (LA (1)));
825826
is_fp = true;
826827
}
827828

@@ -834,189 +835,149 @@ parse_number (void)
834835
while (true)
835836
{
836837
c = LA (0);
837-
if (!isxdigit (c))
838+
if (!lit_char_is_hex_digit (c))
838839
{
839840
break;
840841
}
841842
consume_char ();
842843
}
843844

844-
if (isalpha (c) || c == '_' || c == '$')
845+
if (lexer_is_char_can_be_identifier_start (c))
845846
{
846-
PARSE_ERROR ("Integer literal shall not contain non-digit characters",
847+
PARSE_ERROR ("Identifier just after integer literal",
847848
lit_utf8_iterator_get_offset (&src_iter));
848849
}
849850

850851
tok_length = (size_t) (lit_utf8_iterator_get_ptr (&src_iter) - token_start);
851852

853+
/* token is constructed at end of function */
852854
for (i = 0; i < tok_length; i++)
853855
{
854-
if (!is_overflow)
855-
{
856-
res = (res << 4) + lit_char_hex_to_int (token_start[i]);
857-
}
858-
else
859-
{
860-
fp_res = fp_res * 16 + (ecma_number_t) lit_char_hex_to_int (token_start[i]);
861-
}
862-
863-
if (res > 255)
864-
{
865-
fp_res = (ecma_number_t) res;
866-
is_overflow = true;
867-
res = 0;
868-
}
869-
}
870-
871-
if (is_overflow)
872-
{
873-
known_token = convert_seen_num_to_token (fp_res);
874-
token_start = NULL;
875-
return known_token;
876-
}
877-
else
878-
{
879-
known_token = create_token (TOK_SMALL_INT, (uint8_t) res);
880-
token_start = NULL;
881-
return known_token;
856+
fp_res = fp_res * 16 + (ecma_number_t) lit_char_hex_to_int (token_start[i]);
882857
}
883858
}
884-
885-
JERRY_ASSERT (!is_hex && !is_exp);
886-
887-
new_token ();
888-
889-
// Eat up '.'
890-
if (is_fp)
859+
else
891860
{
892-
consume_char ();
893-
}
861+
bool is_exp = false;
894862

895-
while (true)
896-
{
897-
c = LA (0);
898-
if (is_fp && c == '.')
899-
{
900-
FIXME (/* This is wrong: 1..toString (). */)
901-
PARSE_ERROR ("Integer literal shall not contain more than one dot character",
902-
lit_utf8_iterator_get_offset (&src_iter));
903-
}
904-
if (is_exp && (c == 'e' || c == 'E'))
905-
{
906-
PARSE_ERROR ("Integer literal shall not contain more than exponential marker ('e' or 'E')",
907-
lit_utf8_iterator_get_offset (&src_iter));
908-
}
863+
new_token ();
909864

910-
if (c == '.')
865+
// Eat up '.'
866+
if (is_fp)
911867
{
912-
if (isalpha (LA (1)) || LA (1) == '_' || LA (1) == '$')
913-
{
914-
PARSE_ERROR ("Integer literal shall not contain non-digit character after got character",
915-
lit_utf8_iterator_get_offset (&src_iter));
916-
}
917-
is_fp = true;
918868
consume_char ();
919-
continue;
920869
}
921870

922-
if (c == 'e' || c == 'E')
871+
while (true)
923872
{
924-
if (LA (1) == '-' || LA (1) == '+')
873+
c = LA (0);
874+
875+
if (c == LIT_CHAR_DOT)
925876
{
926-
consume_char ();
877+
if (is_fp)
878+
{
879+
/* token is constructed at end of function */
880+
break;
881+
}
882+
else
883+
{
884+
is_fp = true;
885+
consume_char ();
886+
887+
continue;
888+
}
889+
}
890+
else if (c == LIT_CHAR_LOWERCASE_E
891+
|| c == LIT_CHAR_UPPERCASE_E)
892+
{
893+
if (is_exp)
894+
{
895+
PARSE_ERROR ("Numeric literal shall not contain more than exponential marker ('e' or 'E')",
896+
lit_utf8_iterator_get_offset (&src_iter));
897+
}
898+
else
899+
{
900+
is_exp = true;
901+
consume_char ();
902+
903+
if (LA (0) == LIT_CHAR_MINUS
904+
|| LA (0) == LIT_CHAR_PLUS)
905+
{
906+
consume_char ();
907+
}
908+
909+
continue;
910+
}
927911
}
928-
if (!isdigit (LA (1)))
912+
else if (!lit_char_is_decimal_digit (c))
929913
{
930-
PARSE_ERROR ("Integer literal shall not contain non-digit character after exponential marker ('e' or 'E')",
931-
lit_utf8_iterator_get_offset (&src_iter));
914+
if (lexer_is_char_can_be_identifier_start (c))
915+
{
916+
PARSE_ERROR ("Numeric literal shall not contain non-numeric characters",
917+
lit_utf8_iterator_get_offset (&src_iter));
918+
}
919+
920+
/* token is constructed at end of function */
921+
break;
932922
}
933-
is_exp = true;
923+
934924
consume_char ();
935-
continue;
936925
}
937926

938-
if (isalpha (c) || c == '_' || c == '$')
939-
{
940-
PARSE_ERROR ("Integer literal shall not contain non-digit characters",
941-
lit_utf8_iterator_get_offset (&src_iter));
942-
}
927+
tok_length = (size_t) (lit_utf8_iterator_get_ptr (&src_iter) - token_start);
943928

944-
if (!isdigit (c))
929+
if (is_fp || is_exp)
945930
{
946-
break;
947-
}
948-
949-
consume_char ();
950-
}
931+
ecma_number_t res = ecma_utf8_string_to_number (token_start, (jerry_api_size_t) tok_length);
932+
JERRY_ASSERT (!ecma_number_is_nan (res));
951933

952-
tok_length = (size_t) (lit_utf8_iterator_get_ptr (&src_iter) - token_start);
953-
if (is_fp || is_exp)
954-
{
955-
ecma_number_t res = ecma_utf8_string_to_number (token_start, (jerry_api_size_t) tok_length);
956-
JERRY_ASSERT (!ecma_number_is_nan (res));
957-
known_token = convert_seen_num_to_token (res);
958-
token_start = NULL;
959-
return known_token;
960-
}
934+
known_token = convert_seen_num_to_token (res);
935+
token_start = NULL;
961936

962-
if (*token_start == '0' && tok_length != 1)
963-
{
964-
if (strict_mode)
965-
{
966-
PARSE_ERROR ("Octal tnteger literals are not allowed in strict mode", token_start - buffer_start);
937+
return known_token;
967938
}
968-
for (i = 0; i < tok_length; i++)
939+
else if (*token_start == LIT_CHAR_0
940+
&& tok_length != 1)
969941
{
970-
if (!is_overflow)
942+
/* Octal integer literals */
943+
if (strict_mode)
971944
{
972-
res = res * 8 + lit_char_hex_to_int (token_start[i]);
945+
PARSE_ERROR ("Octal integer literals are not allowed in strict mode", token_start - buffer_start);
973946
}
974947
else
975948
{
976-
fp_res = fp_res * 8 + (ecma_number_t) lit_char_hex_to_int (token_start[i]);
977-
}
978-
if (res > 255)
979-
{
980-
fp_res = (ecma_number_t) res;
981-
is_overflow = true;
982-
res = 0;
949+
/* token is constructed at end of function */
950+
951+
for (i = 0; i < tok_length; i++)
952+
{
953+
fp_res = fp_res * 8 + (ecma_number_t) lit_char_hex_to_int (token_start[i]);
954+
}
983955
}
984956
}
985-
}
986-
else
987-
{
988-
for (i = 0; i < tok_length; i++)
957+
else
989958
{
990-
if (!is_overflow)
991-
{
992-
res = res * 10 + lit_char_hex_to_int (token_start[i]);
993-
}
994-
else
959+
/* token is constructed at end of function */
960+
961+
for (i = 0; i < tok_length; i++)
995962
{
996963
fp_res = fp_res * 10 + (ecma_number_t) lit_char_hex_to_int (token_start[i]);
997964
}
998-
if (res > 255)
999-
{
1000-
fp_res = (ecma_number_t) res;
1001-
is_overflow = true;
1002-
res = 0;
1003-
}
1004965
}
1005966
}
1006967

1007-
if (is_overflow)
968+
if (fp_res >= 0 && fp_res <= 255 && (uint8_t) fp_res == fp_res)
1008969
{
1009-
known_token = convert_seen_num_to_token (fp_res);
970+
known_token = create_token (TOK_SMALL_INT, (uint8_t) fp_res);
1010971
token_start = NULL;
1011972
return known_token;
1012973
}
1013974
else
1014975
{
1015-
known_token = create_token (TOK_SMALL_INT, (uint8_t) res);
976+
known_token = convert_seen_num_to_token (fp_res);
1016977
token_start = NULL;
1017978
return known_token;
1018979
}
1019-
}
980+
} /* lexer_parse_number */
1020981

1021982
/**
1022983
* Parse string literal (ECMA-262 v5, 7.8.4)
@@ -1243,9 +1204,12 @@ lexer_next_token_private (void)
12431204
return lexer_parse_identifier_or_keyword ();
12441205
}
12451206

1246-
if (isdigit (c) || (c == '.' && isdigit (LA (1))))
1207+
/* ECMA-262 v5, 7.8.3, Numeric literal */
1208+
if (lit_char_is_decimal_digit (c)
1209+
|| (c == LIT_CHAR_DOT
1210+
&& lit_char_is_decimal_digit (LA (1))))
12471211
{
1248-
return parse_number ();
1212+
return lexer_parse_number ();
12491213
}
12501214

12511215
if (c == '\n')

0 commit comments

Comments
 (0)