From caa1617ea2c5ab2fdc9e0cf7a491f77ad0b0acac Mon Sep 17 00:00:00 2001 From: Laszlo Vidacs Date: Mon, 15 Jun 2015 15:37:27 +0200 Subject: [PATCH 01/18] Implement String.prototype.trim() JerryScript-DCO-1.0-Signed-off-by: Laszlo Vidacs lvidacs.u-szeged@partner.samsung.com --- .../ecma-builtin-string-prototype.cpp | 62 ++++++++++++++++- tests/jerry/string-prototype-trim.js | 69 +++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 tests/jerry/string-prototype-trim.js diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp index d2d139c2ee..83f14a23f5 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp @@ -25,6 +25,7 @@ #include "ecma-string-object.h" #include "ecma-try-catch-macro.h" #include "jrt.h" +#include "jrt-libc-includes.h" #ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_STRING_BUILTIN @@ -554,7 +555,66 @@ ecma_builtin_string_prototype_object_to_locale_upper_case (ecma_value_t this_arg static ecma_completion_value_t ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argument */ { - ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg); + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + /* 1 */ + ECMA_TRY_CATCH (check_coercible_val, + ecma_op_check_object_coercible (this_arg), + ret_value); + + /* 2 */ + ECMA_TRY_CATCH (to_string_val, + ecma_op_to_string (this_arg), + ret_value); + + ecma_string_t *original_string_p = ecma_get_string_from_value (to_string_val); + JERRY_ASSERT (ecma_string_get_length (original_string_p) >= 0); + + /* 3 */ + const uint32_t len = (uint32_t) ecma_string_get_length (original_string_p); + + /* Workaround: avoid repeated call of ecma_string_get_char_at_pos() because its overhead */ + uint32_t zt_str_size = (uint32_t) sizeof (ecma_char_t) * (len + 1); + ecma_char_t 
*original_zt_str_p = (ecma_char_t*) mem_heap_alloc_block (zt_str_size, + MEM_HEAP_ALLOC_SHORT_TERM); + ecma_string_to_zt_string (original_string_p, original_zt_str_p, (ssize_t) zt_str_size); + + uint32_t prefix = 0, postfix = 0; + uint32_t new_len = 0; + + while (prefix < len && isspace (original_zt_str_p[prefix])) + { + prefix++; + } + + while (postfix < len - prefix && isspace (original_zt_str_p[len - postfix - 1])) + { + postfix++; + } + + new_len = prefix < len ? len - prefix - postfix : 0; + + MEM_DEFINE_LOCAL_ARRAY (new_str_buffer, new_len + 1, ecma_char_t); + + for (uint32_t idx = 0; idx < new_len; ++idx) + { + new_str_buffer[idx] = original_zt_str_p[idx + prefix]; + } + + new_str_buffer[new_len] = '\0'; + ecma_string_t *new_str_p = ecma_new_ecma_string ((ecma_char_t *) new_str_buffer); + + /* 4 */ + ret_value = ecma_make_normal_completion_value (ecma_make_string_value (new_str_p)); + + MEM_FINALIZE_LOCAL_ARRAY (new_str_buffer); + + mem_heap_free_block (original_zt_str_p); + + ECMA_FINALIZE (to_string_val); + ECMA_FINALIZE (check_coercible_val); + + return ret_value; } /* ecma_builtin_string_prototype_object_trim */ /** diff --git a/tests/jerry/string-prototype-trim.js b/tests/jerry/string-prototype-trim.js new file mode 100644 index 0000000000..41e6e4b3d0 --- /dev/null +++ b/tests/jerry/string-prototype-trim.js @@ -0,0 +1,69 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// check properties +assert(Object.getOwnPropertyDescriptor(String.prototype.trim, 'length').configurable === false); + +assert(Object.getOwnPropertyDescriptor(String.prototype.trim, 'length').enumerable === false); + +assert(Object.getOwnPropertyDescriptor(String.prototype.trim, 'length').writable === false); + +assert(String.prototype.trim.length === 0); + +// check this value +assert(String.prototype.trim.call(new String()) === ""); + +assert(String.prototype.trim.call({}) === "[object Object]"); + +// check undefined +try { + String.prototype.trim.call(undefined); + assert(false); +} catch(e) { + assert(e instanceof TypeError); +} + +// check null +try { + String.prototype.trim.call(null); + assert(false); +} catch(e) { + assert(e instanceof TypeError); +} + +// simple checks +assert(" hello world".trim() === "hello world"); + +assert("hello world ".trim() === "hello world"); + +assert(" hello world ".trim() === "hello world"); + +assert("\t hello world\n".trim() === "hello world"); + +assert("\t\n hello world\t \n ".trim() === "hello world"); + +assert("hello world\n \t\t".trim() === "hello world"); + +assert(" hello world \\ ".trim() === "hello world \\"); + +assert("**hello world**".trim() === "**hello world**"); + +assert(" \t \n".trim() === ""); + +assert(" ".trim() === ""); + +assert("".trim() === ""); + +// FIXME: add unicode tests when unicode support is available From 3f28cb3bf876f121343719364a5d074e243a17c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zsolt=20Borb=C3=A9ly?= Date: Tue, 16 Jun 2015 12:37:28 +0200 Subject: [PATCH 02/18] Fix the indexing of Array builtin functions. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The index-dependent builtins didn't correctly handle the positive Infinity value. 
JerryScript-DCO-1.0-Signed-off-by: Zsolt Borbély zsborbely.u-szeged@partner.samsung.com --- .../ecma-builtin-array-prototype.cpp | 206 ++++++------------ .../builtin-objects/ecma-builtin-helpers.cpp | 63 ++++++ .../builtin-objects/ecma-builtin-helpers.h | 1 + .../ecma-builtin-string-prototype.cpp | 69 +----- tests/jerry/array-prototype-indexof.js | 4 + tests/jerry/array-prototype-lastindexof.js | 4 + tests/jerry/array-prototype-slice.js | 13 ++ tests/jerry/array-prototype-splice.js | 31 +++ 8 files changed, 187 insertions(+), 204 deletions(-) diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-array-prototype.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-array-prototype.cpp index ab4a5977e7..d54c399d1a 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-array-prototype.cpp +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-array-prototype.cpp @@ -1,4 +1,5 @@ /* Copyright 2014-2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -892,46 +893,20 @@ ecma_builtin_array_prototype_object_index_of (ecma_value_t this_arg, /**< this a /* 5. */ ECMA_OP_TO_NUMBER_TRY_CATCH (arg_from_idx, arg2, ret_value); - int32_t from_idx_int = ecma_number_to_int32 (arg_from_idx); + uint32_t from_idx = ecma_builtin_helper_array_index_normalize (arg_from_idx, len); /* 6. */ - if (from_idx_int > 0 && (uint32_t) from_idx_int >= len) + if (from_idx >= len) { ret_value = ecma_make_normal_completion_value (ecma_make_number_value (num_p)); } else { - uint32_t k; + JERRY_ASSERT (from_idx < len); - /* 7 */ - if (from_idx_int >= 0) + for (; from_idx < len && *num_p < 0 && ecma_is_completion_value_empty (ret_value); from_idx++) { - k = (uint32_t) from_idx_int; - } - /* 8. 
*/ - else - { - from_idx_int = -from_idx_int; - - /* As opposed to the standard, we prevent k from being negative, so that we can use an uint32 */ - if ((uint32_t) from_idx_int < len) - { - /* 8.a */ - k = len - (uint32_t) from_idx_int; - } - /* If k would've been negative */ - else - { - /* 8.b */ - k = 0; - } - - } - JERRY_ASSERT (k < len); - - for (; k < len && *num_p < 0 && ecma_is_completion_value_empty (ret_value); k++) - { - ecma_string_t *idx_str_p = ecma_new_ecma_string_from_uint32 (k); + ecma_string_t *idx_str_p = ecma_new_ecma_string_from_uint32 (from_idx); /* 9.a */ if (ecma_op_object_get_property (obj_p, idx_str_p) != NULL) @@ -942,7 +917,7 @@ ecma_builtin_array_prototype_object_index_of (ecma_value_t this_arg, /**< this a /* 9.b.ii */ if (ecma_op_strict_equality_compare (arg1, get_value)) { - *num_p = ecma_uint32_to_number (k); + *num_p = ecma_uint32_to_number (from_idx); } ECMA_FINALIZE (get_value); @@ -1019,59 +994,75 @@ ecma_builtin_array_prototype_object_last_index_of (ecma_value_t this_arg, /**< t } else { - uint32_t k = len - 1; + uint32_t from_idx = len - 1; /* 5. */ if (!ecma_is_value_undefined (arg2)) { ECMA_OP_TO_NUMBER_TRY_CATCH (arg_from_idx, arg2, ret_value); - int32_t n = ecma_number_to_int32 (arg_from_idx); - /* 6. */ - if (n >= 0) + if (!ecma_number_is_nan (arg_from_idx)) { - /* min(n, len - 1)*/ - if ((uint32_t) n > len - 1) + + if (ecma_number_is_infinity (arg_from_idx)) { - k = len - 1; + from_idx = ecma_number_is_negative (arg_from_idx) ? (uint32_t) -1 : len - 1; } else { - k = (uint32_t) n; + int32_t int_from_idx = ecma_number_to_int32 (arg_from_idx); + + /* 6. */ + if (int_from_idx >= 0) + { + /* min(int_from_idx, len - 1)*/ + if ((uint32_t) int_from_idx > len - 1) + { + from_idx = len - 1; + } + else + { + from_idx = (uint32_t) int_from_idx; + } + } + /* 7. 
*/ + else + { + int_from_idx = -int_from_idx; + + /* We prevent from_idx from being negative, so that we can use an uint32 */ + if ((uint32_t) int_from_idx <= len) + { + from_idx = len - (uint32_t) int_from_idx; + } + else + { + /* + * If from_idx would be negative, we set it to UINT_MAX. See reasoning for this in the comment + * at the for loop below. + */ + from_idx = (uint32_t) -1; + } + } } } - /* 7. */ else { - n = -n; - - /* We prevent k from being negative, so that we can use an uint32 */ - if ((uint32_t) n <= len) - { - k = len - (uint32_t) n; - } - else - { - /* - * If k would be negative, we set it to UINT_MAX. See reasoning for this in the comment - * at the for loop below. - */ - k = (uint32_t) -1; - } + from_idx = 0; } ECMA_OP_TO_NUMBER_FINALIZE (arg_from_idx); } /* 8. - * We should break from the loop when k < 0. We can still use an uint32_t for k, and check - * for an underflow instead. This is safe, because k will always start in [0, len - 1], - * and len is in [0, UINT_MAX], so k >= len means we've had an underflow, and should stop. + * We should break from the loop when from_idx < 0. We can still use an uint32_t for from_idx, and check + * for an underflow instead. This is safe, because from_idx will always start in [0, len - 1], + * and len is in [0, UINT_MAX], so from_idx >= len means we've had an underflow, and should stop. 
*/ - for (;k < len && *num_p < 0 && ecma_is_completion_value_empty (ret_value); k--) + for (; from_idx < len && *num_p < 0 && ecma_is_completion_value_empty (ret_value); from_idx--) { /* 8.a */ - ecma_string_t *idx_str_p = ecma_new_ecma_string_from_uint32 (k); + ecma_string_t *idx_str_p = ecma_new_ecma_string_from_uint32 (from_idx); /* 8.a */ if (ecma_op_object_get_property (obj_p, idx_str_p) != NULL) @@ -1082,7 +1073,7 @@ ecma_builtin_array_prototype_object_last_index_of (ecma_value_t this_arg, /**< t /* 8.b.ii */ if (ecma_op_strict_equality_compare (arg1, get_value)) { - *num_p = ecma_uint32_to_number (k); + *num_p = ecma_uint32_to_number (from_idx); } ECMA_FINALIZE (get_value); @@ -2029,30 +2020,8 @@ ecma_builtin_array_prototype_object_slice (ecma_value_t this_arg, /**< 'this' ar /* 5. */ ECMA_OP_TO_NUMBER_TRY_CATCH (start_num, arg1, ret_value); - int32_t relative_start = ecma_number_to_int32 (start_num); - /* 6. */ - if (relative_start < 0) - { - uint32_t start_abs = (uint32_t) -relative_start; - - if (start_abs > len) - { - start = 0; - } - else - { - start = len - start_abs; - } - } - else - { - start = (uint32_t) relative_start; - if (start > len) - { - start = len; - } - } + start = ecma_builtin_helper_array_index_normalize (start_num, len); /* 7. */ if (ecma_is_value_undefined (arg2)) @@ -2062,30 +2031,9 @@ ecma_builtin_array_prototype_object_slice (ecma_value_t this_arg, /**< 'this' ar else { /* 7. 
part 2*/ - ECMA_OP_TO_NUMBER_TRY_CATCH (end_num, arg2, ret_value) - int32_t relative_end = ecma_number_to_int32 (end_num); + ECMA_OP_TO_NUMBER_TRY_CATCH (end_num, arg2, ret_value); - if (relative_end < 0) - { - uint32_t end_abs = (uint32_t) -relative_end; - - if (end_abs > len) - { - end = 0; - } - else - { - end = len - end_abs; - } - } - else - { - end = (uint32_t) relative_end; - if (end > len) - { - end = len; - } - } + end = ecma_builtin_helper_array_index_normalize (end_num, len); ECMA_OP_TO_NUMBER_FINALIZE (end_num); } @@ -2197,29 +2145,7 @@ ecma_builtin_array_prototype_object_splice (ecma_value_t this_arg, /**< this arg args[0], ret_value); - int32_t relative_start = ecma_number_to_int32 (start_num); - - /* 6. */ - if (relative_start < 0) - { - uint32_t start_abs = (uint32_t) - relative_start; - if (start_abs > len) - { - start = 0; - } - else - { - start = len - start_abs; - } - } - else - { - start = (uint32_t) relative_start; - if (start > len) - { - start = len; - } - } + start = ecma_builtin_helper_array_index_normalize (start_num, len); /* * If there is only one argument, that will be the start argument, @@ -2236,22 +2162,22 @@ ecma_builtin_array_prototype_object_splice (ecma_value_t this_arg, /**< this arg args[1], ret_value); - int32_t delete_count_int = ecma_number_to_int32 (delete_num); - - if (delete_count_int > 0) + if (!ecma_number_is_nan (delete_num)) { - delete_count = (uint32_t) delete_count_int; + if (ecma_number_is_negative (delete_num)) + { + delete_count = 0; + } + else + { + delete_count = ecma_number_is_infinity (delete_num) ? 
len : ecma_number_to_uint32 (delete_num); + } } else { delete_count = 0; } - if (len - start < delete_count) - { - delete_count = len - start; - } - ECMA_OP_TO_NUMBER_FINALIZE (delete_num); } diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.cpp index c03c874e5f..4f3e0eee31 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.cpp +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.cpp @@ -270,6 +270,69 @@ ecma_builtin_helper_object_get_properties (ecma_object_t *obj_p, /** < object */ return new_array; } /* ecma_builtin_helper_object_get_properties */ +/** + * Helper function for normalizing an array index + * + * This function clamps the given index to the [0, length] range. + * If the index is negative, it is used as the offset from the end of the array, + * to compute the normalized index. + * If the index is greater than the length of the array, the normalized index will be the length of the array. + * + * See also: + * ECMA-262 v5, 15.4.4.10 steps 5-6, 7 (part 2) and 8 + * ECMA-262 v5, 15.4.4.12 steps 5-6 + * ECMA-262 v5, 15.4.4.14 step 5 + * ECMA-262 v5, 15.5.4.13 steps 4, 5 (part 2) and 6-7 + * + * Used by: + * - The Array.prototype.slice routine. + * - The Array.prototype.splice routine. + * - The Array.prototype.indexOf routine. + * - The String.prototype.slice routine. + * + * @return uint32_t - the normalized value of the index + */ +uint32_t +ecma_builtin_helper_array_index_normalize (ecma_number_t index, /**< index */ + uint32_t length) /**< array's length */ +{ + uint32_t norm_index; + + if (!ecma_number_is_nan (index)) + { + + if (ecma_number_is_infinity (index)) + { + norm_index = ecma_number_is_negative (index) ? 0 : length; + } + else + { + const int32_t int_index = ecma_number_to_int32 (index); + + if (int_index < 0) + { + const uint32_t uint_index = (uint32_t) - int_index; + norm_index = uint_index > length ? 
0 : length - uint_index; + } + else + { + norm_index = (uint32_t) int_index; + + if (norm_index > length) + { + norm_index = length; + } + } + } + } + else + { + norm_index = 0; + } + + return norm_index; +} /* ecma_builtin_helper_array_index_normalize */ + /** * @} * @} diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h index 39a6b97285..0660b8344c 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-helpers.h @@ -30,6 +30,7 @@ extern ecma_completion_value_t ecma_builtin_helper_object_to_string (const ecma_ extern ecma_completion_value_t ecma_builtin_helper_get_to_locale_string_at_index (ecma_object_t *obj_p, uint32_t index); extern ecma_completion_value_t ecma_builtin_helper_object_get_properties (ecma_object_t *obj, bool only_enumerable_properties); +extern uint32_t ecma_builtin_helper_array_index_normalize (ecma_number_t index, uint32_t length); /** * @} diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp index 83f14a23f5..94fbe794b6 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp @@ -15,6 +15,7 @@ */ #include "ecma-alloc.h" +#include "ecma-builtin-helpers.h" #include "ecma-builtins.h" #include "ecma-conversion.h" #include "ecma-exceptions.h" @@ -332,46 +333,16 @@ ecma_builtin_string_prototype_object_slice (ecma_value_t this_arg, /**< this arg const uint32_t len = (uint32_t) ecma_string_get_length (get_string_val); - /* 4. */ + /* 4. 6. 
*/ uint32_t start = 0, end = len; ECMA_OP_TO_NUMBER_TRY_CATCH (start_num, arg1, ret_value); - if (!ecma_number_is_nan (start_num)) - { + start = ecma_builtin_helper_array_index_normalize (start_num, len); - if (ecma_number_is_infinity (start_num)) - { - start = ecma_number_is_negative (start_num) ? 0 : len; - } - else - { - const int int_start = ecma_number_to_int32 (start_num); - - if (int_start < 0) - { - const uint32_t start_abs = (uint32_t) - int_start; - start = start_abs > len ? 0 : len - start_abs; - } - else - { - start = (uint32_t) int_start; - - if (start > len) - { - start = len; - } - } - } - } - else - { - start = 0; - } - - /* 5. */ + /* 5. 7. */ if (ecma_is_value_undefined (arg2)) { end = len; @@ -382,37 +353,7 @@ ecma_builtin_string_prototype_object_slice (ecma_value_t this_arg, /**< this arg arg2, ret_value); - if (!ecma_number_is_nan (end_num)) - { - - if (ecma_number_is_infinity (end_num)) - { - end = ecma_number_is_negative (end_num) ? 0 : len; - } - else - { - const int32_t int_end = ecma_number_to_int32 (end_num); - - if (int_end < 0) - { - const uint32_t end_abs = (uint32_t) - int_end; - end = end_abs > len ? 
0 : len - end_abs; - } - else - { - end = (uint32_t) int_end; - - if (end > len) - { - end = len; - } - } - } - } - else - { - end = 0; - } + end = ecma_builtin_helper_array_index_normalize (end_num, len); ECMA_OP_TO_NUMBER_FINALIZE (end_num); } diff --git a/tests/jerry/array-prototype-indexof.js b/tests/jerry/array-prototype-indexof.js index 15511c6b0b..5972fdd5f5 100644 --- a/tests/jerry/array-prototype-indexof.js +++ b/tests/jerry/array-prototype-indexof.js @@ -33,6 +33,10 @@ var index = array.indexOf(obj); assert(index === 3); assert(array[index] === obj); +assert(array.indexOf("foo", NaN) === 0); +assert(array.indexOf("foo", Infinity) === -1); +assert(array.indexOf("foo", -Infinity) === 0); + // Checking behavior when length is zero var obj = { indexOf : Array.prototype.indexOf, length : 0 }; assert(obj.indexOf("foo") === -1); diff --git a/tests/jerry/array-prototype-lastindexof.js b/tests/jerry/array-prototype-lastindexof.js index 32ca31460c..ba71eab1b9 100644 --- a/tests/jerry/array-prototype-lastindexof.js +++ b/tests/jerry/array-prototype-lastindexof.js @@ -33,6 +33,10 @@ var index = array.lastIndexOf(obj); assert(index === 3); assert(array[index] === obj); +assert(array.lastIndexOf("foo", NaN) === 0); +assert(array.lastIndexOf("foo", Infinity) === 4); +assert(array.lastIndexOf("foo", -Infinity) === -1); + var arr = []; arr[4294967294] = "foo"; assert(arr.lastIndexOf("foo", -1) === 4294967294) diff --git a/tests/jerry/array-prototype-slice.js b/tests/jerry/array-prototype-slice.js index 7fdfba8d48..9948418a27 100644 --- a/tests/jerry/array-prototype-slice.js +++ b/tests/jerry/array-prototype-slice.js @@ -20,6 +20,9 @@ var array2 = array.slice("a", "3"); var array3 = array.slice(-2); var array4 = array.slice(-12, undefined); var array5 = array.slice(undefined, -3); +var array6 = array.slice(Infinity, NaN); +var array7 = array.slice(-Infinity, Infinity); +var array8 = array.slice(NaN, -Infinity); assert (array1.length == 4); assert (array1[0] == 54); @@ 
-45,6 +48,16 @@ assert (array4[3] == -127); assert (array5.length == 1); assert (array5[0] == 54); +assert (array6.length == 0); + +assert (array7.length == 4); +assert (array7[0] == 54); +assert (array7[1] == undefined); +assert (array7[2] == "Lemon"); +assert (array7[3] == -127); + +assert (array8.length == 0); + // Checking behavior when unable to get length var obj = { slice : Array.prototype.slice }; Object.defineProperty(obj, 'length', { 'get' : function () { throw new ReferenceError ("foo"); } }); diff --git a/tests/jerry/array-prototype-splice.js b/tests/jerry/array-prototype-splice.js index e4c79f1c7e..14789f0f83 100644 --- a/tests/jerry/array-prototype-splice.js +++ b/tests/jerry/array-prototype-splice.js @@ -88,6 +88,37 @@ assert (array[3] == -127); assert (array[4] == "sunshine"); assert (array6.length == 0); +// -------------------------------------------------------- +array = setDefaultValues(); +var array7 = array.splice(Infinity, NaN); +assert (array.length == 4); +assert (array[0] == 54); +assert (array[1] == undefined); +assert (array[2] == -127); +assert (array[3] == "sunshine"); +assert (array7.length == 0); + +// -------------------------------------------------------- +array = setDefaultValues(); +var array8 = array.splice(-Infinity, Infinity); + +assert (array.length == 0); +assert (array8.length == 4); +assert (array8[0] == 54); +assert (array8[1] == undefined); +assert (array8[2] == -127); +assert (array8[3] == "sunshine"); + +// -------------------------------------------------------- +array = setDefaultValues(); +var array9 = array.splice(NaN, -Infinity); +assert (array.length == 4); +assert (array[0] == 54); +assert (array[1] == undefined); +assert (array[2] == -127); +assert (array[3] == "sunshine"); +assert (array9.length == 0); + // Checking behavior when unable to get length var obj = {splice : Array.prototype.splice}; Object.defineProperty(obj, 'length', { 'get' : function () { throw new ReferenceError ("foo"); } }); From 
61ab2051302d499784ffe488ffe5f729df61ac36 Mon Sep 17 00:00:00 2001 From: Kristof Kosztyo Date: Thu, 18 Jun 2015 17:10:02 +0200 Subject: [PATCH 03/18] Implement Object.create function JerryScript-DCO-1.0-Signed-off-by: Kristof Kosztyo kkosztyo.u-szeged@partner.samsung.com --- .../builtin-objects/ecma-builtin-object.cpp | 41 ++++- .../ecma/operations/ecma-objects-general.cpp | 36 +++-- .../ecma/operations/ecma-objects-general.h | 3 +- tests/jerry/object-create.js | 146 ++++++++++++++++++ 4 files changed, 215 insertions(+), 11 deletions(-) create mode 100644 tests/jerry/object-create.js diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-object.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-object.cpp index 474355ff92..30c34a897b 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-object.cpp +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-object.cpp @@ -637,7 +637,46 @@ ecma_builtin_object_object_create (ecma_value_t this_arg, /**< 'this' argument * ecma_value_t arg1, /**< routine's first argument */ ecma_value_t arg2) /**< routine's second argument */ { - ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg, arg1, arg2); + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + // 1. + if (!ecma_is_value_object (arg1) && !ecma_is_value_null (arg1)) + { + ret_value = ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_TYPE)); + } + else + { + ecma_object_t *obj_p = NULL; + + if (!ecma_is_value_null (arg1)) + { + obj_p = ecma_get_object_from_value (arg1); + } + // 2-3. + ecma_object_t *result_obj_p = ecma_op_create_object_object_noarg_and_set_prototype (obj_p); + + // 4. + if (!ecma_is_value_undefined (arg2)) + { + ECMA_TRY_CATCH (obj, + ecma_builtin_object_object_define_properties (this_arg, + ecma_make_object_value (result_obj_p), + arg2), + ret_value); + ECMA_FINALIZE (obj); + } + + // 5. 
+ if (ecma_is_completion_value_empty (ret_value)) + { + ret_value = ecma_make_normal_completion_value (ecma_copy_value (ecma_make_object_value (result_obj_p), + true)); + } + + ecma_deref_object (result_obj_p); + } + + return ret_value; } /* ecma_builtin_object_object_create */ /** diff --git a/jerry-core/ecma/operations/ecma-objects-general.cpp b/jerry-core/ecma/operations/ecma-objects-general.cpp index 1be997c762..d42e9067d1 100644 --- a/jerry-core/ecma/operations/ecma-objects-general.cpp +++ b/jerry-core/ecma/operations/ecma-objects-general.cpp @@ -62,18 +62,10 @@ ecma_op_create_object_object_noarg (void) ecma_object_t *object_prototype_p = ecma_builtin_get (ECMA_BUILTIN_ID_OBJECT_PROTOTYPE); // 3., 4., 6., 7. - ecma_object_t *obj_p = ecma_create_object (object_prototype_p, true, ECMA_OBJECT_TYPE_GENERAL); + ecma_object_t *obj_p = ecma_op_create_object_object_noarg_and_set_prototype (object_prototype_p); ecma_deref_object (object_prototype_p); - /* - * [[Class]] property of ECMA_OBJECT_TYPE_GENERAL type objects - * without ECMA_INTERNAL_PROPERTY_CLASS internal property - * is "Object". - * - * See also: ecma_object_get_class_name - */ - return obj_p; } /* ecma_op_create_object_object_noarg */ @@ -109,6 +101,32 @@ ecma_op_create_object_object_arg (ecma_value_t value) /**< argument of construct } } /* ecma_op_create_object_object_arg */ +/** + * Object creation operation with no arguments. + * It sets the given prototype to the newly created object. + * + * See also: ECMA-262 v5, 15.2.2.1, 15.2.3.5 + * + * @return pointer to newly created object + */ +ecma_object_t* +ecma_op_create_object_object_noarg_and_set_prototype (ecma_object_t *object_prototype_p) /**< pointer to prototype of + the object + (can be NULL) */ +{ + ecma_object_t *obj_p = ecma_create_object (object_prototype_p, true, ECMA_OBJECT_TYPE_GENERAL); + + /* + * [[Class]] property of ECMA_OBJECT_TYPE_GENERAL type objects + * without ECMA_INTERNAL_PROPERTY_CLASS internal property + * is "Object". 
+ * + * See also: ecma_object_get_class_name + */ + + return obj_p; +} /* ecma_op_create_object_object_noarg_and_set_prototype */ + /** * [[Get]] ecma general object's operation * diff --git a/jerry-core/ecma/operations/ecma-objects-general.h b/jerry-core/ecma/operations/ecma-objects-general.h index 3ed8e07396..4b5ad6faef 100644 --- a/jerry-core/ecma/operations/ecma-objects-general.h +++ b/jerry-core/ecma/operations/ecma-objects-general.h @@ -26,8 +26,9 @@ * @{ */ -extern ecma_object_t* ecma_op_create_object_object_noarg (void); +extern ecma_object_t *ecma_op_create_object_object_noarg (void); extern ecma_completion_value_t ecma_op_create_object_object_arg (ecma_value_t value); +extern ecma_object_t *ecma_op_create_object_object_noarg_and_set_prototype (ecma_object_t *object_prototype_p); extern ecma_completion_value_t ecma_op_general_object_get (ecma_object_t *obj_p, ecma_string_t *property_name_p); diff --git a/tests/jerry/object-create.js b/tests/jerry/object-create.js new file mode 100644 index 0000000000..fc7a496d4a --- /dev/null +++ b/tests/jerry/object-create.js @@ -0,0 +1,146 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Example where we create an object with a couple of sample properties. +// (Note that the second parameter maps keys to *property descriptors*.) 
+var o = Object.create(Object.prototype, { + // foo is a regular 'value property' + foo: { writable: true, configurable: true, value: 'hello' }, + // bar is a getter-and-setter (accessor) property + bar: { + configurable: false, + get: function() { return 10; }, + set: function(value) { console.log('Setting `o.bar` to', value); } + } +}); + +// create a new object whose prototype is a new, empty object +// and adding a single property 'p', with value 42 +var o = Object.create({}, { p: { value: 42 } }); +// by default properties ARE NOT writable, enumerable or configurable: +o.p = 24; +assert (o.p === 42); + +// to specify an ES3 property +var o2 = Object.create({}, { + p: { + value: 42, + writable: true, + enumerable: true, + configurable: true + } +}); + +assert (o2.p === 42); + +// Shape - superclass +function Shape() { + this.x = 0; + this.y = 0; +} + +// superclass method +Shape.prototype.move = function(x, y) { + this.x += x; + this.y += y; +}; + +// Rectangle - subclass +function Rectangle() { + Shape.call(this); // call super constructor. 
+} + +// subclass extends superclass +Rectangle.prototype = Object.create(Shape.prototype); +Rectangle.prototype.constructor = Rectangle; + +var rect = new Rectangle(); + +assert (rect instanceof Rectangle); +assert (rect instanceof Shape); +rect.move(1, 1); +assert (rect.x === 1) +assert (rect.y === 1); + +var obj = { + protoFunction: function() { + return 3; + } +}; + +Object.defineProperties(obj, { + "foo": { + value: 42, + writable: true, + }, + "a": { + value: "b", + configurable: true + }, + "bar": { + get: function() { + return this.foo; + }, + }, +}); + +var obj2 = Object.create(obj); + +assert (obj2.protoFunction() === 3); +assert (obj2.foo === 42); +assert (obj2.a === "b"); +assert (obj2.bar === 42); +assert (Object.getPrototypeOf (obj2) === obj); + + +var props = { + prop1: { + value: 1, + }, + hey: function () { + return "ho"; + } +}; + +var obj3 = Object.create(obj, props); +assert (obj3.prop1 === 1); +assert (obj3.protoFunction()); +try { + assert (obj3.hey === undefined); + obj3.hey(); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} + +// Create an object with null as prototype +var obj = Object.create(null) +assert (typeof (obj) === "object"); +// FIXME: enable this assertion after the #208 is fixed. +// assert (Object.getPrototypeOf (obj) === null); + +try { + Object.create() + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} + +try { + Object.create(undefined) + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} From 77b01a6473aeaf066d126e19206466b3c1576d73 Mon Sep 17 00:00:00 2001 From: Peter Gal Date: Wed, 17 Jun 2015 17:04:13 +0200 Subject: [PATCH 04/18] Provide assert as an external method. Removed the internal assert implementation from the engine and provide externally an assert function via api calls. 
JerryScript-DCO-1.0-Signed-off-by: Peter Gal pgal.u-szeged@partner.samsung.com --- jerry-core/parser/js/opcodes-dumper.cpp | 38 ++--------------------- main-linux.cpp | 41 +++++++++++++++++++++++++ tests/unit/test-api.cpp | 5 +++ 3 files changed, 49 insertions(+), 35 deletions(-) diff --git a/jerry-core/parser/js/opcodes-dumper.cpp b/jerry-core/parser/js/opcodes-dumper.cpp index f515cfe988..16ce7bfece 100644 --- a/jerry-core/parser/js/opcodes-dumper.cpp +++ b/jerry-core/parser/js/opcodes-dumper.cpp @@ -383,28 +383,6 @@ create_op_meta_for_vlt (varg_list_type vlt, operand *res, operand *obj) return ret; } -static void -dump_assert (operand op) -{ - switch (op.type) - { - case OPERAND_LITERAL: - { - const opcode_t opcode = getop_is_true_jmp_down (LITERAL_TO_REWRITE, 0, 2); - serializer_dump_op_meta (create_op_meta_100 (opcode, op.data.lit_id)); - break; - } - case OPERAND_TMP: - { - const opcode_t opcode = getop_is_true_jmp_down (op.data.uid, 0, 2); - serializer_dump_op_meta (create_op_meta_000 (opcode)); - break; - } - } - const opcode_t opcode = getop_exitval (1); - serializer_dump_op_meta (create_op_meta_000 (opcode)); -} - static void split_opcode_counter (opcode_counter_t oc, idx_t *id1, idx_t *id2) { @@ -742,25 +720,15 @@ dumper_finish_scope (void) } bool -dumper_is_intrinsic (operand obj) +dumper_is_intrinsic (operand /* obj */) { - if (obj.type == OPERAND_LITERAL) - { - if (lit_literal_equal_type_zt (lit_get_literal_by_cp (obj.data.lit_id), (const ecma_char_t *) "assert")) - { - return true; - } - } return false; } operand -dump_intrinsic (operand obj, operand arg) +dump_intrinsic (operand /* obj */, operand /* arg */) { - JERRY_ASSERT (obj.type == OPERAND_LITERAL); - TODO (/* Rewrite when there will be more intrinsics. 
*/) - JERRY_ASSERT (lit_literal_equal_type_zt (lit_get_literal_by_cp (obj.data.lit_id), (const ecma_char_t *) "assert")); - dump_assert (arg); + JERRY_UNREACHABLE (); return dump_undefined_assignment_res (); } diff --git a/main-linux.cpp b/main-linux.cpp index 8ec2f55167..1001f4ab5b 100644 --- a/main-linux.cpp +++ b/main-linux.cpp @@ -14,6 +14,7 @@ */ #include +#include #include #include "jerry.h" @@ -106,6 +107,30 @@ read_sources (const char *script_file_names[], } } +/** + * Provide the 'assert' implementation for the engine. + * + * @return true - if the argument was not a boolean value or it was boolean true. + */ +static bool +assert_handler (const jerry_api_object_t *function_obj_p __attr_unused___, /** < function object */ + const jerry_api_value_t *this_p __attr_unused___, /** < this arg */ + jerry_api_value_t *ret_val_p __attr_unused___, /** < return argument */ + const jerry_api_value_t args_p[], /** < function arguments */ + const uint16_t args_cnt) /** < number of function arguments */ +{ + if (args_cnt > 0 + && args_p[0].type == JERRY_API_DATA_TYPE_BOOLEAN + && args_p[0].v_bool != true) + { + JERRY_ERROR_MSG ("Script assertion failed\n"); + exit (JERRY_STANDALONE_EXIT_CODE_FAIL); + } + + return true; +} /* assert_handler */ + + int main (int argc, char **argv) @@ -234,6 +259,22 @@ main (int argc, jerry_init (flags); + jerry_api_object_t *global_obj_p = jerry_api_get_global (); + jerry_api_object_t *assert_func_p = jerry_api_create_external_function (assert_handler); + jerry_api_value_t assert_value; + assert_value.type = JERRY_API_DATA_TYPE_OBJECT; + assert_value.v_object = assert_func_p; + + bool is_assert_added = jerry_api_set_object_field_value (global_obj_p, "assert", &assert_value); + + jerry_api_release_value (&assert_value); + jerry_api_release_object (global_obj_p); + + if (!is_assert_added) + { + JERRY_ERROR_MSG ("Failed to register 'assert' method."); + } + jerry_completion_code_t ret_code = JERRY_COMPLETION_CODE_OK; if (!jerry_parse 
(source_p, source_size)) diff --git a/tests/unit/test-api.cpp b/tests/unit/test-api.cpp index 888d040e64..5fb3ad7515 100644 --- a/tests/unit/test-api.cpp +++ b/tests/unit/test-api.cpp @@ -19,6 +19,11 @@ #include "test-common.h" const char *test_source = ( + "function assert (arg) { " + " if (!arg) { " + " throw Error('Assert failed');" + " } " + "} " "this.t = 1; " "function f () { " "return this.t; " From c603d10360c51952fa8b5b80924ea58bd249681a Mon Sep 17 00:00:00 2001 From: SaeHie Park Date: Wed, 24 Jun 2015 07:05:56 +0900 Subject: [PATCH 05/18] Adjust nuttx heap size to 80K JerryScript-DCO-1.0-Signed-off-by: SaeHie Park saehie.park@samsung.com --- jerry-core/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jerry-core/CMakeLists.txt b/jerry-core/CMakeLists.txt index d60195849c..b33115d3b2 100644 --- a/jerry-core/CMakeLists.txt +++ b/jerry-core/CMakeLists.txt @@ -80,8 +80,8 @@ project (JerryCore CXX C ASM) # Platform-specific # Linux # Nuttx - math(EXPR MEM_HEAP_AREA_SIZE_16K "16 * 1024") - set(DEFINES_JERRY_NUTTX CONFIG_MEM_HEAP_AREA_SIZE=${MEM_HEAP_AREA_SIZE_16K}) + math(EXPR MEM_HEAP_AREA_SIZE_80K "80 * 1024") + set(DEFINES_JERRY_NUTTX CONFIG_MEM_HEAP_AREA_SIZE=${MEM_HEAP_AREA_SIZE_80K}) # MCU # stm32f3 math(EXPR MEM_HEAP_AREA_SIZE_16K "16 * 1024") From 4ff9e79b02eb4e2e26187ee533ac087eba0bfac8 Mon Sep 17 00:00:00 2001 From: Ruben Ayrapetyan Date: Wed, 24 Jun 2015 15:29:35 +0300 Subject: [PATCH 06/18] Generate anonymous function expressions for getters / setters of an object literal. 
Related issue: #234 JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com --- jerry-core/parser/js/parser.cpp | 2 +- tests/jerry/object-literal.js | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/jerry-core/parser/js/parser.cpp b/jerry-core/parser/js/parser.cpp index fe056d59cc..455cf50aac 100644 --- a/jerry-core/parser/js/parser.cpp +++ b/jerry-core/parser/js/parser.cpp @@ -267,7 +267,7 @@ parse_property_assignment (void) syntax_add_prop_name (name, is_setter ? PROP_SET : PROP_GET); skip_newlines (); - const operand func = parse_argument_list (VARG_FUNC_EXPR, name, NULL, NULL); + const operand func = parse_argument_list (VARG_FUNC_EXPR, empty_operand (), NULL, NULL); dump_function_end_for_rewrite (); diff --git a/tests/jerry/object-literal.js b/tests/jerry/object-literal.js index 4fc06ccf78..696556399c 100644 --- a/tests/jerry/object-literal.js +++ b/tests/jerry/object-literal.js @@ -71,3 +71,22 @@ assert (a.property1 === 25); b = delete a[b]; assert (b === true); assert (a.property1 === undefined); + +flow = ''; +a = { + get q () + { + flow += 'get: ' + (typeof q); + + return 0; + }, + set q (v) + { + flow += ', set: ' + (typeof q); + } +}; + +a.q; +a.q = 1; + +assert (flow == 'get: undefined, set: undefined'); From d7ecd4a467dd6c6b1ad16bd19d093fe9da3c520e Mon Sep 17 00:00:00 2001 From: Ruben Ayrapetyan Date: Thu, 25 Jun 2015 00:09:58 +0300 Subject: [PATCH 07/18] Fix raise of syntax errors for unmatched braces. 
Related issue: #43, #183 JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com --- jerry-core/parser/js/lexer.cpp | 13 +++++ tests/jerry/regression-test-issues-43-183.js | 56 ++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 tests/jerry/regression-test-issues-43-183.js diff --git a/jerry-core/parser/js/lexer.cpp b/jerry-core/parser/js/lexer.cpp index 369cdb1c3b..a5d375bdd1 100644 --- a/jerry-core/parser/js/lexer.cpp +++ b/jerry-core/parser/js/lexer.cpp @@ -1218,6 +1218,19 @@ lexer_next_token (void) goto end; } + /** + * FIXME: + * The way to raise syntax errors for unexpected EOF + * should be reworked so that EOF would be checked by + * caller of the routine, and the following condition + * would be checked as assertion in the routine. + */ + if (prev_token.type == TOK_EOF + && sent_token.type == TOK_EOF) + { + PARSE_ERROR ("Unexpected EOF", buffer - buffer_start); + } + prev_token = sent_token; sent_token = lexer_next_token_private (); diff --git a/tests/jerry/regression-test-issues-43-183.js b/tests/jerry/regression-test-issues-43-183.js new file mode 100644 index 0000000000..f0e32ce7ef --- /dev/null +++ b/tests/jerry/regression-test-issues-43-183.js @@ -0,0 +1,56 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +function check_syntax_error (script) +{ + try + { + eval (script); + assert (false); + } + catch (e) + { + assert (e instanceof SyntaxError); + } +} + +check_syntax_error ('{'); +check_syntax_error ('}'); +check_syntax_error ('['); +check_syntax_error (']'); +check_syntax_error ('('); +check_syntax_error (')'); + +check_syntax_error ('function f ('); +check_syntax_error ('function f ()'); +check_syntax_error ('function f () {'); +check_syntax_error ('function f () }'); +check_syntax_error ('function f ({) }'); +check_syntax_error ('function f { }'); +check_syntax_error ('function f {'); +check_syntax_error ('function f }'); + +check_syntax_error ('a = [[];'); + +check_syntax_error ('a = {;'); +check_syntax_error ('a = };'); +check_syntax_error ('a = {{};'); + +check_syntax_error ('a = {get q {} };'); +check_syntax_error ('a = {get q ( {} };'); +check_syntax_error ('a = {get q ) {} };'); +check_syntax_error ('a = {get q () };'); +check_syntax_error ('a = {get q () { };'); +check_syntax_error ('a = {get q () };'); +check_syntax_error ('a = {get q () { };'); From 1c19e5c8ab1dad35ac0b5a64dc670f9c46fd680c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zsolt=20Borb=C3=A9ly?= Date: Wed, 24 Jun 2015 11:01:12 +0200 Subject: [PATCH 08/18] Fix the Object.getPrototypeOf function. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Related issue: #208 JerryScript-DCO-1.0-Signed-off-by: Zsolt Borbély zsborbely.u-szeged@partner.samsung.com --- .../ecma/builtin-objects/ecma-builtin-object.cpp | 12 +++++++++--- tests/jerry/object-create.js | 3 +-- tests/jerry/object-getprototypeof.js | 12 +++++++++++- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-object.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-object.cpp index 30c34a897b..f3829ac050 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-object.cpp +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-object.cpp @@ -128,11 +128,17 @@ ecma_builtin_object_object_get_prototype_of (ecma_value_t this_arg __attr_unused { /* 2. */ ecma_object_t *obj_p = ecma_get_object_from_value (arg); - ecma_object_t *prototype_p = ecma_get_object_prototype (obj_p); - ecma_ref_object (prototype_p); - ret_value = ecma_make_normal_completion_value (ecma_make_object_value (prototype_p)); + if (prototype_p) + { + ret_value = ecma_make_normal_completion_value (ecma_make_object_value (prototype_p)); + ecma_ref_object (prototype_p); + } + else + { + ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_NULL); + } } return ret_value; diff --git a/tests/jerry/object-create.js b/tests/jerry/object-create.js index fc7a496d4a..a98fc6a08f 100644 --- a/tests/jerry/object-create.js +++ b/tests/jerry/object-create.js @@ -128,8 +128,7 @@ try { // Create an object with null as prototype var obj = Object.create(null) assert (typeof (obj) === "object"); -// FIXME: enable this assertion after the #208 is fixed. 
-// assert (Object.getPrototypeOf (obj) === null); +assert (Object.getPrototypeOf (obj) === null); try { Object.create() diff --git a/tests/jerry/object-getprototypeof.js b/tests/jerry/object-getprototypeof.js index c36e0822c5..50d03b85c0 100644 --- a/tests/jerry/object-getprototypeof.js +++ b/tests/jerry/object-getprototypeof.js @@ -35,11 +35,21 @@ try { assert (e instanceof TypeError); } +try { + var y = Object.getPrototypeOf(null); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} + var obj = { x : "foo" }; -assert (Object.getPrototypeOf(obj) === Object.prototype) +assert (Object.getPrototypeOf(obj) === Object.prototype); var constructor = function () {}; constructor.prototype = obj; var d_obj = new constructor(); assert (Object.getPrototypeOf(d_obj) === obj); + +obj = Object.create(null); +assert (Object.getPrototypeOf(obj) === null); From 5f174cf8bfe1a62a619fef803ac0ee2a09dc228e Mon Sep 17 00:00:00 2001 From: Ruben Ayrapetyan Date: Wed, 24 Jun 2015 21:59:38 +0300 Subject: [PATCH 09/18] Remove 'process_keyword_names' stage, as keyword literals now can be registered during main parse stage. 
JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com --- jerry-core/parser/js/parser.cpp | 51 --------------------------------- 1 file changed, 51 deletions(-) diff --git a/jerry-core/parser/js/parser.cpp b/jerry-core/parser/js/parser.cpp index 455cf50aac..8feed2bfba 100644 --- a/jerry-core/parser/js/parser.cpp +++ b/jerry-core/parser/js/parser.cpp @@ -67,7 +67,6 @@ static void parse_statement (jsp_label_t *outermost_stmt_label_p); static operand parse_assignment_expression (bool); static void parse_source_element_list (bool); static operand parse_argument_list (varg_list_type, operand, uint8_t *, operand *); -static void process_keyword_names (void); static void skip_braces (void); static void skip_parens (void); @@ -2566,50 +2565,6 @@ skip_optional_name_and_parens (void) } } -static void process_keyword_names () -{ - if (token_is (TOK_KEYWORD)) - { - keyword kw = (keyword) token_data (); - skip_newlines (); - if (token_is (TOK_COLON)) - { - const char *s = lexer_keyword_to_string (kw); - lit_find_or_create_literal_from_charset ((const ecma_char_t *) s, (ecma_length_t) strlen (s)); - } - else - { - lexer_save_token (tok); - } - } - else if (token_is (TOK_NAME)) - { - if (lit_literal_equal_type_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()), (const ecma_char_t *) "get") - || lit_literal_equal_type_zt (lit_get_literal_by_cp (token_data_as_lit_cp ()), (const ecma_char_t *) "set")) - { - skip_newlines (); - if (token_is (TOK_KEYWORD)) - { - keyword kw = (keyword) token_data (); - skip_newlines (); - if (token_is (TOK_OPEN_PAREN)) - { - const char *s = lexer_keyword_to_string (kw); - lit_find_or_create_literal_from_charset ((const ecma_char_t *) s, (ecma_length_t) strlen (s)); - } - else - { - lexer_save_token (tok); - } - } - else - { - lexer_save_token (tok); - } - } - } -} - static void skip_braces (void) { @@ -2627,10 +2582,6 @@ skip_braces (void) { nesting_level--; } - else - { - process_keyword_names (); - } } } @@ -2805,8 +2756,6 @@ 
preparse_scope (bool is_global) is_ref_eval_identifier = true; } } - - process_keyword_names (); } skip_newlines (); } From 601f1eea58ea281578d8b71fef25da2ad60f013b Mon Sep 17 00:00:00 2001 From: Ruben Ayrapetyan Date: Wed, 24 Jun 2015 18:41:01 +0300 Subject: [PATCH 10/18] Introduce jsp_skip_braces function for skiping blocks, surrounded with braces. JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com --- jerry-core/parser/js/parser.cpp | 125 ++++++++++++++------------------ 1 file changed, 56 insertions(+), 69 deletions(-) diff --git a/jerry-core/parser/js/parser.cpp b/jerry-core/parser/js/parser.cpp index 8feed2bfba..c1e2c508cf 100644 --- a/jerry-core/parser/js/parser.cpp +++ b/jerry-core/parser/js/parser.cpp @@ -67,8 +67,6 @@ static void parse_statement (jsp_label_t *outermost_stmt_label_p); static operand parse_assignment_expression (bool); static void parse_source_element_list (bool); static operand parse_argument_list (varg_list_type, operand, uint8_t *, operand *); -static void skip_braces (void); -static void skip_parens (void); static bool token_is (token_type tt) @@ -173,6 +171,55 @@ is_strict_mode (void) return scopes_tree_strict_mode (STACK_TOP (scopes)); } +/** + * Skip block, defined with braces of specified type + * + * Note: + * Missing corresponding brace is considered a syntax error + * + * Note: + * Opening brace of the block to skip should be set as current + * token when the routine is called + */ +static void +jsp_skip_braces (token_type brace_type) /**< type of the opening brace */ +{ + current_token_must_be (brace_type); + + token_type closing_bracket_type; + + if (brace_type == TOK_OPEN_PAREN) + { + closing_bracket_type = TOK_CLOSE_PAREN; + } + else if (brace_type == TOK_OPEN_BRACE) + { + closing_bracket_type = TOK_CLOSE_BRACE; + } + else + { + JERRY_ASSERT (brace_type == TOK_OPEN_SQUARE); + closing_bracket_type = TOK_CLOSE_SQUARE; + } + + skip_newlines (); + + while (!token_is (closing_bracket_type) + && !token_is 
(TOK_EOF)) + { + if (token_is (TOK_OPEN_PAREN) + || token_is (TOK_OPEN_BRACE) + || token_is (TOK_OPEN_SQUARE)) + { + jsp_skip_braces (tok.type); + } + + skip_newlines (); + } + + current_token_must_be (closing_bracket_type); +} /* jsp_skip_braces */ + /* property_name : Identifier | Keyword @@ -1964,7 +2011,7 @@ parse_while_statement (jsp_label_t *outermost_stmt_label_p) /**< outermost (firs token_after_newlines_must_be (TOK_OPEN_PAREN); const locus cond_loc = tok.loc; - skip_parens (); + jsp_skip_braces (TOK_OPEN_PAREN); dump_jump_to_end_for_rewrite (); @@ -2020,7 +2067,7 @@ skip_case_clause_body (void) { if (token_is (TOK_OPEN_BRACE)) { - skip_braces (); + jsp_skip_braces (TOK_OPEN_BRACE); } skip_newlines (); } @@ -2565,73 +2612,13 @@ skip_optional_name_and_parens (void) } } -static void -skip_braces (void) -{ - current_token_must_be (TOK_OPEN_BRACE); - - uint8_t nesting_level = 1; - while (nesting_level > 0) - { - skip_newlines (); - if (token_is (TOK_OPEN_BRACE)) - { - nesting_level++; - } - else if (token_is (TOK_CLOSE_BRACE)) - { - nesting_level--; - } - } -} - static void skip_function (void) { skip_newlines (); skip_optional_name_and_parens (); skip_newlines (); - skip_braces (); -} - -static void -skip_squares (void) -{ - current_token_must_be (TOK_OPEN_SQUARE); - - uint8_t nesting_level = 1; - while (nesting_level > 0) - { - skip_newlines (); - if (token_is (TOK_OPEN_SQUARE)) - { - nesting_level++; - } - else if (token_is (TOK_CLOSE_SQUARE)) - { - nesting_level--; - } - } -} - -static void -skip_parens (void) -{ - current_token_must_be (TOK_OPEN_PAREN); - - uint8_t nesting_level = 1; - while (nesting_level > 0) - { - skip_newlines (); - if (token_is (TOK_OPEN_PAREN)) - { - nesting_level++; - } - else if (token_is (TOK_CLOSE_PAREN)) - { - nesting_level--; - } - } + jsp_skip_braces (TOK_OPEN_BRACE); } static bool @@ -2670,15 +2657,15 @@ preparse_var_decls (void) } else if (token_is (TOK_OPEN_BRACE)) { - skip_braces (); + jsp_skip_braces (TOK_OPEN_BRACE); } 
else if (token_is (TOK_OPEN_SQUARE)) { - skip_squares (); + jsp_skip_braces (TOK_OPEN_SQUARE); } else if (token_is (TOK_OPEN_PAREN)) { - skip_parens (); + jsp_skip_braces (TOK_OPEN_PAREN); } skip_token (); } @@ -2739,7 +2726,7 @@ preparse_scope (bool is_global) } else if (token_is (TOK_OPEN_BRACE)) { - skip_braces (); + jsp_skip_braces (TOK_OPEN_BRACE); } else { From f849cc6283698b2118e5a12b2bdb4d6fc02c962b Mon Sep 17 00:00:00 2001 From: Ruben Ayrapetyan Date: Wed, 24 Jun 2015 21:39:14 +0300 Subject: [PATCH 11/18] Fix parse of simple for statement. Related issue: #156 JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com --- jerry-core/parser/js/parser.cpp | 290 ++++++++++++++--------- tests/jerry/for.js | 40 +++- tests/jerry/regression-test-issue-156.js | 19 ++ 3 files changed, 240 insertions(+), 109 deletions(-) create mode 100644 tests/jerry/regression-test-issue-156.js diff --git a/jerry-core/parser/js/parser.cpp b/jerry-core/parser/js/parser.cpp index c1e2c508cf..e6014f0c1d 100644 --- a/jerry-core/parser/js/parser.cpp +++ b/jerry-core/parser/js/parser.cpp @@ -220,6 +220,67 @@ jsp_skip_braces (token_type brace_type) /**< type of the opening brace */ current_token_must_be (closing_bracket_type); } /* jsp_skip_braces */ +/** + * Find next token of specified type before the specified location + * + * Note: + * If skip_brace_blocks is true, every { should correspond to } brace before search end location, + * otherwise a syntax error is raised. + * + * @return true - if token was found (in the case, it is the current token, + * and lexer locus points to it), + * false - otherwise (in the case, lexer locus points to end_loc). 
+ */ +static bool +jsp_find_next_token_before_the_locus (token_type token_to_find, /**< token to search for + * (except TOK_NEWLINE and TOK_EOF) */ + locus end_loc, /**< location to search before */ + bool skip_brace_blocks) /**< skip blocks, surrounded with { and } braces */ +{ + JERRY_ASSERT (token_to_find != TOK_NEWLINE + && token_to_find != TOK_EOF); + + while (tok.loc < end_loc) + { + if (skip_brace_blocks) + { + if (token_is (TOK_OPEN_BRACE)) + { + jsp_skip_braces (TOK_OPEN_BRACE); + + JERRY_ASSERT (token_is (TOK_CLOSE_BRACE)); + skip_newlines (); + + if (tok.loc >= end_loc) + { + lexer_seek (end_loc); + tok = lexer_next_token (); + + return false; + } + } + else if (token_is (TOK_CLOSE_BRACE)) + { + EMIT_ERROR ("Unmatched } brace"); + } + } + + if (token_is (token_to_find)) + { + return true; + } + else + { + JERRY_ASSERT (!token_is (TOK_EOF)); + } + + skip_newlines (); + } + + JERRY_ASSERT (tok.loc == end_loc); + return false; +} /* jsp_find_next_token_before_the_locus */ + /* property_name : Identifier | Keyword @@ -1726,88 +1787,146 @@ parse_variable_declaration (void) (LT!* ',' LT!* variable_declaration)* ; */ static void -parse_variable_declaration_list (bool *several_decls) +parse_variable_declaration_list (void) { + JERRY_ASSERT (is_keyword (KW_VAR)); + while (true) { + skip_newlines (); + parse_variable_declaration (); skip_newlines (); if (!token_is (TOK_COMMA)) { lexer_save_token (tok); - return; - } - - skip_newlines (); - if (several_decls) - { - *several_decls = true; + break; } } } +/** + * Parse for statement + * + * See also: + * ECMA-262 v5, 12.6.3 + * + * Note: + * Syntax: + * Initializer Condition Increment Body LoopEnd + * - for ([ExpressionNoIn]; [Expression]; [Expression]) Statement + * - for (var VariableDeclarationListNoIn; [Expression]; [Expression]) Statement + * + * Note: + * Layout of generated byte-code is the following: + * Initializer ([ExpressionNoIn] / VariableDeclarationListNoIn) + * Jump -> ConditionCheck + * NextIteration: 
+ * Body (Statement) + * ContinueTarget: + * Increment ([Expression]) + * ConditionCheck: + * Condition ([Expression]) + * If Condition is evaluted to true, jump -> NextIteration + */ static void -parse_plain_for (jsp_label_t *outermost_stmt_label_p) /**< outermost (first) label, corresponding to - * the statement (or NULL, if there are no named - * labels associated with the statement) */ +jsp_parse_for_statement (jsp_label_t *outermost_stmt_label_p, /**< outermost (first) label, corresponding to + * the statement (or NULL, if there are no named + * labels associated with the statement) */ + locus for_body_statement_loc) /**< locus of loop body statement */ { - dump_jump_to_end_for_rewrite (); - - // Skip till body - JERRY_ASSERT (token_is (TOK_SEMICOLON)); + current_token_must_be (TOK_OPEN_PAREN); skip_newlines (); - const locus cond_loc = tok.loc; - while (!token_is (TOK_SEMICOLON)) + + // Initializer + if (is_keyword (KW_VAR)) { - skip_newlines (); + parse_variable_declaration_list (); + skip_token (); } - skip_newlines (); - const locus incr_loc = tok.loc; - while (!token_is (TOK_CLOSE_PAREN)) + else if (!token_is (TOK_SEMICOLON)) { - skip_newlines (); + parse_expression (false, JSP_EVAL_RET_STORE_NOT_DUMP); + skip_token (); } + else + { + // Initializer is empty + } + + // Jump -> ConditionCheck + dump_jump_to_end_for_rewrite (); dumper_set_next_interation_target (); - // Parse body + current_token_must_be (TOK_SEMICOLON); + skip_token (); + + // Save Condition locus + const locus condition_loc = tok.loc; + + if (!jsp_find_next_token_before_the_locus (TOK_SEMICOLON, + for_body_statement_loc, + true)) + { + EMIT_ERROR ("Invalid for statement"); + } + + current_token_must_be (TOK_SEMICOLON); + skip_token (); + + // Save Increment locus + const locus increment_loc = tok.loc; + + // Body + lexer_seek (for_body_statement_loc); skip_newlines (); + parse_statement (NULL); - const locus end_loc = tok.loc; + // Save LoopEnd locus + const locus loop_end_loc = tok.loc; 
+ // Setup ContinueTarget jsp_label_setup_continue_target (outermost_stmt_label_p, serializer_get_current_opcode_counter ()); - lexer_seek (incr_loc); - skip_token (); + // Increment + lexer_seek (increment_loc); + skip_newlines (); + if (!token_is (TOK_CLOSE_PAREN)) { parse_expression (true, JSP_EVAL_RET_STORE_NOT_DUMP); } + current_token_must_be (TOK_CLOSE_PAREN); + + // Setup ConditionCheck rewrite_jump_to_end (); - lexer_seek (cond_loc); - skip_token (); + // Condition + lexer_seek (condition_loc); + skip_newlines (); + if (token_is (TOK_SEMICOLON)) { dump_continue_iterations_check (empty_operand ()); } else { - const operand cond = parse_expression (true, JSP_EVAL_RET_STORE_NOT_DUMP); + operand cond = parse_expression (true, JSP_EVAL_RET_STORE_NOT_DUMP); dump_continue_iterations_check (cond); } - lexer_seek (end_loc); - skip_token (); + lexer_seek (loop_end_loc); + skip_newlines (); if (tok.type != TOK_CLOSE_BRACE) { lexer_save_token (tok); } -} +} /* jsp_parse_for_statement */ static void parse_for_in (jsp_label_t *outermost_stmt_label_p) /**< outermost (first) label, corresponding to @@ -1819,92 +1938,48 @@ parse_for_in (jsp_label_t *outermost_stmt_label_p) /**< outermost (first) label, EMIT_SORRY ("'for in' loops are not supported yet"); } -/* for_statement - : 'for' LT!* '(' (LT!* for_statement_initialiser_part)? LT!* ';' - (LT!* expression)? LT!* ';' (LT!* expression)? 
LT!* ')' LT!* statement - ; - - for_statement_initialiser_part - : expression - | 'var' LT!* variable_declaration_list - ; - - for_in_statement - : 'for' LT!* '(' LT!* for_in_statement_initialiser_part LT!* 'in' - LT!* expression LT!* ')' LT!* statement - ; - - for_in_statement_initialiser_part - : left_hand_side_expression - | 'var' LT!* variable_declaration - ;*/ - +/** + * Parse for/for-in statements + * + * See also: + * ECMA-262 v5, 12.6.3 and 12.6.4 + */ static void -parse_for_or_for_in_statement (jsp_label_t *outermost_stmt_label_p) /**< outermost (first) label, corresponding to - * the statement (or NULL, if there are no named - * labels associated with the statement) */ +jsp_parse_for_or_for_in_statement (jsp_label_t *outermost_stmt_label_p) /**< outermost (first) label, + * corresponding to the statement + * (or NULL, if there are no name + * labels associated with the statement) */ { assert_keyword (KW_FOR); token_after_newlines_must_be (TOK_OPEN_PAREN); - skip_newlines (); - if (token_is (TOK_SEMICOLON)) - { - parse_plain_for (outermost_stmt_label_p); - return; - } - /* Both for_statement_initialiser_part and for_in_statement_initialiser_part - contains 'var'. Check it first. */ - if (is_keyword (KW_VAR)) - { - bool several_decls = false; - skip_newlines (); - parse_variable_declaration_list (&several_decls); - if (several_decls) - { - token_after_newlines_must_be (TOK_SEMICOLON); - parse_plain_for (outermost_stmt_label_p); - return; - } - else - { - skip_newlines (); - if (token_is (TOK_SEMICOLON)) - { - parse_plain_for (outermost_stmt_label_p); - return; - } - else if (is_keyword (KW_IN)) - { - parse_for_in (outermost_stmt_label_p); - return; - } - else - { - EMIT_ERROR ("Expected either ';' or 'in' token"); - } - } - } + locus for_open_paren_loc, for_body_statement_loc; - /* expression contains left_hand_side_expression. 
*/ - parse_expression (false, JSP_EVAL_RET_STORE_NOT_DUMP); + for_open_paren_loc = tok.loc; + jsp_skip_braces (TOK_OPEN_PAREN); skip_newlines (); - if (token_is (TOK_SEMICOLON)) - { - parse_plain_for (outermost_stmt_label_p); - return; - } - else if (is_keyword (KW_IN)) + + for_body_statement_loc = tok.loc; + + lexer_seek (for_open_paren_loc); + tok = lexer_next_token (); + + bool is_plain_for = jsp_find_next_token_before_the_locus (TOK_SEMICOLON, + for_body_statement_loc, + true); + lexer_seek (for_open_paren_loc); + tok = lexer_next_token (); + + if (is_plain_for) { - parse_for_in (outermost_stmt_label_p); - return; + jsp_parse_for_statement (outermost_stmt_label_p, for_body_statement_loc); } else { - EMIT_ERROR ("Expected either ';' or 'in' token"); + parse_for_in (outermost_stmt_label_p); } -} +} /* jsp_parse_for_or_for_in_statement */ static operand parse_expression_inside_parens (void) @@ -2318,7 +2393,7 @@ parse_iterational_statement (jsp_label_t *outermost_named_stmt_label_p) /**< out else { JERRY_ASSERT (is_keyword (KW_FOR)); - parse_for_or_for_in_statement (outermost_stmt_label_p); + jsp_parse_for_or_for_in_statement (outermost_stmt_label_p); } jsp_label_rewrite_jumps_and_pop (&label, @@ -2412,8 +2487,7 @@ parse_statement (jsp_label_t *outermost_stmt_label_p) /**< outermost (first) lab } if (is_keyword (KW_VAR)) { - skip_newlines (); - parse_variable_declaration_list (NULL); + parse_variable_declaration_list (); return; } if (is_keyword (KW_FUNCTION)) diff --git a/tests/jerry/for.js b/tests/jerry/for.js index e1d7cc5d7a..ce33b98dc6 100644 --- a/tests/jerry/for.js +++ b/tests/jerry/for.js @@ -1,4 +1,5 @@ -// Copyright 2014 Samsung Electronics Co., Ltd. +// Copyright 2014-2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -12,15 +13,18 @@ // See the License for the specific language governing permissions and // limitations under the License. +// 1. var i = 0; for (; i < 100; i++) { } assert(i == 100); +// 2. for (var j = 0; j < 100; j++) { } assert(j == 100); +// 3. for (i = 0; ; ) { if (i == 100) { break; @@ -30,6 +34,7 @@ for (i = 0; ; ) { } assert(i == 100); +// 4. for (i = 0; i < 10; i++) { for (j = 0; j < 10; j++) { } @@ -38,3 +43,36 @@ assert(i != 100); assert(j != 100); assert(i == 10); assert(j == 10); + +// 5. +s = ''; +for ( +var i = {x: 0}; + + i.x < 2 +; + i.x++ + +) + { + s += i.x; +} + +assert (s === '01'); + +// 6. +s = ''; +for ( +var i = {x: 0}; + + i.x < 2 +; + + i.x++ + +) + { + s += i.x; +} + +assert (s === '01'); diff --git a/tests/jerry/regression-test-issue-156.js b/tests/jerry/regression-test-issue-156.js new file mode 100644 index 0000000000..1dfbd36082 --- /dev/null +++ b/tests/jerry/regression-test-issue-156.js @@ -0,0 +1,19 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +function dec(x) { return x - 1 }; +for (var i = 5; i > 0; i = dec(i)) {} +for (var i = 11; i = dec (i); i--) {} +for (var i = dec (12); i > 0; i--) {} From 6511f0e1ba9b674fea4229353b1ea15541fa620b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Lang=C3=B3?= Date: Thu, 25 Jun 2015 23:26:16 +0300 Subject: [PATCH 12/18] Add ecma_raise_* helpers for raising ECMA exceptions. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com --- .../ecma/operations/ecma-exceptions.cpp | 112 ++++++++++++++++++ jerry-core/ecma/operations/ecma-exceptions.h | 11 +- 2 files changed, 122 insertions(+), 1 deletion(-) diff --git a/jerry-core/ecma/operations/ecma-exceptions.cpp b/jerry-core/ecma/operations/ecma-exceptions.cpp index 4fbe2ab6e0..5658037049 100644 --- a/jerry-core/ecma/operations/ecma-exceptions.cpp +++ b/jerry-core/ecma/operations/ecma-exceptions.cpp @@ -130,6 +130,118 @@ ecma_new_standard_error_with_message (ecma_standard_error_t error_type, /**< nat return new_error_obj_p; } /* ecma_new_standard_error_with_message */ +/** + * Raise a standard ecma-error with the given type and message. + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_standard_error (ecma_standard_error_t error_type, /**< error type */ + const ecma_char_t *msg_p) /**< error message */ +{ + ecma_string_t *error_msg_p = ecma_new_ecma_string (msg_p); + ecma_object_t *error_obj_p = ecma_new_standard_error_with_message (error_type, error_msg_p); + ecma_deref_ecma_string (error_msg_p); + return ecma_make_throw_obj_completion_value (error_obj_p); +} /* ecma_raise_standard_error */ + +/** + * Raise a common error with the given message. + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_common_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_COMMON, msg_p); +} /* ecma_raise_common_error */ + +/** + * Raise an EvalError with the given message. 
+ * + * See also: ECMA-262 v5, 15.11.6.1 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_eval_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_EVAL, msg_p); +} /* ecma_raise_eval_error */ + +/** + * Raise a RangeError with the given message. + * + * See also: ECMA-262 v5, 15.11.6.2 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_range_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_RANGE, msg_p); +} /* ecma_raise_range_error */ + +/** + * Raise a ReferenceError with the given message. + * + * See also: ECMA-262 v5, 15.11.6.3 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_reference_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_REFERENCE, msg_p); +} /* ecma_raise_reference_error */ + +/** + * Raise a SyntaxError with the given message. + * + * See also: ECMA-262 v5, 15.11.6.4 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_syntax_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_SYNTAX, msg_p); +} /* ecma_raise_syntax_error */ + +/** + * Raise a TypeError with the given message. + * +* See also: ECMA-262 v5, 15.11.6.5 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_type_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_TYPE, msg_p); +} /* ecma_raise_type_error */ + +/** + * Raise a URIError with the given message. 
+ * +* See also: ECMA-262 v5, 15.11.6.6 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_raise_uri_error (const ecma_char_t *msg_p) /**< error message */ +{ + return ecma_raise_standard_error (ECMA_ERROR_URI, msg_p); +} /* ecma_raise_uri_error */ + /** * @} * @} diff --git a/jerry-core/ecma/operations/ecma-exceptions.h b/jerry-core/ecma/operations/ecma-exceptions.h index b29ed14c73..e93eaed838 100644 --- a/jerry-core/ecma/operations/ecma-exceptions.h +++ b/jerry-core/ecma/operations/ecma-exceptions.h @@ -45,8 +45,17 @@ typedef enum } ecma_standard_error_t; extern ecma_object_t *ecma_new_standard_error (ecma_standard_error_t error_type); -extern ecma_object_t* ecma_new_standard_error_with_message (ecma_standard_error_t error_type, +extern ecma_object_t *ecma_new_standard_error_with_message (ecma_standard_error_t error_type, ecma_string_t *message_string_p); +extern ecma_completion_value_t ecma_raise_standard_error (ecma_standard_error_t error_type, + const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_common_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_eval_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_range_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_reference_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_syntax_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_type_error (const ecma_char_t *msg_p); +extern ecma_completion_value_t ecma_raise_uri_error (const ecma_char_t *msg_p); /** * @} From d0e9edc16d710e517f6a1c798faa98bf4eaf9202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Lang=C3=B3?= Date: Thu, 25 Jun 2015 23:30:53 +0300 Subject: [PATCH 13/18] Fix comments in jerry-core/ecma/base/ecma-helpers.h. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com --- jerry-core/ecma/base/ecma-helpers.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/jerry-core/ecma/base/ecma-helpers.h b/jerry-core/ecma/base/ecma-helpers.h index 987e6345f6..050435c7a4 100644 --- a/jerry-core/ecma/base/ecma-helpers.h +++ b/jerry-core/ecma/base/ecma-helpers.h @@ -51,7 +51,7 @@ */ #define ECMA_SET_POINTER(field, non_compressed_pointer) MEM_CP_SET_POINTER (field, non_compressed_pointer) -/* ecma-helpers-value.c */ +/* ecma-helpers-value.cpp */ extern bool ecma_is_value_empty (ecma_value_t value); extern bool ecma_is_value_undefined (ecma_value_t value); extern bool ecma_is_value_null (ecma_value_t value); @@ -109,7 +109,7 @@ extern bool ecma_is_completion_value_normal_true (ecma_completion_value_t value) extern bool ecma_is_completion_value_normal_false (ecma_completion_value_t value); extern bool ecma_is_completion_value_empty (ecma_completion_value_t value); -/* ecma-helpers-string.c */ +/* ecma-helpers-string.cpp */ extern ecma_string_t* ecma_new_ecma_string (const ecma_char_t *string_p); extern ecma_string_t* ecma_new_ecma_string_from_uint32 (uint32_t uint_number); extern ecma_string_t* ecma_new_ecma_string_from_number (ecma_number_t number); @@ -161,7 +161,7 @@ extern bool ecma_is_zt_ex_string_magic (const ecma_char_t *zt_string_p, ecma_mag extern ecma_string_hash_t ecma_string_hash (const ecma_string_t *string_p); extern ecma_string_hash_t ecma_chars_buffer_calc_hash_last_chars (const ecma_char_t *chars, ecma_length_t length); -/* ecma-helpers-number.c */ +/* ecma-helpers-number.cpp */ extern const ecma_number_t ecma_number_relative_eps; extern ecma_number_t ecma_number_make_nan (void); @@ -199,7 +199,7 @@ extern void ecma_number_to_decimal 
(ecma_number_t num, int32_t *out_digits_num_p, int32_t *out_decimal_exp_p); -/* ecma-helpers-values-collection.c */ +/* ecma-helpers-values-collection.cpp */ extern ecma_collection_header_t *ecma_new_values_collection (const ecma_value_t values_buffer[], ecma_length_t values_number, @@ -227,7 +227,7 @@ ecma_collection_iterator_init (ecma_collection_iterator_t *iterator_p, extern bool ecma_collection_iterator_next (ecma_collection_iterator_t *iterator_p); -/* ecma-helpers.c */ +/* ecma-helpers.cpp */ extern ecma_object_t* ecma_create_object (ecma_object_t *prototype_object_p, bool is_extensible, ecma_object_type_t type); @@ -308,7 +308,7 @@ extern ecma_property_descriptor_t ecma_make_empty_property_descriptor (void); extern void ecma_free_property_descriptor (ecma_property_descriptor_t *prop_desc_p); extern ecma_property_descriptor_t ecma_get_property_descriptor_from_property (ecma_property_t *prop_p); -/* ecma-helpers-external-pointers.c */ +/* ecma-helpers-external-pointers.cpp */ extern bool ecma_create_external_pointer_property (ecma_object_t *obj_p, ecma_internal_property_id_t id, @@ -320,7 +320,7 @@ ecma_get_external_pointer_value (ecma_object_t *obj_p, extern void ecma_free_external_pointer_in_property (ecma_property_t *prop_p); -/* ecma-helpers-conversion.c */ +/* ecma-helpers-conversion.cpp */ extern ecma_number_t ecma_zt_string_to_number (const ecma_char_t *str_p); extern ssize_t ecma_uint32_to_string (uint32_t value, ecma_char_t *out_buffer_p, ssize_t buffer_size); extern uint32_t ecma_number_to_uint32 (ecma_number_t value); From 50b64bfad2fd50ec39b9797596e73b550f0f0e46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Lang=C3=B3?= Date: Thu, 25 Jun 2015 23:33:17 +0300 Subject: [PATCH 14/18] Add ecma_new_ecma_string version that takes buffer with specified number of characters, instead of zero-terminated string. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com --- jerry-core/ecma/base/ecma-helpers-string.cpp | 62 ++++++++++++++++---- jerry-core/ecma/base/ecma-helpers.h | 1 + 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/jerry-core/ecma/base/ecma-helpers-string.cpp b/jerry-core/ecma/base/ecma-helpers-string.cpp index c3590f716e..da00f74f48 100644 --- a/jerry-core/ecma/base/ecma-helpers-string.cpp +++ b/jerry-core/ecma/base/ecma-helpers-string.cpp @@ -455,6 +455,54 @@ ecma_init_ecma_string_from_magic_string_ex_id (ecma_string_t *string_p, /**< des string_p->u.magic_string_ex_id = magic_string_ex_id; } /* ecma_init_ecma_string_from_magic_string_ex_id */ +/** + * Allocate new ecma-string and fill it with specified number of characters from specified buffer + * + * @return pointer to ecma-string descriptor + */ +ecma_string_t* +ecma_new_ecma_string (const ecma_char_t *string_p, /**< input string */ + const ecma_length_t length) /**< number of characters */ +{ + JERRY_ASSERT (string_p != NULL); + JERRY_ASSERT (length > 0 && length <= ecma_zt_string_length (string_p)); + + if (length != ecma_zt_string_length (string_p)) + { + /* FIXME: update this when 'ecma_is_charset_magic' interface is added */ + ecma_char_t *zt_str_p = (ecma_char_t *) mem_heap_alloc_block ((size_t) (length + 1), MEM_HEAP_ALLOC_SHORT_TERM); + memcpy (zt_str_p, string_p, length * sizeof (ecma_char_t)); + zt_str_p[length] = 0; + + ecma_magic_string_id_t magic_string_id; + if (ecma_is_zt_string_magic (zt_str_p, &magic_string_id)) + { + mem_heap_free_block (zt_str_p); + return ecma_get_magic_string (magic_string_id); + } + + ecma_magic_string_ex_id_t magic_string_ex_id; + if (ecma_is_zt_ex_string_magic (zt_str_p, &magic_string_ex_id)) + { + mem_heap_free_block (zt_str_p); + return 
ecma_get_magic_string_ex (magic_string_ex_id); + } + mem_heap_free_block (zt_str_p); + } + + ecma_string_t *string_desc_p = ecma_alloc_string (); + string_desc_p->refs = 1; + string_desc_p->is_stack_var = false; + string_desc_p->container = ECMA_STRING_CONTAINER_HEAP_CHUNKS; + string_desc_p->hash = ecma_chars_buffer_calc_hash_last_chars (string_p, length); + string_desc_p->u.common_field = 0; + + ecma_collection_header_t *collection_p = ecma_new_chars_collection (string_p, length); + ECMA_SET_NON_NULL_POINTER (string_desc_p->u.collection_cp, collection_p); + + return string_desc_p; +} /* ecma_new_ecma_string */ + /** * Allocate new ecma-string and fill it with characters from specified buffer * @@ -485,19 +533,7 @@ ecma_new_ecma_string (const ecma_char_t *string_p) /**< zero-terminated string * length++; } - JERRY_ASSERT (length > 0); - - ecma_string_t* string_desc_p = ecma_alloc_string (); - string_desc_p->refs = 1; - string_desc_p->is_stack_var = false; - string_desc_p->container = ECMA_STRING_CONTAINER_HEAP_CHUNKS; - string_desc_p->hash = ecma_chars_buffer_calc_hash_last_chars (string_p, length); - - string_desc_p->u.common_field = 0; - ecma_collection_header_t *collection_p = ecma_new_chars_collection (string_p, length); - ECMA_SET_NON_NULL_POINTER (string_desc_p->u.collection_cp, collection_p); - - return string_desc_p; + return ecma_new_ecma_string (string_p, length); } /* ecma_new_ecma_string */ /** diff --git a/jerry-core/ecma/base/ecma-helpers.h b/jerry-core/ecma/base/ecma-helpers.h index 050435c7a4..06c08f1d36 100644 --- a/jerry-core/ecma/base/ecma-helpers.h +++ b/jerry-core/ecma/base/ecma-helpers.h @@ -110,6 +110,7 @@ extern bool ecma_is_completion_value_normal_false (ecma_completion_value_t value extern bool ecma_is_completion_value_empty (ecma_completion_value_t value); /* ecma-helpers-string.cpp */ +extern ecma_string_t* ecma_new_ecma_string (const ecma_char_t *string_p, const ecma_length_t length); extern ecma_string_t* ecma_new_ecma_string (const 
ecma_char_t *string_p); extern ecma_string_t* ecma_new_ecma_string_from_uint32 (uint32_t uint_number); extern ecma_string_t* ecma_new_ecma_string_from_number (ecma_number_t number); From 1f9add4735d996ec74899fe7e79e394190bee4da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Lang=C3=B3?= Date: Thu, 25 Jun 2015 23:48:24 +0300 Subject: [PATCH 15/18] Add ecma_char_is_word_char helper (part of IsWordChar abstract operation, ECMA-262 v5, 15.10.2.6); move hex_to_int from lexer to jerry-core/ecma/base/ecma-helpers-char.cpp, renaming it to ecma_char_hex_to_int. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com --- jerry-core/ecma/base/ecma-helpers-char.cpp | 58 ++++++++++++++++++++++ jerry-core/ecma/base/ecma-helpers.h | 2 + jerry-core/parser/js/lexer.cpp | 46 +++-------------- 3 files changed, 68 insertions(+), 38 deletions(-) diff --git a/jerry-core/ecma/base/ecma-helpers-char.cpp b/jerry-core/ecma/base/ecma-helpers-char.cpp index f1f2c1fec1..1f0196efa5 100644 --- a/jerry-core/ecma/base/ecma-helpers-char.cpp +++ b/jerry-core/ecma/base/ecma-helpers-char.cpp @@ -62,6 +62,64 @@ ecma_char_is_line_terminator (ecma_char_t c) /**< character value */ || ecma_char_is_new_line (c)); } /* ecma_char_is_line_terminator */ +/** + * Check if specified character is a word character (part of IsWordChar abstract operation) + * + * See also: ECMA-262 v5, 15.10.2.6 (IsWordChar) + * + * @return true - if the character is a word character + * false - otherwise. 
+ */ +bool +ecma_char_is_word_char (ecma_char_t c) /**< character value */ +{ + if ((c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') + || c == '_') + { + return true; + } + + return false; +} /* ecma_char_is_word_char */ + +/** + * Convert a hex character to an unsigned integer + * + * @return digit value, corresponding to the hex char + */ +uint32_t +ecma_char_hex_to_int (ecma_char_t hex) /**< [0-9A-Fa-f] character value */ +{ + switch (hex) + { + case '0': return 0x0; + case '1': return 0x1; + case '2': return 0x2; + case '3': return 0x3; + case '4': return 0x4; + case '5': return 0x5; + case '6': return 0x6; + case '7': return 0x7; + case '8': return 0x8; + case '9': return 0x9; + case 'a': + case 'A': return 0xA; + case 'b': + case 'B': return 0xB; + case 'c': + case 'C': return 0xC; + case 'd': + case 'D': return 0xD; + case 'e': + case 'E': return 0xE; + case 'f': + case 'F': return 0xF; + default: JERRY_UNREACHABLE (); + } +} /* ecma_char_hex_to_int */ + /** * @} * @} diff --git a/jerry-core/ecma/base/ecma-helpers.h b/jerry-core/ecma/base/ecma-helpers.h index 06c08f1d36..df4c4384d3 100644 --- a/jerry-core/ecma/base/ecma-helpers.h +++ b/jerry-core/ecma/base/ecma-helpers.h @@ -334,6 +334,8 @@ extern ecma_length_t ecma_number_to_zt_string (ecma_number_t num, ecma_char_t *b extern bool ecma_char_is_new_line (ecma_char_t c); extern bool ecma_char_is_carriage_return (ecma_char_t c); extern bool ecma_char_is_line_terminator (ecma_char_t c); +extern bool ecma_char_is_word_char (ecma_char_t c); +extern uint32_t ecma_char_hex_to_int (ecma_char_t hex); #endif /* !JERRY_ECMA_HELPERS_H */ diff --git a/jerry-core/parser/js/lexer.cpp b/jerry-core/parser/js/lexer.cpp index a5d375bdd1..a3926cc55e 100644 --- a/jerry-core/parser/js/lexer.cpp +++ b/jerry-core/parser/js/lexer.cpp @@ -1,4 +1,5 @@ /* Copyright 2014-2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -341,37 +342,6 @@ consume_char (void) } \ while (0) -static uint32_t -hex_to_int (char hex) -{ - switch (hex) - { - case '0': return 0x0; - case '1': return 0x1; - case '2': return 0x2; - case '3': return 0x3; - case '4': return 0x4; - case '5': return 0x5; - case '6': return 0x6; - case '7': return 0x7; - case '8': return 0x8; - case '9': return 0x9; - case 'a': - case 'A': return 0xA; - case 'b': - case 'B': return 0xB; - case 'c': - case 'C': return 0xC; - case 'd': - case 'D': return 0xD; - case 'e': - case 'E': return 0xE; - case 'f': - case 'F': return 0xF; - default: JERRY_UNREACHABLE (); - } -} - /** * Try to decode specified character as SingleEscapeCharacter (ECMA-262, v5, 7.8.4) * @@ -545,7 +515,7 @@ convert_string_to_token_transform_escape_seq (token_type tok_type, /**< type of JERRY_ASSERT ((char_code & 0xF000u) == 0); char_code = (uint16_t) (char_code << 4u); - char_code = (uint16_t) (char_code + hex_to_int (nc)); + char_code = (uint16_t) (char_code + ecma_char_hex_to_int ((ecma_char_t) nc)); } } @@ -761,11 +731,11 @@ parse_number (void) { if (!is_overflow) { - res = (res << 4) + hex_to_int (token_start[i]); + res = (res << 4) + ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } else { - fp_res = fp_res * 16 + (ecma_number_t) hex_to_int (token_start[i]); + fp_res = fp_res * 16 + (ecma_number_t) ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } if (res > 255) @@ -879,11 +849,11 @@ parse_number (void) { if (!is_overflow) { - res = res * 8 + hex_to_int (token_start[i]); + res = res * 8 + ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } else { - fp_res = fp_res * 8 + (ecma_number_t) hex_to_int (token_start[i]); + fp_res = fp_res * 8 + (ecma_number_t) ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } if (res > 255) { @@ -899,11 +869,11 @@ parse_number (void) { if (!is_overflow) { - res = res * 10 + 
hex_to_int (token_start[i]); + res = res * 10 + ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } else { - fp_res = fp_res * 10 + (ecma_number_t) hex_to_int (token_start[i]); + fp_res = fp_res * 10 + (ecma_number_t) ecma_char_hex_to_int ((ecma_char_t) token_start[i]); } if (res > 255) { From 4ffcb4d4645cd8a9027cc2efa2fa77f99c396056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Lang=C3=B3?= Date: Thu, 25 Jun 2015 23:51:34 +0300 Subject: [PATCH 16/18] Add parser and compiler of regular expressions. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com --- jerry-core/CMakeLists.txt | 3 + jerry-core/parser/regexp/re-compiler.cpp | 888 +++++++++++++++++++++++ jerry-core/parser/regexp/re-compiler.h | 108 +++ jerry-core/parser/regexp/re-parser.cpp | 808 +++++++++++++++++++++ jerry-core/parser/regexp/re-parser.h | 91 +++ 5 files changed, 1898 insertions(+) create mode 100644 jerry-core/parser/regexp/re-compiler.cpp create mode 100644 jerry-core/parser/regexp/re-compiler.h create mode 100644 jerry-core/parser/regexp/re-parser.cpp create mode 100644 jerry-core/parser/regexp/re-parser.h diff --git a/jerry-core/CMakeLists.txt b/jerry-core/CMakeLists.txt index b33115d3b2..2a67fc1402 100644 --- a/jerry-core/CMakeLists.txt +++ b/jerry-core/CMakeLists.txt @@ -102,6 +102,7 @@ project (JerryCore CXX C ASM) ${CMAKE_SOURCE_DIR}/jerry-core/ecma/operations ${CMAKE_SOURCE_DIR}/jerry-core/parser/js ${CMAKE_SOURCE_DIR}/jerry-core/parser/js/collections + ${CMAKE_SOURCE_DIR}/jerry-core/parser/regexp ${CMAKE_SOURCE_DIR}/jerry-core/jrt) # Third-party @@ -120,6 +121,7 @@ project (JerryCore CXX C ASM) file(GLOB SOURCE_CORE_ECMA_OPERATIONS ecma/operations/*.cpp) file(GLOB SOURCE_CORE_PARSER_JS parser/js/*.cpp) file(GLOB SOURCE_CORE_PARSER_JS_COLLECTIONS 
parser/js/collections/*.cpp) + file(GLOB SOURCE_CORE_PARSER_REGEXP parser/regexp/*.cpp) file(GLOB SOURCE_CORE_JRT jrt/*.cpp) set(SOURCE_CORE @@ -134,6 +136,7 @@ project (JerryCore CXX C ASM) ${SOURCE_CORE_ECMA_OPERATIONS} ${SOURCE_CORE_PARSER_JS} ${SOURCE_CORE_PARSER_JS_COLLECTIONS} + ${SOURCE_CORE_PARSER_REGEXP} ${SOURCE_CORE_JRT}) # Per-option configuration diff --git a/jerry-core/parser/regexp/re-compiler.cpp b/jerry-core/parser/regexp/re-compiler.cpp new file mode 100644 index 0000000000..f9f5145bc1 --- /dev/null +++ b/jerry-core/parser/regexp/re-compiler.cpp @@ -0,0 +1,888 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ecma-exceptions.h" +#include "ecma-helpers.h" +#include "ecma-try-catch-macro.h" +#include "jrt-libc-includes.h" +#include "mem-heap.h" +#include "re-compiler.h" + +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + +/** + * FIXME: + * Add comments to macro definitions in the component + */ + +#define REGEXP_BYTECODE_BLOCK_SIZE 256UL +#define BYTECODE_LEN(bc_ctx_p) ((uint32_t) (bc_ctx_p->current_p - bc_ctx_p->block_start_p)) + +void +regexp_dump_bytecode (re_bytecode_ctx_t *bc_ctx); + +/** + * FIXME: + * Add missing 're' prefixes to the component's external and internal interfaces + */ + +/** + * Realloc the bytecode container + */ +static re_bytecode_t* +realloc_regexp_bytecode_block (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */ +{ + JERRY_ASSERT (bc_ctx_p->block_end_p - bc_ctx_p->block_start_p >= 0); + size_t old_size = static_cast<size_t> (bc_ctx_p->block_end_p - bc_ctx_p->block_start_p); + JERRY_ASSERT (!bc_ctx_p->current_p && !bc_ctx_p->block_end_p && !bc_ctx_p->block_start_p); + + size_t new_block_size = old_size + REGEXP_BYTECODE_BLOCK_SIZE; + JERRY_ASSERT (bc_ctx_p->current_p - bc_ctx_p->block_start_p >= 0); + size_t current_ptr_offset = static_cast<size_t> (bc_ctx_p->current_p - bc_ctx_p->block_start_p); + + re_bytecode_t *new_block_start_p = (re_bytecode_t *) mem_heap_alloc_block (new_block_size, + MEM_HEAP_ALLOC_SHORT_TERM); + if (bc_ctx_p->current_p) + { + memcpy (new_block_start_p, bc_ctx_p->block_start_p, static_cast<size_t> (current_ptr_offset)); + mem_heap_free_block (bc_ctx_p->block_start_p); + } + bc_ctx_p->block_start_p = new_block_start_p; + bc_ctx_p->block_end_p = new_block_start_p + new_block_size; + bc_ctx_p->current_p = new_block_start_p + current_ptr_offset; + + return bc_ctx_p->current_p; +} /* realloc_regexp_bytecode_block */ + +/** + * Append a new bytecode to the end of the bytecode container + */ +static void +bytecode_list_append (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */ + re_bytecode_t 
*bytecode_p, /**< input bytecode */ + size_t length) /**< length of input */ +{ + JERRY_ASSERT (length <= REGEXP_BYTECODE_BLOCK_SIZE); + + re_bytecode_t *current_p = bc_ctx_p->current_p; + if (current_p + length > bc_ctx_p->block_end_p) + { + current_p = realloc_regexp_bytecode_block (bc_ctx_p); + } + + memcpy (current_p, bytecode_p, length); + bc_ctx_p->current_p += length; +} /* bytecode_list_append */ + +/** + * Insert a new bytecode to the bytecode container + */ +static void +bytecode_list_insert (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */ + size_t offset, /**< distance from the start of the container */ + re_bytecode_t *bytecode_p, /**< input bytecode */ + size_t length) /**< length of input */ +{ + JERRY_ASSERT (length <= REGEXP_BYTECODE_BLOCK_SIZE); + + re_bytecode_t *current_p = bc_ctx_p->current_p; + if (current_p + length > bc_ctx_p->block_end_p) + { + realloc_regexp_bytecode_block (bc_ctx_p); + } + + re_bytecode_t *src_p = bc_ctx_p->block_start_p + offset; + if ((BYTECODE_LEN (bc_ctx_p) - offset) > 0) + { + re_bytecode_t *dest_p = src_p + length; + re_bytecode_t *tmp_block_start_p = (re_bytecode_t *) mem_heap_alloc_block ((BYTECODE_LEN (bc_ctx_p) - offset), + MEM_HEAP_ALLOC_SHORT_TERM); + memcpy (tmp_block_start_p, src_p, (size_t) (BYTECODE_LEN (bc_ctx_p) - offset)); + memcpy (dest_p, tmp_block_start_p, (size_t) (BYTECODE_LEN (bc_ctx_p) - offset)); + mem_heap_free_block (tmp_block_start_p); + } + memcpy (src_p, bytecode_p, length); + + bc_ctx_p->current_p += length; +} /* bytecode_list_insert */ + +/** + * Append a RegExp opcode + */ +static void +append_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */ + re_opcode_t opcode) /**< input opcode */ +{ + bytecode_list_append (bc_ctx_p, (re_bytecode_t*) &opcode, sizeof (re_bytecode_t)); +} /* append_opcode */ + +/** + * Append a parameter of a RegExp opcode + */ +static void +append_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */ + uint32_t value) /**< 
input value */ +{ + bytecode_list_append (bc_ctx_p, (re_bytecode_t*) &value, sizeof (uint32_t)); +} /* append_u32 */ + +/** + * Append a jump offset parameter of a RegExp opcode + */ +static void +append_jump_offset (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */ + uint32_t value) /**< input value */ +{ + value += (uint32_t) (sizeof (uint32_t)); + append_u32 (bc_ctx_p, value); +} /* append_jump_offset */ + +/** + * Insert a RegExp opcode + */ +static void +insert_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */ + uint32_t offset, /**< distance from the start of the container */ + re_opcode_t opcode) /**< input opcode */ +{ + bytecode_list_insert (bc_ctx_p, offset, (re_bytecode_t*) &opcode, sizeof (re_bytecode_t)); +} /* insert_opcode */ + +/** + * Insert a parameter of a RegExp opcode + */ +static void +insert_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */ + uint32_t offset, /**< distance from the start of the container */ + uint32_t value) /**< input value */ +{ + bytecode_list_insert (bc_ctx_p, offset, (re_bytecode_t*) &value, sizeof (uint32_t)); +} /* insert_u32 */ + +/** + * Get a RegExp opcode + */ +re_opcode_t +re_get_opcode (re_bytecode_t **bc_p) /**< pointer to bytecode start */ +{ + re_bytecode_t bytecode = **bc_p; + (*bc_p) += sizeof (re_bytecode_t); + return (re_opcode_t) bytecode; +} /* get_opcode */ + +/** + * Get a parameter of a RegExp opcode + */ +uint32_t +re_get_value (re_bytecode_t **bc_p) /**< pointer to bytecode start */ +{ + uint32_t value = *((uint32_t*) *bc_p); + (*bc_p) += sizeof (uint32_t); + return value; +} /* get_value */ + +/** + * Callback function of character class generation + */ +static void +append_char_class (void* re_ctx_p, /**< RegExp compiler context */ + uint32_t start, /**< character class range from */ + uint32_t end) /**< character class range to */ +{ + /* FIXME: Handle ignore case flag and add unicode support. 
*/ + re_compiler_ctx_t *ctx_p = (re_compiler_ctx_t*) re_ctx_p; + append_u32 (ctx_p->bytecode_ctx_p, start); + append_u32 (ctx_p->bytecode_ctx_p, end); + ctx_p->parser_ctx_p->num_of_classes++; +} /* append_char_class */ + +/** + * Insert simple atom iterator + */ +static void +insert_simple_iterator (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + uint32_t new_atom_start_offset) /**< atom start offset */ +{ + uint32_t atom_code_length; + uint32_t offset; + uint32_t qmin, qmax; + + qmin = re_ctx_p->current_token.qmin; + qmax = re_ctx_p->current_token.qmax; + JERRY_ASSERT (qmin <= qmax); + + /* FIXME: optimize bytecode length. Store 0 rather than INF */ + + append_opcode (re_ctx_p->bytecode_ctx_p, RE_OP_MATCH); /* complete 'sub atom' */ + uint32_t bytecode_length = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + atom_code_length = (uint32_t) (bytecode_length - new_atom_start_offset); + + offset = new_atom_start_offset; + insert_u32 (re_ctx_p->bytecode_ctx_p, offset, atom_code_length); + insert_u32 (re_ctx_p->bytecode_ctx_p, offset, qmax); + insert_u32 (re_ctx_p->bytecode_ctx_p, offset, qmin); + if (re_ctx_p->current_token.greedy) + { + insert_opcode (re_ctx_p->bytecode_ctx_p, offset, RE_OP_GREEDY_ITERATOR); + } + else + { + insert_opcode (re_ctx_p->bytecode_ctx_p, offset, RE_OP_NON_GREEDY_ITERATOR); + } +} /* insert_simple_iterator */ + +/** + * Get the type of a group start + */ +static re_opcode_t +get_start_opcode_type (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + bool is_capturable) /**< is capturable group */ +{ + if (is_capturable) + { + if (re_ctx_p->current_token.qmin == 0) + { + if (re_ctx_p->current_token.greedy) + { + return RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START; + } + + return RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START; + } + + return RE_OP_CAPTURE_GROUP_START; + } + + if (re_ctx_p->current_token.qmin == 0) + { + if (re_ctx_p->current_token.greedy) + { + return RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START; + } + + return 
RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START; + } + + return RE_OP_NON_CAPTURE_GROUP_START; + + JERRY_UNREACHABLE (); + return 0; +} /* get_start_opcode_type */ + +/** + * Get the opcode of a group end (depends on capturability and greediness) + */ +static re_opcode_t +get_end_opcode_type (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + bool is_capturable) /**< is capturable group */ +{ + if (is_capturable) + { + if (re_ctx_p->current_token.greedy) + { + return RE_OP_CAPTURE_GREEDY_GROUP_END; + } + + return RE_OP_CAPTURE_NON_GREEDY_GROUP_END; + } + + if (re_ctx_p->current_token.greedy) + { + return RE_OP_NON_CAPTURE_GREEDY_GROUP_END; + } + + return RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END; + + JERRY_UNREACHABLE (); + return 0; +} /* get_end_opcode_type */ + +/** + * Enclose the given bytecode in a group + */ +static void +insert_into_group (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + uint32_t group_start_offset, /**< offset of group start */ + uint32_t idx, /**< index of group */ + bool is_capturable) /**< is capturable group */ +{ + uint32_t qmin, qmax; + re_opcode_t start_opcode = get_start_opcode_type (re_ctx_p, is_capturable); + re_opcode_t end_opcode = get_end_opcode_type (re_ctx_p, is_capturable); + uint32_t start_head_offset_len; + + qmin = re_ctx_p->current_token.qmin; + qmax = re_ctx_p->current_token.qmax; + JERRY_ASSERT (qmin <= qmax); + + start_head_offset_len = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + insert_u32 (re_ctx_p->bytecode_ctx_p, group_start_offset, idx); + insert_opcode (re_ctx_p->bytecode_ctx_p, group_start_offset, start_opcode); + start_head_offset_len = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p) - start_head_offset_len; + append_opcode (re_ctx_p->bytecode_ctx_p, end_opcode); + append_u32 (re_ctx_p->bytecode_ctx_p, idx); + append_u32 (re_ctx_p->bytecode_ctx_p, qmin); + append_u32 (re_ctx_p->bytecode_ctx_p, qmax); + + group_start_offset += start_head_offset_len; + append_jump_offset (re_ctx_p->bytecode_ctx_p, + BYTECODE_LEN (re_ctx_p->bytecode_ctx_p) -
group_start_offset); + + if (start_opcode != RE_OP_CAPTURE_GROUP_START && start_opcode != RE_OP_NON_CAPTURE_GROUP_START) + { + insert_u32 (re_ctx_p->bytecode_ctx_p, + group_start_offset, + BYTECODE_LEN (re_ctx_p->bytecode_ctx_p) - group_start_offset); + } +} /* insert_into_group */ + +/** + * Enclose the given bytecode in a group and insert a jump value in front of it + */ +static void +insert_into_group_with_jump (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + uint32_t group_start_offset, /**< offset of group start */ + uint32_t idx, /**< index of group */ + bool is_capturable) /**< is capturable group */ +{ + insert_u32 (re_ctx_p->bytecode_ctx_p, + group_start_offset, + BYTECODE_LEN (re_ctx_p->bytecode_ctx_p) - group_start_offset); + insert_into_group (re_ctx_p, group_start_offset, idx, is_capturable); +} /* insert_into_group_with_jump */ + +/** + * Parse alternatives + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +static ecma_completion_value_t +parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */ + bool expect_eof) /**< true: EOF must end the alternative, false: ')' must end it */ +{ + uint32_t idx; + re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p; + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + uint32_t alterantive_offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + + if (re_ctx_p->recursion_depth >= RE_COMPILE_RECURSION_LIMIT) + { + ret_value = ecma_raise_range_error ((const ecma_char_t *) "RegExp compiler recursion limit is exceeded."); + return ret_value; + } + re_ctx_p->recursion_depth++; + + while (true) + { + ECMA_TRY_CATCH (empty, + re_parse_next_token (re_ctx_p->parser_ctx_p, + &(re_ctx_p->current_token)), + ret_value); + ECMA_FINALIZE (empty); + if (!ecma_is_completion_value_empty (ret_value)) + { + return ret_value; /* error */ + } + uint32_t new_atom_start_offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + + switch (re_ctx_p->current_token.type) + { + case
RE_TOK_START_CAPTURE_GROUP: + { + idx = re_ctx_p->num_of_captures++; + JERRY_DDLOG ("Compile a capture group start (idx: %d)\n", idx); + + ret_value = parse_alternative (re_ctx_p, false); + if (ecma_is_completion_value_empty (ret_value)) + { + insert_into_group (re_ctx_p, new_atom_start_offset, idx, true); + } + else + { + return ret_value; /* error */ + } + break; + } + case RE_TOK_START_NON_CAPTURE_GROUP: + { + idx = re_ctx_p->num_of_non_captures++; + JERRY_DDLOG ("Compile a non-capture group start (idx: %d)\n", idx); + + ret_value = parse_alternative (re_ctx_p, false); + if (ecma_is_completion_value_empty (ret_value)) + { + insert_into_group (re_ctx_p, new_atom_start_offset, idx, false); + } + else + { + return ret_value; /* error */ + } + break; + } + case RE_TOK_CHAR: + { + JERRY_DDLOG ("Compile character token: %c, qmin: %d, qmax: %d\n", + re_ctx_p->current_token.value, re_ctx_p->current_token.qmin, re_ctx_p->current_token.qmax); + + append_opcode (bc_ctx_p, RE_OP_CHAR); + append_u32 (bc_ctx_p, re_ctx_p->current_token.value); + + if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1)) + { + insert_simple_iterator (re_ctx_p, new_atom_start_offset); + } + break; + } + case RE_TOK_PERIOD: + { + JERRY_DDLOG ("Compile a period\n"); + append_opcode (bc_ctx_p, RE_OP_PERIOD); + + if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1)) + { + insert_simple_iterator (re_ctx_p, new_atom_start_offset); + } + break; + } + case RE_TOK_ALTERNATIVE: + { + JERRY_DDLOG ("Compile an alternative\n"); + insert_u32 (bc_ctx_p, alterantive_offset, BYTECODE_LEN (bc_ctx_p) - alterantive_offset); + append_opcode (bc_ctx_p, RE_OP_ALTERNATIVE); + alterantive_offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + break; + } + case RE_TOK_ASSERT_START: + { + JERRY_DDLOG ("Compile a start assertion\n"); + append_opcode (bc_ctx_p, RE_OP_ASSERT_START); + break; + } + case RE_TOK_ASSERT_END: + { + JERRY_DDLOG ("Compile an end assertion\n"); + 
append_opcode (bc_ctx_p, RE_OP_ASSERT_END); + break; + } + case RE_TOK_ASSERT_WORD_BOUNDARY: + { + JERRY_DDLOG ("Compile a word boundary assertion\n"); + append_opcode (bc_ctx_p, RE_OP_ASSERT_WORD_BOUNDARY); + break; + } + case RE_TOK_ASSERT_NOT_WORD_BOUNDARY: + { + JERRY_DDLOG ("Compile a not word boundary assertion\n"); + append_opcode (bc_ctx_p, RE_OP_ASSERT_NOT_WORD_BOUNDARY); + break; + } + case RE_TOK_ASSERT_START_POS_LOOKAHEAD: + { + JERRY_DDLOG ("Compile a positive lookahead assertion\n"); + idx = re_ctx_p->num_of_non_captures++; + append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_POS); + + ret_value = parse_alternative (re_ctx_p, false); + if (ecma_is_completion_value_empty (ret_value)) + { + append_opcode (bc_ctx_p, RE_OP_MATCH); + + insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false); + } + else + { + return ret_value; /* error */ + } + break; + } + case RE_TOK_ASSERT_START_NEG_LOOKAHEAD: + { + JERRY_DDLOG ("Compile a negative lookahead assertion\n"); + idx = re_ctx_p->num_of_non_captures++; + append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_NEG); + + ret_value = parse_alternative (re_ctx_p, false); + if (ecma_is_completion_value_empty (ret_value)) + { + append_opcode (bc_ctx_p, RE_OP_MATCH); + + insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false); + } + else + { + return ret_value; /* error */ + } + break; + } + case RE_TOK_BACKREFERENCE: + { + uint32_t backref = (uint32_t) re_ctx_p->current_token.value; + idx = re_ctx_p->num_of_non_captures++; + if (backref > re_ctx_p->highest_backref) + { + re_ctx_p->highest_backref = backref; + } + JERRY_DDLOG ("Compile a backreference: %d\n", backref); + append_opcode (bc_ctx_p, RE_OP_BACKREFERENCE); + append_u32 (bc_ctx_p, backref); + + insert_into_group_with_jump (re_ctx_p, new_atom_start_offset, idx, false); + break; + } + case RE_TOK_START_CHAR_CLASS: + case RE_TOK_START_INV_CHAR_CLASS: + { + JERRY_DDLOG ("Compile a character class\n"); + append_opcode (bc_ctx_p, + 
re_ctx_p->current_token.type == RE_TOK_START_CHAR_CLASS + ? RE_OP_CHAR_CLASS + : RE_OP_INV_CHAR_CLASS); + uint32_t offset = BYTECODE_LEN (re_ctx_p->bytecode_ctx_p); + + ECMA_TRY_CATCH (empty, + re_parse_char_class (re_ctx_p->parser_ctx_p, + append_char_class, + re_ctx_p, + &(re_ctx_p->current_token)), + ret_value); + insert_u32 (bc_ctx_p, offset, re_ctx_p->parser_ctx_p->num_of_classes); + + if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1)) + { + insert_simple_iterator (re_ctx_p, new_atom_start_offset); + } + ECMA_FINALIZE (empty); + break; + } + case RE_TOK_END_GROUP: + { + JERRY_DDLOG ("Compile a group end\n"); + + if (expect_eof) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected end of paren."); + } + else + { + insert_u32 (bc_ctx_p, alterantive_offset, BYTECODE_LEN (bc_ctx_p) - alterantive_offset); + re_ctx_p->recursion_depth--; + } + + return ret_value; + } + case RE_TOK_EOF: + { + if (!expect_eof) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected end of pattern."); + } + else + { + insert_u32 (bc_ctx_p, alterantive_offset, BYTECODE_LEN (bc_ctx_p) - alterantive_offset); + re_ctx_p->recursion_depth--; + } + + return ret_value; + } + default: + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Unexpected RegExp token."); + return ret_value; + } + } + } + + JERRY_UNREACHABLE (); + return ret_value; +} /* parse_alternative */ + +/** + * Compilation of RegExp bytecode + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +re_compile_bytecode (ecma_property_t *bytecode_p, /**< bytecode */ + ecma_string_t *pattern_str_p, /**< pattern */ + uint8_t flags) /**< flags */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + re_compiler_ctx_t re_ctx; + re_ctx.flags = flags; + re_ctx.highest_backref = 0; + re_ctx.num_of_non_captures = 0; + re_ctx.recursion_depth = 0; + + 
re_bytecode_ctx_t bc_ctx; + bc_ctx.block_start_p = NULL; + bc_ctx.block_end_p = NULL; + bc_ctx.current_p = NULL; + + re_ctx.bytecode_ctx_p = &bc_ctx; + + int32_t pattern_str_len = ecma_string_get_length (pattern_str_p); + MEM_DEFINE_LOCAL_ARRAY (pattern_start_p, pattern_str_len + 1, ecma_char_t); + ssize_t zt_str_size = (ssize_t) sizeof (ecma_char_t) * (pattern_str_len + 1); + ecma_string_to_zt_string (pattern_str_p, pattern_start_p, zt_str_size); + + re_parser_ctx_t parser_ctx; + parser_ctx.pattern_start_p = pattern_start_p; + parser_ctx.current_char_p = pattern_start_p; + parser_ctx.num_of_groups = -1; /* -1: groups are not counted yet */ + re_ctx.parser_ctx_p = &parser_ctx; + + /* 1. Parse RegExp pattern */ + re_ctx.num_of_captures = 1; + append_opcode (&bc_ctx, RE_OP_SAVE_AT_START); + + ECMA_TRY_CATCH (empty, parse_alternative (&re_ctx, true), ret_value); + + /* 2. Check for invalid backreference */ + if (re_ctx.highest_backref >= re_ctx.num_of_captures) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid backreference.\n"); + } + else + { + append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH); + append_opcode (&bc_ctx, RE_OP_EOF); + + /* 3. Insert extra information for bytecode header */ + insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_non_captures); + insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.num_of_captures * 2); + insert_u32 (&bc_ctx, 0, (uint32_t) re_ctx.flags); + } + ECMA_FINALIZE (empty); + + /* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opcode, so it cannot be NULL.
*/ + JERRY_ASSERT (bc_ctx.block_start_p != NULL); + ECMA_SET_POINTER (bytecode_p->u.internal_property.value, bc_ctx.block_start_p); + + MEM_FINALIZE_LOCAL_ARRAY (pattern_start_p); + +#ifdef JERRY_ENABLE_LOG + regexp_dump_bytecode (&bc_ctx); /* NOTE(review): also runs on error, when no header was inserted - confirm it is safe */ +#endif + + return ret_value; +} /* re_compile_bytecode */ + +#ifdef JERRY_ENABLE_LOG +/** + * RegExp bytecode dumper: logs three header values, then every opcode with its parameters until opcode 0 is read + */ +void +regexp_dump_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */ +{ + re_bytecode_t *bytecode_p = bc_ctx_p->block_start_p; + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d | ", re_get_value (&bytecode_p)); + + re_opcode_t op; + while ((op = re_get_opcode (&bytecode_p))) + { + switch (op) + { + case RE_OP_MATCH: + { + JERRY_DLOG ("MATCH, "); + break; + } + case RE_OP_CHAR: + { + JERRY_DLOG ("CHAR "); + JERRY_DLOG ("%c, ", (char) re_get_value (&bytecode_p)); + break; + } + case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START: + { + JERRY_DLOG ("N"); + /* FALLTHRU */ + } + case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START: + { + JERRY_DLOG ("GZ_START "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_CAPTURE_GROUP_START: + { + JERRY_DLOG ("START "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_CAPTURE_NON_GREEDY_GROUP_END: + { + JERRY_DLOG ("N"); + /* FALLTHRU */ + } + case RE_OP_CAPTURE_GREEDY_GROUP_END: + { + JERRY_DLOG ("G_END "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START: + { + JERRY_DLOG ("N"); + /* FALLTHRU */ + } + case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START: + { + JERRY_DLOG ("GZ_NC_START "); + JERRY_DLOG ("%d ",
re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_NON_CAPTURE_GROUP_START: + { + JERRY_DLOG ("NC_START "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END: + { + JERRY_DLOG ("N"); + /* FALLTHRU */ + } + case RE_OP_NON_CAPTURE_GREEDY_GROUP_END: + { + JERRY_DLOG ("G_NC_END "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_SAVE_AT_START: + { + JERRY_DLOG ("RE_START "); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_SAVE_AND_MATCH: + { + JERRY_DLOG ("RE_END, "); + break; + } + case RE_OP_GREEDY_ITERATOR: + { + JERRY_DLOG ("GREEDY_ITERATOR "); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_NON_GREEDY_ITERATOR: + { + JERRY_DLOG ("NON_GREEDY_ITERATOR "); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_PERIOD: + { + JERRY_DLOG ("PERIOD "); + break; + } + case RE_OP_ALTERNATIVE: + { + JERRY_DLOG ("ALTERNATIVE "); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_ASSERT_START: + { + JERRY_DLOG ("ASSERT_START "); + break; + } + case RE_OP_ASSERT_END: + { + JERRY_DLOG ("ASSERT_END "); + break; + } + case RE_OP_ASSERT_WORD_BOUNDARY: + { + JERRY_DLOG ("ASSERT_WORD_BOUNDARY "); + break; + } + case RE_OP_ASSERT_NOT_WORD_BOUNDARY: + { + JERRY_DLOG ("ASSERT_NOT_WORD_BOUNDARY "); + break; + } + case RE_OP_LOOKAHEAD_POS: + { + JERRY_DLOG ("LOOKAHEAD_POS "); + JERRY_DLOG ("%d, ", 
re_get_value (&bytecode_p)); + break; + } + case RE_OP_LOOKAHEAD_NEG: + { + JERRY_DLOG ("LOOKAHEAD_NEG "); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_BACKREFERENCE: + { + JERRY_DLOG ("BACKREFERENCE "); + JERRY_DLOG ("%d, ", re_get_value (&bytecode_p)); + break; + } + case RE_OP_INV_CHAR_CLASS: + { + JERRY_DLOG ("INV_"); + /* FALLTHRU */ + } + case RE_OP_CHAR_CLASS: + { + JERRY_DLOG ("CHAR_CLASS "); + uint32_t num_of_class = re_get_value (&bytecode_p); + JERRY_DLOG ("%d", num_of_class); + while (num_of_class) + { + JERRY_DLOG (" %d", re_get_value (&bytecode_p)); + JERRY_DLOG ("-%d", re_get_value (&bytecode_p)); + num_of_class--; + } + JERRY_DLOG (", "); + break; + } + default: + { + JERRY_DLOG ("UNKNOWN(%d), ", (uint32_t) op); + break; + } + } + } + JERRY_DLOG ("EOF\n"); +} /* regexp_dump_bytecode */ +#endif /* JERRY_ENABLE_LOG */ + +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ diff --git a/jerry-core/parser/regexp/re-compiler.h b/jerry-core/parser/regexp/re-compiler.h new file mode 100644 index 0000000000..73e4eedabe --- /dev/null +++ b/jerry-core/parser/regexp/re-compiler.h @@ -0,0 +1,108 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef RE_COMPILER_H +#define RE_COMPILER_H + +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + +#include "ecma-globals.h" +#include "re-parser.h" + +/* RegExp opcodes + * Group opcode order is important, because RE_IS_CAPTURE_GROUP is based on it. + * Change it carefully. Capture opcodes must come first (note: RE_IS_CAPTURE_GROUP is also true for RE_OP_EOF). + */ +#define RE_OP_EOF 0 + +#define RE_OP_CAPTURE_GROUP_START 1 +#define RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START 2 +#define RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START 3 +#define RE_OP_CAPTURE_GREEDY_GROUP_END 4 +#define RE_OP_CAPTURE_NON_GREEDY_GROUP_END 5 +#define RE_OP_NON_CAPTURE_GROUP_START 6 +#define RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START 7 +#define RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START 8 +#define RE_OP_NON_CAPTURE_GREEDY_GROUP_END 9 +#define RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END 10 + +#define RE_OP_MATCH 11 +#define RE_OP_CHAR 12 +#define RE_OP_SAVE_AT_START 13 +#define RE_OP_SAVE_AND_MATCH 14 +#define RE_OP_PERIOD 15 +#define RE_OP_ALTERNATIVE 16 +#define RE_OP_GREEDY_ITERATOR 17 +#define RE_OP_NON_GREEDY_ITERATOR 18 +#define RE_OP_ASSERT_START 19 +#define RE_OP_ASSERT_END 20 +#define RE_OP_ASSERT_WORD_BOUNDARY 21 +#define RE_OP_ASSERT_NOT_WORD_BOUNDARY 22 +#define RE_OP_LOOKAHEAD_POS 23 +#define RE_OP_LOOKAHEAD_NEG 24 +#define RE_OP_BACKREFERENCE 25 +#define RE_OP_CHAR_CLASS 26 +#define RE_OP_INV_CHAR_CLASS 27 + +#define RE_COMPILE_RECURSION_LIMIT 100 + +#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ?
1 : 0) + +typedef uint8_t re_opcode_t; /* type of RegExp opcodes */ +typedef uint8_t re_bytecode_t; /* type of standard bytecode elements (ex.: opcode parameters) */ + +/** + * Context of RegExp bytecode container + * + * FIXME: + * Add comments with description of the structure members + */ +typedef struct +{ + re_bytecode_t *block_start_p; /**< start of the bytecode block */ + re_bytecode_t *block_end_p; /**< presumably the end of the allocated block - TODO confirm */ + re_bytecode_t *current_p; /**< presumably the current write position - TODO confirm */ +} re_bytecode_ctx_t; + +/** + * Context of RegExp compiler + * + * FIXME: + * Add comments with description of the structure members + */ +typedef struct +{ + uint8_t flags; /**< RegExp flags, stored in the bytecode header */ + uint32_t recursion_depth; /**< nesting depth of parse_alternative calls */ + uint32_t num_of_captures; /**< number of capture groups (counting starts at 1) */ + uint32_t num_of_non_captures; /**< number of non-capture group indices handed out */ + uint32_t highest_backref; /**< highest backreference number seen in the pattern */ + re_bytecode_ctx_t *bytecode_ctx_p; /**< bytecode container being filled */ + re_token_t current_token; /**< token most recently read by the parser */ + re_parser_ctx_t *parser_ctx_p; /**< RegExp parser context */ +} re_compiler_ctx_t; + +ecma_completion_value_t +re_compile_bytecode (ecma_property_t *bytecode_p, ecma_string_t *pattern_str_p, uint8_t flags); + +re_opcode_t +re_get_opcode (re_bytecode_t **bc_p); + +uint32_t +re_get_value (re_bytecode_t **bc_p); + +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ +#endif /* RE_COMPILER_H */ diff --git a/jerry-core/parser/regexp/re-parser.cpp b/jerry-core/parser/regexp/re-parser.cpp new file mode 100644 index 0000000000..51ed3a8c9d --- /dev/null +++ b/jerry-core/parser/regexp/re-parser.cpp @@ -0,0 +1,808 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ecma-exceptions.h" +#include "ecma-globals.h" +#include "ecma-helpers.h" +#include "ecma-try-catch-macro.h" +#include "jrt-libc-includes.h" +#include "re-parser.h" +#include "syntax-errors.h" + +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + +/* Char at index 'lookup' or '\0' past the end. FIXME: change it, when unicode support would be implemented */ +#define RE_LOOKUP(str_p, lookup) (ecma_zt_string_length (str_p) > (lookup) ? (str_p)[(lookup)] : '\0') + +/* Move the pointer forward. FIXME: change it, when unicode support would be implemented */ +#define RE_ADVANCE(str_p, advance) do { (str_p) += (advance); } while (0) + +static ecma_char_t +get_ecma_char (ecma_char_t** char_p) /**< in/out: read position, advanced by one character */ +{ + /* FIXME: change to string iterator with unicode support, when it would be implemented */ + ecma_char_t ch = **char_p; + RE_ADVANCE (*char_p, 1); + return ch; +} /* get_ecma_char */ + +/** + * Parse RegExp iterators + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +static ecma_completion_value_t +parse_re_iterator (ecma_char_t *pattern_p, /**< current position in the RegExp pattern */ + re_token_t *re_token_p, /**< output token */ + uint32_t lookup, /**< offset of the first iterator character from pattern_p */ + uint32_t *advance_p) /**< in/out: iterator length is stored here; must be 0 on entry */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + ecma_char_t ch0 = RE_LOOKUP (pattern_p, lookup); + ecma_char_t ch1 = RE_LOOKUP (pattern_p, lookup + 1); + + switch (ch0) + { + case '?': + { + re_token_p->qmin = 0; + re_token_p->qmax = 1; + if (ch1 == '?') + { + *advance_p = 2; + re_token_p->greedy = false; + } + else + { + *advance_p = 1; + re_token_p->greedy = true; + } + break; + } + case '*': + { + re_token_p->qmin = 0; + re_token_p->qmax = RE_ITERATOR_INFINITE; + if (ch1 == '?') + { + *advance_p = 2; + re_token_p->greedy = false; + } + else + { + *advance_p = 1; + re_token_p->greedy = true; + } + break; + } + case '+': + { +
re_token_p->qmin = 1; + re_token_p->qmax = RE_ITERATOR_INFINITE; + if (ch1 == '?') + { + *advance_p = 2; + re_token_p->greedy = false; + } + else + { + *advance_p = 1; + re_token_p->greedy = true; + } + break; + } + case '{': + { + uint32_t qmin = 0; + uint32_t qmax = RE_ITERATOR_INFINITE; + uint32_t digits = 0; + while (true) + { + (*advance_p)++; + ch1 = RE_LOOKUP (pattern_p, lookup + *advance_p); + + if (isdigit (ch1)) + { + if (digits >= ECMA_NUMBER_MAX_DIGITS) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: too many digits."); + return ret_value; + } + digits++; + qmin = qmin * 10 + ecma_char_hex_to_int (ch1); + } + else if (ch1 == ',') + { + if (qmax != RE_ITERATOR_INFINITE) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: double comma."); + return ret_value; + } + if ((RE_LOOKUP (pattern_p, lookup + *advance_p + 1)) == '}') + { + if (digits == 0) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: missing digits."); + return ret_value; + } + + re_token_p->qmin = qmin; + re_token_p->qmax = RE_ITERATOR_INFINITE; + *advance_p += 2; + break; + } + qmax = qmin; + qmin = 0; + digits = 0; + } + else if (ch1 == '}') + { + if (digits == 0) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: missing digits."); + return ret_value; + } + + if (qmax != RE_ITERATOR_INFINITE) + { + re_token_p->qmin = qmax; + re_token_p->qmax = qmin; + } + else + { + re_token_p->qmin = qmin; + re_token_p->qmax = qmin; + } + + *advance_p += 1; + break; + } + else + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: unknown char."); + return ret_value; + } + } + + if ((RE_LOOKUP (pattern_p, lookup + *advance_p)) == '?') + { + re_token_p->greedy = false; + *advance_p += 1; + } + else + { + re_token_p->greedy = true; + } + break; + + JERRY_UNREACHABLE (); + break; + } + default: + { + 
re_token_p->qmin = 1; + re_token_p->qmax = 1; + re_token_p->greedy = true; + break; + } + } + + JERRY_ASSERT (ecma_is_completion_value_empty (ret_value)); + + if (re_token_p->qmin > re_token_p->qmax) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp quantifier error: qmin > qmax."); + } + + return ret_value; +} /* parse_re_iterator */ + +/** + * Count the number of capture groups in pattern ('(' not followed by '?' and not inside a character class) + */ +static void +re_count_num_of_groups (re_parser_ctx_t *parser_ctx_p) /**< RegExp parser context */ +{ + ecma_char_t *pattern_p = parser_ctx_p->pattern_start_p; + ecma_char_t ch1; + int char_class_in = 0; + parser_ctx_p->num_of_groups = 0; + + ch1 = get_ecma_char (&pattern_p); + while (ch1 != '\0') + { + ecma_char_t ch0 = ch1; + ch1 = get_ecma_char (&pattern_p); + switch (ch0) + { + case '\\': + { + ch1 = (ch1 != '\0') ? get_ecma_char (&pattern_p) : ch1; /* skip escaped char; stop at terminator */ + break; + } + case '[': + { + char_class_in = 1; /* '[' is a literal inside a class, so classes do not nest */ + break; + } + case ']': + { + if (char_class_in) + { + char_class_in--; + } + break; + } + case '(': + { + if (ch1 != '?'
&& !char_class_in) + { + parser_ctx_p->num_of_groups++; + } + break; + } + } + } +} /* re_count_num_of_groups */ + +/** + * Read the input pattern and parse the range of character class + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context */ + re_char_class_callback append_char_class, /**< callback function, + * which adds the char-ranges + * to the bytecode */ + void* re_ctx_p, /**< regexp compiler context */ + re_token_t *out_token_p) /**< output token */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + ecma_char_t **pattern_p = &(parser_ctx_p->current_char_p); + + out_token_p->qmax = out_token_p->qmin = 1; + ecma_char_t start = RE_CHAR_UNDEF; + bool is_range = false; + parser_ctx_p->num_of_classes = 0; + + do + { + ecma_char_t ch = get_ecma_char (pattern_p); /* NOTE(review): '\0' is not checked; unterminated class seems to over-read */ + if (ch == ']') + { + if (start != RE_CHAR_UNDEF) + { + append_char_class (re_ctx_p, start, start); + } + break; + } + else if (ch == '-') + { + if (start != RE_CHAR_UNDEF && !is_range && RE_LOOKUP (*pattern_p, 0) != ']') + { + is_range = true; + continue; + } + } + else if (ch == '\\') + { + ch = get_ecma_char (pattern_p); + + if (ch == 'b') + { + ch = RE_CONTROL_CHAR_BEL; /* NOTE(review): ClassEscape 'b' is backspace (U+0008) - confirm the constant */ + } + else if (ch == 'f') + { + ch = RE_CONTROL_CHAR_FF; + } + else if (ch == 'n') + { + ch = RE_CONTROL_CHAR_EOL; + } + else if (ch == 't') + { + ch = RE_CONTROL_CHAR_TAB; + } + else if (ch == 'r') + { + ch = RE_CONTROL_CHAR_CR; + } + else if (ch == 'v') + { + ch = RE_CONTROL_CHAR_VT; + } + else if (ch == 'c') + { + ch = get_ecma_char (pattern_p); + if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) + { + ch = (ch % 32); + } + else + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp control escape"); + return ret_value; + } + } + else if (ch == 'x') + { + /* FIXME: get unicode char from hex-digits */ + /* ch = ...; */ + }
+ else if (ch == 'u') + { + /* FIXME: get unicode char from digits */ + /* ch = ...; */ + } + else if (ch == 'd') + { + /* append digits from '0' to '9'. */ + append_char_class (re_ctx_p, 0x0030UL, 0x0039UL); + ch = RE_CHAR_UNDEF; + } + else if (ch == 'D') + { + append_char_class (re_ctx_p, 0x0000UL, 0x002FUL); + append_char_class (re_ctx_p, 0x003AUL, 0xFFFFUL); + ch = RE_CHAR_UNDEF; + } + else if (ch == 's') + { + append_char_class (re_ctx_p, 0x0009UL, 0x000DUL); + append_char_class (re_ctx_p, 0x0020UL, 0x0020UL); + append_char_class (re_ctx_p, 0x00A0UL, 0x00A0UL); + append_char_class (re_ctx_p, 0x1680UL, 0x1680UL); + append_char_class (re_ctx_p, 0x180EUL, 0x180EUL); + append_char_class (re_ctx_p, 0x2000UL, 0x200AUL); + append_char_class (re_ctx_p, 0x2028UL, 0x2029UL); + append_char_class (re_ctx_p, 0x202FUL, 0x202FUL); + append_char_class (re_ctx_p, 0x205FUL, 0x205FUL); + append_char_class (re_ctx_p, 0x3000UL, 0x3000UL); + append_char_class (re_ctx_p, 0xFEFFUL, 0xFEFFUL); + ch = RE_CHAR_UNDEF; + } + else if (ch == 'S') + { + append_char_class (re_ctx_p, 0x0000UL, 0x0008UL); + append_char_class (re_ctx_p, 0x000EUL, 0x001FUL); + append_char_class (re_ctx_p, 0x0021UL, 0x009FUL); + append_char_class (re_ctx_p, 0x00A1UL, 0x167FUL); + append_char_class (re_ctx_p, 0x1681UL, 0x180DUL); + append_char_class (re_ctx_p, 0x180FUL, 0x1FFFUL); + append_char_class (re_ctx_p, 0x200BUL, 0x2027UL); + append_char_class (re_ctx_p, 0x202AUL, 0x202EUL); + append_char_class (re_ctx_p, 0x2030UL, 0x205EUL); + append_char_class (re_ctx_p, 0x2060UL, 0x2FFFUL); + append_char_class (re_ctx_p, 0x3001UL, 0xFEFEUL); + append_char_class (re_ctx_p, 0xFF00UL, 0xFFFFUL); + ch = RE_CHAR_UNDEF; + } + else if (ch == 'w') + { + append_char_class (re_ctx_p, 0x0030UL, 0x0039UL); + append_char_class (re_ctx_p, 0x0041UL, 0x005AUL); + append_char_class (re_ctx_p, 0x005FUL, 0x005FUL); + append_char_class (re_ctx_p, 0x0061UL, 0x007AUL); + ch = RE_CHAR_UNDEF; + } + else if (ch == 'W') + { + append_char_class 
(re_ctx_p, 0x0000UL, 0x002FUL); + append_char_class (re_ctx_p, 0x003AUL, 0x0040UL); + append_char_class (re_ctx_p, 0x005BUL, 0x005EUL); + append_char_class (re_ctx_p, 0x0060UL, 0x0060UL); + append_char_class (re_ctx_p, 0x007BUL, 0xFFFFUL); + ch = RE_CHAR_UNDEF; + } + else if (isdigit (ch)) + { + if (ch != '\0' || isdigit (RE_LOOKUP (*pattern_p, 1))) + { + /* FIXME: octal support */ + } + } + /* FIXME: depends on the unicode support + else if (!jerry_unicode_identifier (ch)) + { + JERRY_ERROR_MSG ("RegExp escape pattern error. (Char class)"); + } + */ + } + + if (ch == RE_CHAR_UNDEF) + { + if (start != RE_CHAR_UNDEF) + { + if (is_range) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid character class range"); + return ret_value; + } + else + { + append_char_class (re_ctx_p, start, start); + start = RE_CHAR_UNDEF; + } + } + } + else + { + if (start != RE_CHAR_UNDEF) + { + if (is_range) + { + if (start > ch) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid character class range"); + return ret_value; + } + else + { + append_char_class (re_ctx_p, start, ch); + start = RE_CHAR_UNDEF; + is_range = false; + } + } + else + { + append_char_class (re_ctx_p, start, start); + start = ch; + } + } + else + { + start = ch; + } + } + } + while (true); + + uint32_t advance = 0; + ECMA_TRY_CATCH (empty, + parse_re_iterator (parser_ctx_p->current_char_p, + out_token_p, + 0, + &advance), + ret_value); + RE_ADVANCE (parser_ctx_p->current_char_p, advance); + ECMA_FINALIZE (empty); + + return ret_value; +} /* re_parse_char_class */
+
+/**
+ * Read the input pattern and parse the next token for the RegExp compiler
+ *
+ * The token kind (and, where applicable, its value) is stored in *out_token_p.
+ * On success the parser position is moved forward by the length of the token
+ * plus whatever parse_re_iterator reports as consumed after it. When an error
+ * completion is returned the parser position is left where it was.
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value
+ */
+ecma_completion_value_t
+re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context */
+                     re_token_t *out_token_p) /**< output token */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+  uint32_t advance = 0;
+  ecma_char_t ch0 = *(parser_ctx_p->current_char_p);
+
+  switch (ch0)
+  {
+    case '|':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_ALTERNATIVE;
+      break;
+    }
+    case '^':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_ASSERT_START;
+      break;
+    }
+    case '$':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_ASSERT_END;
+      break;
+    }
+    case '.':
+    {
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         1,
+                                         &advance),
+                      ret_value);
+      advance += 1;
+      out_token_p->type = RE_TOK_PERIOD;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+    case '\\':
+    {
+      advance = 2;
+      out_token_p->type = RE_TOK_CHAR;
+      ecma_char_t ch1 = RE_LOOKUP (parser_ctx_p->current_char_p, 1);
+
+      if (ch1 == '\0')
+      {
+        /* A lone backslash ends the pattern: advancing by 2 would step over the terminator. */
+        ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp: \\ at end of pattern");
+        break;
+      }
+      else if (ch1 == 'b')
+      {
+        out_token_p->type = RE_TOK_ASSERT_WORD_BOUNDARY;
+      }
+      else if (ch1 == 'B')
+      {
+        out_token_p->type = RE_TOK_ASSERT_NOT_WORD_BOUNDARY;
+      }
+      else if (ch1 == 'f')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_FF;
+      }
+      else if (ch1 == 'n')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_EOL;
+      }
+      else if (ch1 == 't')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_TAB;
+      }
+      else if (ch1 == 'r')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_CR;
+      }
+      else if (ch1 == 'v')
+      {
+        out_token_p->value = RE_CONTROL_CHAR_VT;
+      }
+      else if (ch1 == 'c')
+      {
+        ecma_char_t ch2 = RE_LOOKUP (parser_ctx_p->current_char_p, 2);
+        if ((ch2 >= 'A' && ch2 <= 'Z') || (ch2 >= 'a' && ch2 <= 'z'))
+        {
+          advance = 3;
+          out_token_p->type = RE_TOK_CHAR;
+          out_token_p->value = (ch2 % 32);
+        }
+        else
+        {
+          ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp control escape");
+          break;
+        }
+      }
+      else if (ch1 == 'x'
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 2))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 3)))
+      {
+        /* \xHH: backslash, 'x' and two hex digits */
+        advance = 4;
+        out_token_p->type = RE_TOK_CHAR;
+
+        /* NOTE(review): relies on ecma_char_hex_to_int accepting a-f / A-F as its name says - confirm */
+        uint32_t code_unit = 0;
+        for (uint32_t pos = 2; pos < 4; pos++)
+        {
+          code_unit = code_unit * 16 + ecma_char_hex_to_int (RE_LOOKUP (parser_ctx_p->current_char_p, pos));
+        }
+        out_token_p->value = code_unit;
+      }
+      else if (ch1 == 'u'
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 2))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 3))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 4))
+               && isxdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 5)))
+      {
+        /* \uHHHH: backslash, 'u' and four hex digits, i.e. 6 characters (not 4) */
+        advance = 6;
+        out_token_p->type = RE_TOK_CHAR;
+
+        /* FIXME: depends on the unicode support; the value may not fit into ecma_char_t */
+        uint32_t code_unit = 0;
+        for (uint32_t pos = 2; pos < 6; pos++)
+        {
+          code_unit = code_unit * 16 + ecma_char_hex_to_int (RE_LOOKUP (parser_ctx_p->current_char_p, pos));
+        }
+        out_token_p->value = code_unit;
+      }
+      else if (ch1 == 'd')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_DIGIT;
+      }
+      else if (ch1 == 'D')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_NOT_DIGIT;
+      }
+      else if (ch1 == 's')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_WHITE;
+      }
+      else if (ch1 == 'S')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_NOT_WHITE;
+      }
+      else if (ch1 == 'w')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_WORD_CHAR;
+      }
+      else if (ch1 == 'W')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_NOT_WORD_CHAR;
+      }
+      else if (isdigit (ch1))
+      {
+        if (ch1 == '0')
+        {
+          /* \0 is the NUL character, but only when no further digit follows */
+          if (isdigit (RE_LOOKUP (parser_ctx_p->current_char_p, 2)))
+          {
+            ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "RegExp escape pattern error.");
+            break;
+          }
+
+          advance = 2;
+          out_token_p->value = RE_CONTROL_CHAR_NUL;
+        }
+        else
+        {
+          /* -1 means the capture groups of the pattern have not been counted yet */
+          if (parser_ctx_p->num_of_groups == -1)
+          {
+            re_count_num_of_groups (parser_ctx_p);
+          }
+
+          if (parser_ctx_p->num_of_groups)
+          {
+            uint32_t number = 0;
+            int index = 0;
+            advance = 0;
+
+            /* Accumulate the decimal digits; 'advance' ends on the first non-digit */
+            do
+            {
+              if (index >= RE_MAX_RE_DECESC_DIGITS)
+              {
+                ret_value = ecma_raise_syntax_error ((const ecma_char_t *)
+                                                     "RegExp escape pattern error: decimal escape too long.");
+                return ret_value;
+              }
+
+              advance++;
+              ecma_char_t digit = RE_LOOKUP (parser_ctx_p->current_char_p, advance);
+              if (!isdigit (digit))
+              {
+                break;
+              }
+              number = number * 10 + ecma_char_hex_to_int (digit);
+              index++;
+            }
+            while (true);
+
+            if ((int) number <= parser_ctx_p->num_of_groups)
+            {
+              out_token_p->type = RE_TOK_BACKREFERENCE;
+            }
+
+            out_token_p->value = number;
+          }
+          else
+          {
+            out_token_p->value = ch1;
+          }
+        }
+      }
+      else
+      {
+        /* Identity escape: the escaped character stands for itself */
+        out_token_p->value = ch1;
+      }
+
+      uint32_t iter_adv = 0;
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         advance,
+                                         &iter_adv),
+                      ret_value);
+      advance += iter_adv;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+    case '(':
+    {
+      if (RE_LOOKUP (parser_ctx_p->current_char_p, 1) == '?')
+      {
+        ecma_char_t ch2 = RE_LOOKUP (parser_ctx_p->current_char_p, 2);
+        if (ch2 == '=')
+        {
+          /* (?= */
+          advance = 3;
+          out_token_p->type = RE_TOK_ASSERT_START_POS_LOOKAHEAD;
+        }
+        else if (ch2 == '!')
+        {
+          /* (?! */
+          advance = 3;
+          out_token_p->type = RE_TOK_ASSERT_START_NEG_LOOKAHEAD;
+        }
+        else if (ch2 == ':')
+        {
+          /* (?: */
+          advance = 3;
+          out_token_p->type = RE_TOK_START_NON_CAPTURE_GROUP;
+        }
+        else
+        {
+          /* Without this the token type stays unset and the parser position never moves */
+          ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp group");
+        }
+      }
+      else
+      {
+        /* ( */
+        advance = 1;
+        out_token_p->type = RE_TOK_START_CAPTURE_GROUP;
+      }
+      break;
+    }
+    case ')':
+    {
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         1,
+                                         &advance),
+                      ret_value);
+      advance += 1;
+      out_token_p->type = RE_TOK_END_GROUP;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+    case '[':
+    {
+      advance = 1;
+      out_token_p->type = RE_TOK_START_CHAR_CLASS;
+      if (RE_LOOKUP (parser_ctx_p->current_char_p, 1) == '^')
+      {
+        advance = 2;
+        out_token_p->type = RE_TOK_START_INV_CHAR_CLASS;
+      }
+      break;
+    }
+    case ']':
+    case '}':
+    case '?':
+    case '*':
+    case '+':
+    case '{':
+    {
+      /*
+       * These characters can reach this point from user input, e.g. a pattern
+       * that starts with a quantifier or one that follows '(', '|', '^' or '$'
+       * (none of which look for an iterator). Report an error, do not abort.
+       */
+      ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "invalid regexp: unexpected quantifier or bracket");
+      break;
+    }
+    case '\0':
+    {
+      advance = 0;
+      out_token_p->type = RE_TOK_EOF;
+      break;
+    }
+    default:
+    {
+      ECMA_TRY_CATCH (empty,
+                      parse_re_iterator (parser_ctx_p->current_char_p,
+                                         out_token_p,
+                                         1,
+                                         &advance),
+                      ret_value);
+      advance += 1;
+      out_token_p->type = RE_TOK_CHAR;
+      out_token_p->value = ch0;
+      ECMA_FINALIZE (empty);
+      break;
+    }
+  }
+
+  /* Only consume input when the token was parsed without an error */
+  if (ecma_is_completion_value_empty (ret_value))
+  {
+    RE_ADVANCE (parser_ctx_p->current_char_p, advance);
+  }
+
+  return ret_value;
+} /* re_parse_next_token */
+
+#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
diff --git a/jerry-core/parser/regexp/re-parser.h
b/jerry-core/parser/regexp/re-parser.h new file mode 100644 index 0000000000..160cbce7cd --- /dev/null +++ b/jerry-core/parser/regexp/re-parser.h @@ -0,0 +1,91 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RE_PARSER_H +#define RE_PARSER_H + +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + +#include "opcodes-dumper.h" + +typedef uint8_t token_type_t; + +#define RE_TOK_EOF 0 /* EOF */ +#define RE_TOK_BACKREFERENCE 1 /* \[0..9] */ +#define RE_TOK_CHAR 2 /* any character */ +#define RE_TOK_ALTERNATIVE 3 /* | */ +#define RE_TOK_ASSERT_START 4 /* ^ */ +#define RE_TOK_ASSERT_END 5 /* $ */ +#define RE_TOK_PERIOD 6 /* . */ +#define RE_TOK_START_CAPTURE_GROUP 7 /* ( */ +#define RE_TOK_START_NON_CAPTURE_GROUP 8 /* (?: */ +#define RE_TOK_END_GROUP 9 /* ')' */ +#define RE_TOK_ASSERT_START_POS_LOOKAHEAD 10 /* (?= */ +#define RE_TOK_ASSERT_START_NEG_LOOKAHEAD 11 /* (?! 
*/ +#define RE_TOK_ASSERT_WORD_BOUNDARY 12 /* \b */ +#define RE_TOK_ASSERT_NOT_WORD_BOUNDARY 13 /* \B */ +#define RE_TOK_DIGIT 14 /* \d */ +#define RE_TOK_NOT_DIGIT 15 /* \D */ +#define RE_TOK_WHITE 16 /* \s */ +#define RE_TOK_NOT_WHITE 17 /* \S */ +#define RE_TOK_WORD_CHAR 18 /* \w */ +#define RE_TOK_NOT_WORD_CHAR 19 /* \W */ +#define RE_TOK_START_CHAR_CLASS 20 /* [ ] */ +#define RE_TOK_START_INV_CHAR_CLASS 21 /* [^ ] */ + +#define RE_ITERATOR_INFINITE ((uint32_t)-1) +#define RE_MAX_RE_DECESC_DIGITS 9 + +/* FIXME: depends on unicode support */ +#define RE_CHAR_UNDEF ((ecma_char_t)-1) + +#define RE_CONTROL_CHAR_NUL 0x0000 /* \0 */ +#define RE_CONTROL_CHAR_BEL 0x0008 /* \b */ +#define RE_CONTROL_CHAR_TAB 0x0009 /* \t */ +#define RE_CONTROL_CHAR_EOL 0x000a /* \n */ +#define RE_CONTROL_CHAR_VT 0x000b /* \v */ +#define RE_CONTROL_CHAR_FF 0x000c /* \f */ +#define RE_CONTROL_CHAR_CR 0x000d /* \r */
+
+typedef struct
+{
+  token_type_t type; /**< token kind, one of the RE_TOK_* constants above */
+  uint32_t value; /**< token payload: character code of a RE_TOK_CHAR, or the number of a decimal escape */
+  uint32_t qmin; /**< presumably the minimum repeat count of the quantifier - TODO confirm in parse_re_iterator */
+  uint32_t qmax; /**< presumably the maximum repeat count (RE_ITERATOR_INFINITE if unbounded) - TODO confirm */
+  bool greedy; /**< presumably false for a lazy quantifier - TODO confirm in parse_re_iterator */
+} re_token_t; /**< token produced by re_parse_next_token */
+
+typedef struct
+{
+  ecma_char_t *pattern_start_p; /**< presumably the first character of the pattern - TODO confirm */
+  ecma_char_t *current_char_p; /**< next character to parse; moved forward with RE_ADVANCE */
+  int num_of_groups; /**< -1 while not counted; re_parse_next_token then calls re_count_num_of_groups */
+  uint32_t num_of_classes; /**< NOTE(review): not used in the visible parser code - meaning to be confirmed */
+} re_parser_ctx_t; /**< state of the RegExp parser */
+
+typedef void (*re_char_class_callback) (void *re_ctx_p, uint32_t start, uint32_t end); /**< receives one range of a character class; a single character is passed as start == end */
+
+ecma_completion_value_t
+re_parse_char_class (re_parser_ctx_t *parser_ctx_p,
+                     re_char_class_callback append_char_class,
+                     void *re_ctx_p, re_token_t *out_token_p);
+
+ecma_completion_value_t
+re_parse_next_token (re_parser_ctx_t *parser_ctx_p, re_token_t *out_token_p);
+
+#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
+#endif /* RE_PARSER_H */
From f992f5d92e348e7549f2d3972b16af5544472046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Lang=C3=B3?= Date: Thu, 25 Jun 2015 23:58:36 +0300 Subject: [PATCH 17/18] Add RegExp object constructor, regular expression matching procedures, RegExp and RegExp.prototype built-in objects.
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com --- jerry-core/ecma/base/ecma-gc.cpp | 1 + jerry-core/ecma/base/ecma-globals.h | 5 + jerry-core/ecma/base/ecma-helpers.cpp | 5 + jerry-core/ecma/base/ecma-magic-strings.inc.h | 15 +- .../builtin-objects/ecma-builtin-global.inc.h | 12 +- .../ecma-builtin-regexp-prototype.cpp | 229 +++ .../ecma-builtin-regexp-prototype.inc.h | 52 + .../builtin-objects/ecma-builtin-regexp.cpp | 142 ++ .../builtin-objects/ecma-builtin-regexp.inc.h | 97 ++ .../ecma/builtin-objects/ecma-builtins.inc.h | 18 + jerry-core/ecma/operations/ecma-objects.cpp | 6 + .../ecma/operations/ecma-regexp-object.cpp | 1329 +++++++++++++++++ .../ecma/operations/ecma-regexp-object.h | 66 + 13 files changed, 1971 insertions(+), 6 deletions(-) create mode 100644 jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp create mode 100644 jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h create mode 100644 jerry-core/ecma/builtin-objects/ecma-builtin-regexp.cpp create mode 100644 jerry-core/ecma/builtin-objects/ecma-builtin-regexp.inc.h create mode 100644 jerry-core/ecma/operations/ecma-regexp-object.cpp create mode 100644 jerry-core/ecma/operations/ecma-regexp-object.h diff --git a/jerry-core/ecma/base/ecma-gc.cpp b/jerry-core/ecma/base/ecma-gc.cpp index 00fcf9b861..27ea2ec4b0 100644 --- a/jerry-core/ecma/base/ecma-gc.cpp +++ b/jerry-core/ecma/base/ecma-gc.cpp @@ -330,6 +330,7 @@ ecma_gc_mark (ecma_object_t *object_p) /**< object to mark from */ case ECMA_INTERNAL_PROPERTY_EXTENSION_ID: /* an integer */ case ECMA_INTERNAL_PROPERTY_NON_INSTANTIATED_BUILT_IN_MASK_0_31: /* an integer (bit-mask) */ case ECMA_INTERNAL_PROPERTY_NON_INSTANTIATED_BUILT_IN_MASK_32_63: /* an integer (bit-mask) */ + case 
ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE: { break; } diff --git a/jerry-core/ecma/base/ecma-globals.h b/jerry-core/ecma/base/ecma-globals.h index 49d6988676..072ddeb1fb 100644 --- a/jerry-core/ecma/base/ecma-globals.h +++ b/jerry-core/ecma/base/ecma-globals.h @@ -255,6 +255,11 @@ typedef enum */ ECMA_INTERNAL_PROPERTY_NON_INSTANTIATED_BUILT_IN_MASK_32_63, + /** + * RegExp bytecode array + */ + ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE, + /** * Number of internal properties' types */ diff --git a/jerry-core/ecma/base/ecma-helpers.cpp b/jerry-core/ecma/base/ecma-helpers.cpp index 2db1512ddf..e61877136a 100644 --- a/jerry-core/ecma/base/ecma-helpers.cpp +++ b/jerry-core/ecma/base/ecma-helpers.cpp @@ -809,6 +809,11 @@ ecma_free_internal_property (ecma_property_t *property_p) /**< the property */ { JERRY_UNREACHABLE (); } + case ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE: + { + void *bytecode_p = ECMA_GET_NON_NULL_POINTER (void, property_value); + mem_heap_free_block (bytecode_p); + } } ecma_dealloc_property (property_p); diff --git a/jerry-core/ecma/base/ecma-magic-strings.inc.h b/jerry-core/ecma/base/ecma-magic-strings.inc.h index 911d2421ed..b6ddaa4cff 100644 --- a/jerry-core/ecma/base/ecma-magic-strings.inc.h +++ b/jerry-core/ecma/base/ecma-magic-strings.inc.h @@ -32,6 +32,13 @@ ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_STRING, "string") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_OBJECT, "object") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_FUNCTION, "function") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_LENGTH, "length") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_SOURCE, "source") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_GLOBAL, "global") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_IGNORECASE_UL, "ignoreCase") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_MULTILINE, "multiline") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_INDEX, "index") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_INPUT, "input") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_LASTINDEX_UL, "lastIndex") ECMA_MAGIC_STRING_DEF 
(ECMA_MAGIC_STRING_NAN, "NaN") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_INFINITY_UL, "Infinity") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_UNDEFINED_UL, "Undefined") @@ -44,7 +51,8 @@ ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_STRING_UL, "String") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_BOOLEAN_UL, "Boolean") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_NUMBER_UL, "Number") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_DATE_UL, "Date") -ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_REG_EXP_UL, "RegExp") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_REGEXP_UL, "RegExp") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_REGEXP_SOURCE_UL, "Source") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_ERROR_UL, "Error") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_EVAL_ERROR_UL, "EvalError") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_RANGE_ERROR_UL, "RangeError") @@ -205,6 +213,11 @@ ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_EXEC, "exec") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_TEST, "test") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_NAME, "name") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_MESSAGE, "message") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_G_CHAR, "g") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_I_CHAR, "i") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_M_CHAR, "m") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_SLASH_CHAR, "/") +ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP, "(?:)") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_LEFT_SQUARE_CHAR, "[") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_RIGHT_SQUARE_CHAR, "]") ECMA_MAGIC_STRING_DEF (ECMA_MAGIC_STRING_COLON_CHAR, ":") diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h index a300c76f84..43e1749842 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h @@ -133,12 +133,14 @@ OBJECT_VALUE (ECMA_MAGIC_STRING_DATE_UL, ECMA_PROPERTY_CONFIGURABLE) #endif /* 
CONFIG_ECMA_COMPACT_PROFILE_DISABLE_DATE_BUILTIN */ +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN // ECMA-262 v5, 15.1.4.8 -CP_UNIMPLEMENTED_VALUE (ECMA_MAGIC_STRING_REG_EXP_UL, - ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP), - ECMA_PROPERTY_WRITABLE, - ECMA_PROPERTY_NOT_ENUMERABLE, - ECMA_PROPERTY_CONFIGURABLE) +OBJECT_VALUE (ECMA_MAGIC_STRING_REGEXP_UL, + ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP), + ECMA_PROPERTY_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_CONFIGURABLE) +#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ #ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ERROR_BUILTINS // ECMA-262 v5, 15.1.4.9 diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp new file mode 100644 index 0000000000..c197d41010 --- /dev/null +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.cpp @@ -0,0 +1,229 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ecma-builtins.h" +#include "ecma-conversion.h" +#include "ecma-exceptions.h" +#include "ecma-globals.h" +#include "ecma-helpers.h" +#include "ecma-objects.h" +#include "ecma-try-catch-macro.h" + +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN +#include "ecma-regexp-object.h" +#include "re-compiler.h" + +#define ECMA_BUILTINS_INTERNAL +#include "ecma-builtins-internal.h" + +#define BUILTIN_INC_HEADER_NAME "ecma-builtin-regexp-prototype.inc.h" +#define BUILTIN_UNDERSCORED_ID regexp_prototype +#include "ecma-builtin-internal-routines-template.inc.h" + +/** \addtogroup ecma ECMA + * @{ + * + * \addtogroup ecmabuiltins + * @{ + * + * \addtogroup regexp ECMA RegExp.prototype object built-in + * @{ + */
+
+/**
+ * The RegExp.prototype object's 'exec' routine
+ *
+ * Raises a TypeError unless 'this' is an object whose class is RegExp.
+ * Otherwise the argument is converted to a string, copied into a
+ * zero-terminated buffer and matched by ecma_regexp_exec_helper against
+ * the bytecode stored in the object's internal property.
+ *
+ * See also:
+ *          ECMA-262 v5, 15.10.6.2
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value.
+ */
+static ecma_completion_value_t
+ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
+                                    ecma_value_t arg) /**< routine's argument */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+
+  /* The object check must come first: 'this' can be any value, e.g. exec.call (5, "a") */
+  if (!ecma_is_value_object (this_arg)
+      || ecma_object_get_class_name (ecma_get_object_from_value (this_arg)) != ECMA_MAGIC_STRING_REGEXP_UL)
+  {
+    ret_value = ecma_raise_type_error ((const ecma_char_t *) "Incomplete RegExp type");
+  }
+  else
+  {
+    ECMA_TRY_CATCH (obj_this, ecma_op_to_object (this_arg), ret_value);
+
+    ecma_object_t *obj_p = ecma_get_object_from_value (obj_this);
+    ecma_property_t *bytecode_prop_p = ecma_get_internal_property (obj_p, ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE);
+    re_bytecode_t *bytecode_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value);
+
+    ECMA_TRY_CATCH (input_str_value,
+                    ecma_op_to_string (arg),
+                    ret_value);
+
+    ecma_string_t *input_str_p = ecma_get_string_from_value (input_str_value);
+
+    /* Copy the input string into a zero-terminated character buffer for the matcher */
+    int32_t input_str_len = ecma_string_get_length (input_str_p);
+
+    MEM_DEFINE_LOCAL_ARRAY (input_zt_str_p, input_str_len + 1, ecma_char_t);
+
+    ssize_t zt_str_size = (ssize_t) sizeof (ecma_char_t) * (input_str_len + 1);
+    ecma_string_to_zt_string (input_str_p, input_zt_str_p, zt_str_size);
+
+    ret_value = ecma_regexp_exec_helper (obj_p, bytecode_p, input_zt_str_p);
+
+    MEM_FINALIZE_LOCAL_ARRAY (input_zt_str_p);
+
+    ECMA_FINALIZE (input_str_value);
+
+    ECMA_FINALIZE (obj_this);
+  }
+
+  return ret_value;
+} /* ecma_builtin_regexp_prototype_exec */
+
+/**
+ * The RegExp.prototype object's 'test' routine
+ *
+ * Runs the 'exec' routine and maps its result to a boolean:
+ * false if the result is undefined, true otherwise.
+ *
+ * See also:
+ *          ECMA-262 v5, 15.10.6.3
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value.
+ */
+static ecma_completion_value_t
+ecma_builtin_regexp_prototype_test (ecma_value_t this_arg, /**< this argument */
+                                    ecma_value_t arg) /**< routine's argument */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+
+  ECMA_TRY_CATCH (match_value,
+                  ecma_builtin_regexp_prototype_exec (this_arg, arg),
+                  ret_value);
+
+  if (ecma_is_value_undefined (match_value))
+  {
+    ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE);
+  }
+  else
+  {
+    ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE);
+  }
+
+  ECMA_FINALIZE (match_value);
+
+  return ret_value;
+} /* ecma_builtin_regexp_prototype_test */
+
+/**
+ * The RegExp.prototype object's 'toString' routine
+ *
+ * Builds "/" + source + "/" followed by "g", "i" and "m" for each of the
+ * global, ignoreCase and multiline properties that is true.
+ * Raises a TypeError unless 'this' is an object whose class is RegExp.
+ *
+ * See also:
+ *          ECMA-262 v5, 15.10.6.4
+ *
+ * @return completion value
+ *         Returned value must be freed with ecma_free_completion_value.
+ */
+static ecma_completion_value_t
+ecma_builtin_regexp_prototype_to_string (ecma_value_t this_arg) /**< this argument */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+
+  /* The object check must come first: 'this' can be any value */
+  if (!ecma_is_value_object (this_arg)
+      || ecma_object_get_class_name (ecma_get_object_from_value (this_arg)) != ECMA_MAGIC_STRING_REGEXP_UL)
+  {
+    ret_value = ecma_raise_type_error ((const ecma_char_t *) "Incomplete RegExp type");
+  }
+  else
+  {
+    ECMA_TRY_CATCH (obj_this,
+                    ecma_op_to_object (this_arg),
+                    ret_value);
+
+    ecma_object_t *obj_p = ecma_get_object_from_value (obj_this);
+
+    /* Get RegExp source from the source property */
+    ecma_string_t *magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_SOURCE);
+    ecma_property_t *source_prop_p = ecma_op_object_get_property (obj_p, magic_string_p);
+    ecma_deref_ecma_string (magic_string_p);
+
+    /*
+     * The source string is owned by the property. The concatenation leaves its
+     * operands to the caller (see the explicit derefs below), so the string is
+     * passed as is: taking an extra reference and releasing it through the
+     * original pointer is unbalanced whenever copy_or_ref returns a real copy.
+     */
+    ecma_string_t *src_sep_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_SLASH_CHAR);
+    ecma_string_t *source_str_p = ecma_get_string_from_value (source_prop_p->u.named_data_property.value);
+    ecma_string_t *output_str_p = ecma_concat_ecma_strings (src_sep_str_p, source_str_p);
+
+    ecma_string_t *concat_p = ecma_concat_ecma_strings (output_str_p, src_sep_str_p);
+    ecma_deref_ecma_string (src_sep_str_p);
+    ecma_deref_ecma_string (output_str_p);
+    output_str_p = concat_p;
+
+    /* Check the global flag */
+    magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_GLOBAL);
+    ecma_property_t *global_prop_p = ecma_op_object_get_property (obj_p, magic_string_p);
+    ecma_deref_ecma_string (magic_string_p);
+
+    if (ecma_is_value_true (global_prop_p->u.named_data_property.value))
+    {
+      ecma_string_t *g_flag_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_G_CHAR);
+      concat_p = ecma_concat_ecma_strings (output_str_p, g_flag_str_p);
+      ecma_deref_ecma_string (output_str_p);
+      ecma_deref_ecma_string (g_flag_str_p);
+      output_str_p = concat_p;
+    }
+
+    /* Check the ignoreCase flag */
+    magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_IGNORECASE_UL);
+    ecma_property_t *ignorecase_prop_p = ecma_op_object_get_property (obj_p, magic_string_p);
+    ecma_deref_ecma_string (magic_string_p);
+
+    if (ecma_is_value_true (ignorecase_prop_p->u.named_data_property.value))
+    {
+      ecma_string_t *ic_flag_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_I_CHAR);
+      concat_p = ecma_concat_ecma_strings (output_str_p, ic_flag_str_p);
+      ecma_deref_ecma_string (output_str_p);
+      ecma_deref_ecma_string (ic_flag_str_p);
+      output_str_p = concat_p;
+    }
+
+    /* Check the multiline flag */
+    magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_MULTILINE);
+    ecma_property_t *multiline_prop_p = ecma_op_object_get_property (obj_p, magic_string_p);
+    ecma_deref_ecma_string (magic_string_p);
+
+    if (ecma_is_value_true (multiline_prop_p->u.named_data_property.value))
+    {
+      ecma_string_t *m_flag_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_M_CHAR);
+      concat_p = ecma_concat_ecma_strings (output_str_p, m_flag_str_p);
+      ecma_deref_ecma_string (output_str_p);
+      ecma_deref_ecma_string (m_flag_str_p);
+      output_str_p = concat_p;
+    }
+
+    ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_str_p));
+
+    ECMA_FINALIZE (obj_this);
+  }
+
+  return ret_value;
+} /* ecma_builtin_regexp_prototype_to_string */
+
+/** + * @} + * @} + * @} + */ + +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h new file mode 100644 index 0000000000..232591597d --- /dev/null +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h @@ -0,0 +1,52 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * RegExp.prototype built-in description + */ + +#ifndef OBJECT_ID +# define OBJECT_ID(builtin_object_id) +#endif /* !OBJECT_ID */ + +#ifndef OBJECT_VALUE +# define OBJECT_VALUE(name, obj_getter, prop_writable, prop_enumerable, prop_configurable) +#endif /* !OBJECT_VALUE */ + +#ifndef ROUTINE +# define ROUTINE(name, c_function_name, args_number, length_prop_value) +#endif /* !ROUTINE */ + +/* Object identifier */ +OBJECT_ID (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE) + +OBJECT_VALUE (ECMA_MAGIC_STRING_CONSTRUCTOR, + ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP), + ECMA_PROPERTY_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_CONFIGURABLE) + +ROUTINE (ECMA_MAGIC_STRING_EXEC, ecma_builtin_regexp_prototype_exec, 1, 1) +ROUTINE (ECMA_MAGIC_STRING_TEST, ecma_builtin_regexp_prototype_test, 1, 1) +ROUTINE (ECMA_MAGIC_STRING_TO_STRING_UL, ecma_builtin_regexp_prototype_to_string, 0, 0) + +#undef OBJECT_ID +#undef SIMPLE_VALUE +#undef NUMBER_VALUE +#undef STRING_VALUE +#undef OBJECT_VALUE +#undef CP_UNIMPLEMENTED_VALUE +#undef ROUTINE diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.cpp new file mode 100644 index 0000000000..35b7e75a1a --- /dev/null +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.cpp @@ -0,0 +1,142 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ecma-alloc.h" +#include "ecma-builtins.h" +#include "ecma-conversion.h" +#include "ecma-exceptions.h" +#include "ecma-helpers.h" +#include "ecma-objects.h" +#include "ecma-regexp-object.h" +#include "ecma-try-catch-macro.h" + +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + +#define ECMA_BUILTINS_INTERNAL +#include "ecma-builtins-internal.h" + +#define BUILTIN_INC_HEADER_NAME "ecma-builtin-regexp.inc.h" +#define BUILTIN_UNDERSCORED_ID regexp +#include "ecma-builtin-internal-routines-template.inc.h" + +/** \addtogroup ecma ECMA + * @{ + * + * \addtogroup ecmabuiltins + * @{ + * + * \addtogroup regexp ECMA RegExp object built-in + * @{ + */
+
+/**
+ * Handle calling [[Call]] of built-in RegExp object
+ *
+ * Calling RegExp as a function is forwarded unchanged to the [[Construct]] handler.
+ *
+ * @return completion-value
+ */
+ecma_completion_value_t
+ecma_builtin_regexp_dispatch_call (const ecma_value_t *arguments_list_p, /**< arguments list */
+                                   ecma_length_t arguments_list_len) /**< number of arguments */
+{
+  return ecma_builtin_regexp_dispatch_construct (arguments_list_p, arguments_list_len);
+} /* ecma_builtin_regexp_dispatch_call */
+
+/**
+ * Handle calling [[Construct]] of built-in RegExp object
+ *
+ * - no arguments: a RegExp is created from the "(?:)" pattern without flags;
+ * - the pattern is a RegExp object: a copy of that value is returned if the flags
+ *   are undefined, otherwise a TypeError is raised;
+ * - anything else: pattern (and flags, unless undefined) are converted to strings
+ *   and passed to ecma_op_create_regexp_object.
+ *
+ * See also:
+ *          ECMA-262 v5, 15.10.4.1
+ *
+ * @return completion-value
+ */
+ecma_completion_value_t
+ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /**< arguments list */
+                                        ecma_length_t arguments_list_len) /**< number of arguments */
+{
+  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
+  ecma_value_t pattern_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_UNDEFINED);
+  ecma_value_t flags_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_UNDEFINED);
+
+  if (arguments_list_len > 0)
+  {
+    /* pattern string or RegExp object */
+    pattern_value = arguments_list_p[0];
+
+    if (arguments_list_len > 1)
+    {
+      flags_value = arguments_list_p[1];
+    }
+  }
+
+  if (arguments_list_len == 0)
+  {
+    ecma_string_t *magic_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP);
+    ret_value = ecma_op_create_regexp_object (magic_str_p, NULL);
+    ecma_deref_ecma_string (magic_str_p);
+  }
+  else if (ecma_is_value_object (pattern_value)
+           && ecma_object_get_class_name (ecma_get_object_from_value (pattern_value)) == ECMA_MAGIC_STRING_REGEXP_UL)
+  {
+    /* flags_value is still undefined when fewer than two arguments were passed */
+    if (ecma_is_value_undefined (flags_value))
+    {
+      ret_value = ecma_make_normal_completion_value (ecma_copy_value (pattern_value, true));
+    }
+    else
+    {
+      ret_value = ecma_raise_type_error ((const ecma_char_t *) "Invalid argument of RegExp call.");
+    }
+  }
+  else
+  {
+    ECMA_TRY_CATCH (regexp_str_value,
+                    ecma_op_to_string (pattern_value),
+                    ret_value);
+
+    ecma_string_t *pattern_string_p = ecma_get_string_from_value (regexp_str_value);
+
+    ecma_string_t *flags_string_p = NULL;
+
+    /*
+     * Undefined flags (missing or passed explicitly) mean "no flags": converting
+     * the value would yield the string "undefined" instead.
+     */
+    if (!ecma_is_value_undefined (flags_value))
+    {
+      ECMA_TRY_CATCH (flags_str_value,
+                      ecma_op_to_string (flags_value),
+                      ret_value);
+
+      flags_string_p = ecma_copy_or_ref_ecma_string (ecma_get_string_from_value (flags_str_value));
+      ECMA_FINALIZE (flags_str_value);
+    }
+
+    /* ret_value is non-empty here only if the conversion of the flags has thrown */
+    if (ecma_is_completion_value_empty (ret_value))
+    {
+      ret_value = ecma_op_create_regexp_object (pattern_string_p, flags_string_p);
+    }
+
+    if (flags_string_p != NULL)
+    {
+      ecma_deref_ecma_string (flags_string_p);
+    }
+
+    ECMA_FINALIZE (regexp_str_value);
+  }
+
+  return ret_value;
+} /* ecma_builtin_regexp_dispatch_construct */
+
+/** + * @} + * @} + * @} + */ + +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.inc.h
b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.inc.h new file mode 100644 index 0000000000..1170cb009f --- /dev/null +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.inc.h @@ -0,0 +1,97 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * RegExp built-in description + */ + +#ifndef OBJECT_ID +# define OBJECT_ID(builtin_object_id) +#endif /* !OBJECT_ID */ + +#ifndef OBJECT_VALUE +# define OBJECT_VALUE(name, obj_getter, prop_writable, prop_enumerable, prop_configurable) +#endif /* !OBJECT_VALUE */ + +#ifndef NUMBER_VALUE +# define NUMBER_VALUE(name, number_value, prop_writable, prop_enumerable, prop_configurable) +#endif /* !NUMBER_VALUE */ + +#ifndef SIMPLE_VALUE +# define SIMPLE_VALUE(name, simple_value, prop_writable, prop_enumerable, prop_configurable) +#endif /* !SIMPLE_VALUE */ + +#ifndef STRING_VALUE +# define STRING_VALUE(name, magic_string_id, prop_writable, prop_enumerable, prop_configurable) +#endif /* !STRING_VALUE */ + +/* Object identifier */ +OBJECT_ID (ECMA_BUILTIN_ID_REGEXP) + +// ECMA-262 v5, 15.10.5.1 +OBJECT_VALUE (ECMA_MAGIC_STRING_PROTOTYPE, + ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE), + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +// ECMA-262 v5, 15.10.7.1 +STRING_VALUE (ECMA_MAGIC_STRING_SOURCE, + ECMA_MAGIC_STRING_REGEXP_SOURCE_UL, + 
ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +// ECMA-262 v5, 15.10.7.2 +SIMPLE_VALUE (ECMA_MAGIC_STRING_GLOBAL, + ECMA_SIMPLE_VALUE_FALSE, + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +// ECMA-262 v5, 15.10.7.3 +SIMPLE_VALUE (ECMA_MAGIC_STRING_IGNORECASE_UL, + ECMA_SIMPLE_VALUE_FALSE, + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) +// ECMA-262 v5, 15.10.7.4 +SIMPLE_VALUE (ECMA_MAGIC_STRING_MULTILINE, + ECMA_SIMPLE_VALUE_FALSE, + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +// ECMA-262 v5, 15.10.7.5 +NUMBER_VALUE (ECMA_MAGIC_STRING_LASTINDEX_UL, + 0, + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +NUMBER_VALUE (ECMA_MAGIC_STRING_LENGTH, + 2, + ECMA_PROPERTY_NOT_WRITABLE, + ECMA_PROPERTY_NOT_ENUMERABLE, + ECMA_PROPERTY_NOT_CONFIGURABLE) + +#undef OBJECT_ID +#undef SIMPLE_VALUE +#undef NUMBER_VALUE +#undef STRING_VALUE +#undef OBJECT_VALUE +#undef CP_UNIMPLEMENTED_VALUE +#undef ROUTINE diff --git a/jerry-core/ecma/builtin-objects/ecma-builtins.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtins.inc.h index 285588f750..a2a7abca61 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtins.inc.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtins.inc.h @@ -148,6 +148,24 @@ BUILTIN (ECMA_BUILTIN_ID_DATE, date) #endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_DATE_BUILTIN*/ +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN +/* The RegExp.prototype object (15.10.6) */ +BUILTIN (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE, + ECMA_OBJECT_TYPE_GENERAL, + ECMA_BUILTIN_ID_OBJECT_PROTOTYPE, + true, + true, + regexp_prototype) + +/* The RegExp object (15.10) */ +BUILTIN (ECMA_BUILTIN_ID_REGEXP, + ECMA_OBJECT_TYPE_FUNCTION, + ECMA_BUILTIN_ID_FUNCTION_PROTOTYPE, + true, + true, + regexp) +#endif /* 
!CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ + #ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ERROR_BUILTINS /* The Error.prototype object (15.11.4) */ BUILTIN (ECMA_BUILTIN_ID_ERROR_PROTOTYPE, diff --git a/jerry-core/ecma/operations/ecma-objects.cpp b/jerry-core/ecma/operations/ecma-objects.cpp index f6dd435dc2..a8af6c535b 100644 --- a/jerry-core/ecma/operations/ecma-objects.cpp +++ b/jerry-core/ecma/operations/ecma-objects.cpp @@ -681,6 +681,12 @@ ecma_object_get_class_name (ecma_object_t *obj_p) /**< object */ return ECMA_MAGIC_STRING_ERROR_UL; } #endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ERROR_BUILTINS */ +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + case ECMA_BUILTIN_ID_REGEXP_PROTOTYPE: + { + return ECMA_MAGIC_STRING_REGEXP_UL; + } +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ default: { JERRY_ASSERT (ecma_builtin_is (obj_p, ECMA_BUILTIN_ID_GLOBAL)); diff --git a/jerry-core/ecma/operations/ecma-regexp-object.cpp b/jerry-core/ecma/operations/ecma-regexp-object.cpp new file mode 100644 index 0000000000..0d43b046e1 --- /dev/null +++ b/jerry-core/ecma/operations/ecma-regexp-object.cpp @@ -0,0 +1,1329 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ecma-alloc.h" +#include "ecma-array-object.h" +#include "ecma-exceptions.h" +#include "ecma-gc.h" +#include "ecma-globals.h" +#include "ecma-objects.h" +#include "ecma-regexp-object.h" +#include "ecma-try-catch-macro.h" +#include "jrt-libc-includes.h" +#include "re-compiler.h" + +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + +#define ECMA_BUILTINS_INTERNAL +#include "ecma-builtins-internal.h" + +/** \addtogroup ecma ECMA + * @{ + * + * \addtogroup ecmaregexpobject ECMA RegExp object related routines + * @{ + */ + +/* + * RegExp results are stored in an array of string pointers. If N is the number + * of groups then the length of the array is 2*N, because every group has a start + * and end. We have to handle those pointers. + * + * [0] RE global start + * [1] RE global end + * [2] 1st group start + * [3] 1st group end + * ... + * [n] n/2 th group start + * [n+1] n/2 th group end + */ +#define RE_GLOBAL_START_IDX 0 +#define RE_GLOBAL_END_IDX 1 + +/* RegExp flags */ +#define RE_FLAG_GLOBAL (1 << 0) /* ECMA-262 v5, 15.10.7.2 */ +#define RE_FLAG_IGNORE_CASE (1 << 1) /* ECMA-262 v5, 15.10.7.3 */ +#define RE_FLAG_MULTILINE (1 << 2) /* ECMA-262 v5, 15.10.7.4 */ + +/** + * Parse RegExp flags (global, ignoreCase, multiline) + * + * Every flag may appear at most once: an unknown or repeated flag character + * raises a SyntaxError and stops the parsing. The recognised bits are OR-ed + * into *flags_p, so the caller is expected to pass it zero-initialised. + * + * See also: ECMA-262 v5, 15.10.4.1 + * + * @return completion value (empty on success, SyntaxError throw otherwise) + * Returned value must be freed with ecma_free_completion_value + */ +static ecma_completion_value_t +re_parse_regexp_flags (ecma_string_t *flags_str_p, /**< Input string with flags */ + uint8_t *flags_p) /**< Output: parsed flag bits */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + int32_t flags_str_len = ecma_string_get_length (flags_str_p); + MEM_DEFINE_LOCAL_ARRAY (flags_start_p, flags_str_len + 1, ecma_char_t); + ssize_t zt_str_size = (ssize_t) sizeof (ecma_char_t) * (flags_str_len + 1); + ecma_string_to_zt_string (flags_str_p, flags_start_p, zt_str_size); + + ecma_char_t *flags_char_p = flags_start_p; + /* The bound is the number of characters, not the byte size of the buffer, + * so the loop stays inside the buffer even when ecma_char_t is wider than + * one byte. */ + for
(int32_t ch_cnt = 0; ch_cnt < flags_str_len + && ecma_is_completion_value_empty (ret_value); ch_cnt++) + { + switch (*flags_char_p) + { + case 'g': + { + if (*flags_p & RE_FLAG_GLOBAL) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid RegExp flags."); + } + *flags_p |= RE_FLAG_GLOBAL; + break; + } + case 'i': + { + if (*flags_p & RE_FLAG_IGNORE_CASE) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid RegExp flags."); + } + *flags_p |= RE_FLAG_IGNORE_CASE; + break; + } + case 'm': + { + if (*flags_p & RE_FLAG_MULTILINE) + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid RegExp flags."); + } + *flags_p |= RE_FLAG_MULTILINE; + break; + } + default: + { + ret_value = ecma_raise_syntax_error ((const ecma_char_t *) "Invalid RegExp flags."); + break; + } + } + flags_char_p++; + } + + MEM_FINALIZE_LOCAL_ARRAY (flags_start_p); + + return ret_value; +} /* re_parse_regexp_flags */ + +/** + * RegExp object creation operation.
+ * + * See also: ECMA-262 v5, 15.10.4.1 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_op_create_regexp_object (ecma_string_t *pattern_p, /**< input pattern */ + ecma_string_t *flags_str_p) /**< flags */ +{ + JERRY_ASSERT (pattern_p != NULL); + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + uint8_t flags = 0; + if (flags_str_p != NULL) + { + ECMA_TRY_CATCH (empty, re_parse_regexp_flags (flags_str_p, &flags), ret_value); + ECMA_FINALIZE (empty); + + if (!ecma_is_completion_value_empty (ret_value)) + { + return ret_value; + } + } + + ecma_object_t *re_prototype_obj_p = ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE); + + ecma_object_t *obj_p = ecma_create_object (re_prototype_obj_p, true, ECMA_OBJECT_TYPE_GENERAL); + ecma_deref_object (re_prototype_obj_p); + + /* Set the internal [[Class]] property */ + ecma_property_t *class_prop_p = ecma_create_internal_property (obj_p, ECMA_INTERNAL_PROPERTY_CLASS); + class_prop_p->u.internal_property.value = ECMA_MAGIC_STRING_REGEXP_UL; + + /* Set source property. ECMA-262 v5, 15.10.7.1 */ + ecma_string_t *magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_SOURCE); + ecma_property_t *source_prop_p = ecma_create_named_data_property (obj_p, + magic_string_p, + false, false, false); + ecma_deref_ecma_string (magic_string_p); + ecma_set_named_data_property_value (source_prop_p, + ecma_make_string_value (ecma_copy_or_ref_ecma_string (pattern_p))); + + ecma_simple_value_t prop_value; + + /* Set global property. ECMA-262 v5, 15.10.7.2*/ + magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_GLOBAL); + ecma_property_t *global_prop_p = ecma_create_named_data_property (obj_p, + magic_string_p, + false, false, false); + ecma_deref_ecma_string (magic_string_p); + prop_value = flags & RE_FLAG_GLOBAL ? 
ECMA_SIMPLE_VALUE_TRUE : ECMA_SIMPLE_VALUE_FALSE; + ecma_set_named_data_property_value (global_prop_p, ecma_make_simple_value (prop_value)); + + /* Set ignoreCase property. ECMA-262 v5, 15.10.7.3*/ + magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_IGNORECASE_UL); + ecma_property_t *ignorecase_prop_p = ecma_create_named_data_property (obj_p, + magic_string_p, + false, false, false); + ecma_deref_ecma_string (magic_string_p); + prop_value = flags & RE_FLAG_IGNORE_CASE ? ECMA_SIMPLE_VALUE_TRUE : ECMA_SIMPLE_VALUE_FALSE; + ecma_set_named_data_property_value (ignorecase_prop_p, ecma_make_simple_value (prop_value)); + + + /* Set multiline property. ECMA-262 v5, 15.10.7.4*/ + magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_MULTILINE); + ecma_property_t *multiline_prop_p = ecma_create_named_data_property (obj_p, + magic_string_p, + false, false, false); + ecma_deref_ecma_string (magic_string_p); + prop_value = flags & RE_FLAG_MULTILINE ? ECMA_SIMPLE_VALUE_TRUE : ECMA_SIMPLE_VALUE_FALSE; + ecma_set_named_data_property_value (multiline_prop_p, ecma_make_simple_value (prop_value)); + + /* Set lastIndex property. ECMA-262 v5, 15.10.7.5*/ + magic_string_p = ecma_get_magic_string (ECMA_MAGIC_STRING_LASTINDEX_UL); + ecma_property_t *lastindex_prop_p = ecma_create_named_data_property (obj_p, + magic_string_p, + true, false, false); + ecma_deref_ecma_string (magic_string_p); + + ecma_number_t *lastindex_num_p = ecma_alloc_number (); + *lastindex_num_p = ECMA_NUMBER_ZERO; + ecma_named_data_property_assign_value (obj_p, lastindex_prop_p, ecma_make_number_value (lastindex_num_p)); + ecma_dealloc_number (lastindex_num_p); + + /* Set bytecode internal property. */ + ecma_property_t *bytecode = ecma_create_internal_property (obj_p, ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE); + + /* Compile bytecode. 
*/ + ECMA_TRY_CATCH (empty, re_compile_bytecode (bytecode, pattern_p, flags), ret_value); + ret_value = ecma_make_normal_completion_value (ecma_make_object_value (obj_p)); + ECMA_FINALIZE (empty); + + if (ecma_is_completion_value_throw (ret_value)) + { + ecma_deref_object (obj_p); + } + + return ret_value; +} /* ecma_op_create_regexp_object */ + +/** + * Backtrack a unicode character + */ +static const ecma_char_t * +utf8_backtrack (const ecma_char_t *str_p) +{ + /* FIXME: change to string iterator with unicode support, when it would be implemented */ + return --str_p; +} /* utf8_backtrack */ + +/** + * Helper to get an input character and increase string pointer. + */ +static ecma_char_t +get_input_char (const ecma_char_t** char_p) +{ + /* FIXME: change to string iterator with unicode support, when it would be implemented */ + const ecma_char_t ch = **char_p; + (*char_p)++; + return ch; +} /* get_input_char */ + +/** + * Helper to get current input character, won't increase string pointer. + */ +static ecma_char_t +lookup_input_char (const ecma_char_t *str_p) +{ + /* FIXME: change to string iterator with unicode support, when it would be implemented */ + return *str_p; +} /* lookup_input_char */ + +/** + * Helper to get previous input character, won't decrease string pointer. + */ +static ecma_char_t +lookup_prev_char (const ecma_char_t *str_p) +{ + /* FIXME: change to string iterator with unicode support, when it would be implemented */ + return *(--str_p); +} /* lookup_prev_char */ + +/** + * Recursive function for RegExp matching. Tests for a regular expression + * match and returns a MatchResult value. 
+ * + * See also: + * ECMA-262 v5, 15.10.2.1 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +static ecma_completion_value_t +re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */ + re_bytecode_t *bc_p, /**< pointer to the current RegExp bytecode */ + const ecma_char_t *str_p, /**< pointer to the current input character */ + const ecma_char_t **res_p) /**< pointer to the matching substring */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + re_opcode_t op; + + if (re_ctx_p->recursion_depth >= RE_EXECUTE_RECURSION_LIMIT) + { + ret_value = ecma_raise_range_error ((const ecma_char_t *) "RegExp executor recursion limit is exceeded."); + return ret_value; + } + re_ctx_p->recursion_depth++; + + while ((op = re_get_opcode (&bc_p))) + { + if (re_ctx_p->match_limit >= RE_EXECUTE_MATCH_LIMIT) + { + ret_value = ecma_raise_range_error ((const ecma_char_t *) "RegExp executor steps limit is exceeded."); + return ret_value; + } + re_ctx_p->match_limit++; + + switch (op) + { + case RE_OP_MATCH: + { + JERRY_DDLOG ("Execute RE_OP_MATCH: match\n"); + *res_p = str_p; + re_ctx_p->recursion_depth--; + ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE); + return ret_value; /* match */ + } + case RE_OP_CHAR: + { + uint32_t ch1 = re_get_value (&bc_p); + uint32_t ch2 = get_input_char (&str_p); + JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2); + + if (ch2 == '\0' || ch1 != ch2) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + case RE_OP_PERIOD: + { + uint32_t ch1 = get_input_char (&str_p); + JERRY_DDLOG ("Period matching '.' 
to %d: ", ch1); + if (ch1 == '\n' || ch1 == '\0') + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + case RE_OP_ASSERT_START: + { + JERRY_DDLOG ("Execute RE_OP_ASSERT_START: "); + + if (str_p <= re_ctx_p->input_start_p) + { + JERRY_DDLOG ("match\n"); + break; + } + + if (!(re_ctx_p->flags & RE_FLAG_MULTILINE)) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + + if (ecma_char_is_line_terminator (lookup_prev_char (str_p))) + { + JERRY_DDLOG ("match\n"); + break; + } + + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_ASSERT_END: + { + JERRY_DDLOG ("Execute RE_OP_ASSERT_END: "); + + if (str_p >= re_ctx_p->input_end_p) + { + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + + if (!(re_ctx_p->flags & RE_FLAG_MULTILINE)) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + + if (ecma_char_is_line_terminator (lookup_input_char (str_p))) + { + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_ASSERT_WORD_BOUNDARY: + case RE_OP_ASSERT_NOT_WORD_BOUNDARY: + { + bool is_wordchar_left, is_wordchar_right; + + if (str_p <= re_ctx_p->input_start_p) + { + is_wordchar_left = false; /* not a wordchar */ + } + else + { + is_wordchar_left = ecma_char_is_word_char (lookup_prev_char (str_p)); + } + + if (str_p >= re_ctx_p->input_end_p) + { + is_wordchar_right = false; /* not a wordchar */ + } + else + { + is_wordchar_right = ecma_char_is_word_char (lookup_input_char (str_p)); + 
} + + if (op == RE_OP_ASSERT_WORD_BOUNDARY) + { + JERRY_DDLOG ("Execute RE_OP_ASSERT_WORD_BOUNDARY at %c: ", *str_p); + if (is_wordchar_left == is_wordchar_right) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + else + { + JERRY_ASSERT (op == RE_OP_ASSERT_NOT_WORD_BOUNDARY); + JERRY_DDLOG ("Execute RE_OP_ASSERT_NOT_WORD_BOUNDARY at %c: ", *str_p); + + if (is_wordchar_left != is_wordchar_right) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + case RE_OP_LOOKAHEAD_POS: + case RE_OP_LOOKAHEAD_NEG: + { + ecma_completion_value_t match_value = ecma_make_empty_completion_value (); + const ecma_char_t *sub_str_p = NULL; + + MEM_DEFINE_LOCAL_ARRAY (saved_bck_p, re_ctx_p->num_of_captures, ecma_char_t *); + size_t size = (size_t) (re_ctx_p->num_of_captures) * sizeof (const ecma_char_t *); + memcpy (saved_bck_p, re_ctx_p->saved_p, size); + + do + { + uint32_t offset = re_get_value (&bc_p); + if (!sub_str_p) + { + match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_completion_value_throw (match_value)) + { + break; + } + } + bc_p += offset; + } + while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE); + + if (!ecma_is_completion_value_throw (match_value)) + { + JERRY_DDLOG ("Execute RE_OP_LOOKAHEAD_POS/NEG: "); + ecma_free_completion_value (match_value); + if ((op == RE_OP_LOOKAHEAD_POS && sub_str_p) + || (op == RE_OP_LOOKAHEAD_NEG && !sub_str_p)) + { + JERRY_DDLOG ("match\n"); + match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + } + else + { + JERRY_DDLOG ("fail\n"); + match_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + + if (!ecma_is_completion_value_throw (match_value)) + { + re_ctx_p->recursion_depth--; + if (ecma_is_value_true 
(match_value)) + { + *res_p = sub_str_p; + } + else + { + JERRY_ASSERT (ecma_is_value_boolean (match_value)); + /* restore saved */ + memcpy (re_ctx_p->saved_p, saved_bck_p, size); + } + } + + MEM_FINALIZE_LOCAL_ARRAY (saved_bck_p); + return match_value; + } + case RE_OP_CHAR_CLASS: + case RE_OP_INV_CHAR_CLASS: + { + uint32_t curr_ch, num_of_ranges; + bool is_match; + + JERRY_DDLOG ("Execute RE_OP_CHAR_CLASS/RE_OP_INV_CHAR_CLASS, "); + + if (str_p >= re_ctx_p->input_end_p) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + + curr_ch = get_input_char (&str_p); + + num_of_ranges = re_get_value (&bc_p); + is_match = false; + while (num_of_ranges) + { + uint32_t ch1, ch2; + ch1 = (uint32_t) re_get_value (&bc_p); + ch2 = (uint32_t) re_get_value (&bc_p); + JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ", + num_of_ranges, ch1, ch2, curr_ch); + + if (curr_ch >= ch1 && curr_ch <= ch2) + { + /* We must read all the ranges from bytecode. 
*/ + is_match = true; + } + num_of_ranges--; + } + + if (op == RE_OP_CHAR_CLASS) + { + if (!is_match) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + else + { + JERRY_ASSERT (op == RE_OP_INV_CHAR_CLASS); + if (is_match) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + case RE_OP_BACKREFERENCE: + { + uint32_t backref_idx; + const ecma_char_t *sub_str_p; + + backref_idx = re_get_value (&bc_p); + JERRY_DDLOG ("Execute RE_OP_BACKREFERENCE (idx: %d): ", backref_idx); + backref_idx *= 2; /* backref n -> saved indices [n*2, n*2+1] */ + JERRY_ASSERT (backref_idx >= 2 && backref_idx + 1 < re_ctx_p->num_of_captures); + + if (!re_ctx_p->saved_p[backref_idx] || !re_ctx_p->saved_p[backref_idx + 1]) + { + JERRY_DDLOG ("match\n"); + break; /* capture is 'undefined', always matches! 
*/ + } + + sub_str_p = re_ctx_p->saved_p[backref_idx]; + while (sub_str_p < re_ctx_p->saved_p[backref_idx + 1]) + { + uint32_t ch1, ch2; + + if (str_p >= re_ctx_p->input_end_p) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + + ch1 = get_input_char (&sub_str_p); + ch2 = get_input_char (&str_p); + + if (ch1 != ch2) + { + JERRY_DDLOG ("fail\n"); + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + } + JERRY_DDLOG ("match\n"); + break; /* tail merge */ + } + case RE_OP_SAVE_AT_START: + { + const ecma_char_t *old_start_p; + re_bytecode_t *old_bc_p; + + JERRY_DDLOG ("Execute RE_OP_SAVE_AT_START\n"); + old_start_p = re_ctx_p->saved_p[RE_GLOBAL_START_IDX]; + re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = str_p; + do + { + uint32_t offset = re_get_value (&bc_p); + const ecma_char_t *sub_str_p; + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + bc_p += offset; + old_bc_p = bc_p; + } + while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE); + bc_p = old_bc_p; + + re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p; + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_SAVE_AND_MATCH: + { + JERRY_DDLOG ("End of pattern is reached: match\n"); + re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = str_p; + *res_p = str_p; + re_ctx_p->recursion_depth--; + return ret_value = ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_TRUE); /* match */ + } + case RE_OP_ALTERNATIVE: + { + /* + * Alternatives should be jump over, when alternative opcode appears. 
+ */ + uint32_t offset = re_get_value (&bc_p); + JERRY_DDLOG ("Execute RE_OP_ALTERNATIVE"); + bc_p += offset; + while (*bc_p == RE_OP_ALTERNATIVE) + { + bc_p++; + offset = re_get_value (&bc_p); + /* Log after decoding the offset so that %d has a matching argument. */ + JERRY_DDLOG (", jump: %d", offset); + bc_p += offset; + } + JERRY_DDLOG ("\n"); + break; /* tail merge */ + } + case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START: + case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START: + { + /* + * On non-greedy iterations we have to execute the bytecode + * after the group first, if zero iteration is allowed. + */ + uint32_t start_idx, iter_idx, offset; + const ecma_char_t *old_start_p; + const ecma_char_t *sub_str_p; + re_bytecode_t *old_bc_p; + + old_bc_p = bc_p; /* save the bytecode start position of the group start */ + start_idx = re_get_value (&bc_p); + offset = re_get_value (&bc_p); + + if (RE_IS_CAPTURE_GROUP (op)) + { + JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2); + iter_idx = start_idx - 1; + start_idx *= 2; + + old_start_p = re_ctx_p->saved_p[start_idx]; + re_ctx_p->saved_p[start_idx] = str_p; + } + else + { + JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures); + iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1; + start_idx += re_ctx_p->num_of_captures; + } + re_ctx_p->num_of_iterations[iter_idx] = 0; + + /* Jump all over to the end of the END opcode.
*/ + bc_p += offset; + + /* Try to match after the close paren if zero is allowed */ + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + if (RE_IS_CAPTURE_GROUP (op)) + { + re_ctx_p->saved_p[start_idx] = old_start_p; + } + + bc_p = old_bc_p; + /* FALLTHRU */ + } + case RE_OP_CAPTURE_GROUP_START: + case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START: + case RE_OP_NON_CAPTURE_GROUP_START: + case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START: + { + uint32_t start_idx, iter_idx, old_iteration_cnt, offset; + const ecma_char_t *old_start_p; + const ecma_char_t *sub_str_p; + re_bytecode_t *old_bc_p; + re_bytecode_t *end_bc_p = NULL; + + start_idx = re_get_value (&bc_p); + if (op != RE_OP_CAPTURE_GROUP_START + && op != RE_OP_NON_CAPTURE_GROUP_START) + { + offset = re_get_value (&bc_p); + end_bc_p = bc_p + offset; + } + + if (RE_IS_CAPTURE_GROUP (op)) + { + JERRY_ASSERT (start_idx <= re_ctx_p->num_of_captures / 2); + iter_idx = start_idx - 1; + start_idx *= 2; + } + else + { + JERRY_ASSERT (start_idx < re_ctx_p->num_of_non_captures); + iter_idx = start_idx + (re_ctx_p->num_of_captures / 2) - 1; + start_idx += re_ctx_p->num_of_captures; + } + old_start_p = re_ctx_p->saved_p[start_idx]; + old_iteration_cnt = re_ctx_p->num_of_iterations[iter_idx]; + re_ctx_p->saved_p[start_idx] = str_p; + re_ctx_p->num_of_iterations[iter_idx] = 0; + + do + { + offset = re_get_value (&bc_p); + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + bc_p += offset; + old_bc_p = bc_p; + } + 
while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE); + bc_p = old_bc_p; + re_ctx_p->num_of_iterations[iter_idx] = old_iteration_cnt; + + /* Try to match after the close paren if zero is allowed. */ + if (op == RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START + || op == RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START) + { + JERRY_ASSERT (end_bc_p); + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, end_bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + } + + re_ctx_p->saved_p[start_idx] = old_start_p; + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_CAPTURE_NON_GREEDY_GROUP_END: + case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END: + { + uint32_t end_idx, iter_idx, min, max; + const ecma_char_t *old_end_p; + re_bytecode_t *old_bc_p; + + /* + * On non-greedy iterations we have to execute the bytecode + * after the group first. Try to iterate only if it fails. 
+ */ + old_bc_p = bc_p; /* save the bytecode start position of the group end */ + end_idx = re_get_value (&bc_p); + min = re_get_value (&bc_p); + max = re_get_value (&bc_p); + re_get_value (&bc_p); /* start offset */ + + if (RE_IS_CAPTURE_GROUP (op)) + { + JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2); + iter_idx = end_idx - 1; + end_idx = (end_idx * 2) + 1; + } + else + { + JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures); + iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1; + end_idx += re_ctx_p->num_of_captures; + } + + re_ctx_p->num_of_iterations[iter_idx]++; + if (re_ctx_p->num_of_iterations[iter_idx] >= min + && re_ctx_p->num_of_iterations[iter_idx] <= max) + { + old_end_p = re_ctx_p->saved_p[end_idx]; + re_ctx_p->saved_p[end_idx] = str_p; + + const ecma_char_t *sub_str_p; + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + + re_ctx_p->saved_p[end_idx] = old_end_p; + } + re_ctx_p->num_of_iterations[iter_idx]--; + bc_p = old_bc_p; + + /* If non-greedy fails and try to iterate... 
*/ + /* FALLTHRU */ + } + case RE_OP_CAPTURE_GREEDY_GROUP_END: + case RE_OP_NON_CAPTURE_GREEDY_GROUP_END: + { + uint32_t start_idx, end_idx, iter_idx, min, max, offset; + const ecma_char_t *old_start_p; + const ecma_char_t *old_end_p; + const ecma_char_t *sub_str_p; + re_bytecode_t *old_bc_p; + + end_idx = re_get_value (&bc_p); + min = re_get_value (&bc_p); + max = re_get_value (&bc_p); + offset = re_get_value (&bc_p); + + if (RE_IS_CAPTURE_GROUP (op)) + { + JERRY_ASSERT (end_idx <= re_ctx_p->num_of_captures / 2); + iter_idx = end_idx - 1; + start_idx = end_idx * 2; + end_idx = start_idx + 1; + } + else + { + JERRY_ASSERT (end_idx <= re_ctx_p->num_of_non_captures); + iter_idx = end_idx + (re_ctx_p->num_of_captures / 2) - 1; + end_idx += re_ctx_p->num_of_captures; + start_idx = end_idx; + } + + /* Check the empty iteration if the minimum number of iterations is reached. */ + if (re_ctx_p->num_of_iterations[iter_idx] >= min + && str_p == re_ctx_p->saved_p[start_idx]) + { + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + re_ctx_p->num_of_iterations[iter_idx]++; + + old_bc_p = bc_p; /* Save the bytecode end position of the END opcodes for matching after it. */ + old_end_p = re_ctx_p->saved_p[end_idx]; + re_ctx_p->saved_p[end_idx] = str_p; + + if (re_ctx_p->num_of_iterations[iter_idx] < max) + { + bc_p -= offset; + offset = re_get_value (&bc_p); + + old_start_p = re_ctx_p->saved_p[start_idx]; + re_ctx_p->saved_p[start_idx] = str_p; + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + + re_ctx_p->saved_p[start_idx] = old_start_p; + + /* Try to match alternatives if any. 
*/ + bc_p += offset; + while (*bc_p == RE_OP_ALTERNATIVE) + { + bc_p++; /* RE_OP_ALTERNATIVE */ + offset = re_get_value (&bc_p); + + old_start_p = re_ctx_p->saved_p[start_idx]; + re_ctx_p->saved_p[start_idx] = str_p; + + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + + re_ctx_p->saved_p[start_idx] = old_start_p; + bc_p += offset; + } + } + + if (re_ctx_p->num_of_iterations[iter_idx] >= min + && re_ctx_p->num_of_iterations[iter_idx] <= max) + { + /* Try to match the rest of the bytecode. */ + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, old_bc_p, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + } + + /* restore if fails */ + re_ctx_p->saved_p[end_idx] = old_end_p; + re_ctx_p->num_of_iterations[iter_idx]--; + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_NON_GREEDY_ITERATOR: + { + uint32_t min, max, offset, num_of_iter; + const ecma_char_t *sub_str_p; + + min = re_get_value (&bc_p); + max = re_get_value (&bc_p); + + offset = re_get_value (&bc_p); + JERRY_DDLOG ("Non-greedy iterator, min=%lu, max=%lu, offset=%ld\n", + (unsigned long) min, (unsigned long) max, (long) offset); + + num_of_iter = 0; + while (num_of_iter <= max) + { + if (num_of_iter >= min) + { + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if 
(ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + } + + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + /* A throw is not a 'true' value, so it has to be tested before the + * no-match check; otherwise the exception would be dropped here. */ + if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + else if (!ecma_is_value_true (match_value)) + { + break; + } + str_p = sub_str_p; + num_of_iter++; + } + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + case RE_OP_GREEDY_ITERATOR: + { + uint32_t min, max, offset, num_of_iter; + const ecma_char_t *sub_str_p; + + min = re_get_value (&bc_p); + max = re_get_value (&bc_p); + + offset = re_get_value (&bc_p); + JERRY_DDLOG ("Greedy iterator, min=%lu, max=%lu, offset=%ld\n", + (unsigned long) min, (unsigned long) max, (long) offset); + + /* First consume as many iterations as possible (up to max). */ + num_of_iter = 0; + while (num_of_iter < max) + { + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p, str_p, &sub_str_p); + /* Propagate a throw before treating the result as 'no more matches'. */ + if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + else if (!ecma_is_value_true (match_value)) + { + break; + } + str_p = sub_str_p; + num_of_iter++; + } + + /* Then try the rest of the bytecode, giving back one character at a time. */ + while (num_of_iter >= min) + { + ecma_completion_value_t match_value = re_match_regexp (re_ctx_p, bc_p + offset, str_p, &sub_str_p); + if (ecma_is_value_true (match_value)) + { + *res_p = sub_str_p; + re_ctx_p->recursion_depth--; + return match_value; /* match */ + } + else if (ecma_is_completion_value_throw (match_value)) + { + return match_value; + } + if (num_of_iter == min) + { + break; + } + + str_p = utf8_backtrack (str_p); + num_of_iter--; + } + re_ctx_p->recursion_depth--; + return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */ + } + default: + { + JERRY_DDLOG ("UNKNOWN opcode (%d)!\n", (uint32_t) op); + re_ctx_p->recursion_depth--; + return ecma_make_throw_obj_completion_value (ecma_new_standard_error (ECMA_ERROR_COMMON)); + } + } + } + + JERRY_UNREACHABLE (); + return ecma_make_simple_completion_value
(ECMA_SIMPLE_VALUE_FALSE); /* fail */ +} /* re_match_regexp */ + +/** + * Define the necessary properties for the result array (index, input, length). + * + * 'index' receives the given match index and 'input' a new string built from + * the start of the matcher input; both are defined as writable, enumerable + * and configurable. 'length' is set to half of the number of capture slots. + */ +static void +re_set_result_array_properties (ecma_object_t *array_obj_p, /**< result array */ + re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */ + int32_t index) /**< index of matching */ +{ + /* Set index property of the result array */ + ecma_string_t *result_prop_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_INDEX); + { + ecma_property_descriptor_t array_item_prop_desc = ecma_make_empty_property_descriptor (); + + array_item_prop_desc.is_value_defined = true; + + ecma_number_t *num_p = ecma_alloc_number (); + *num_p = (ecma_number_t) index; + array_item_prop_desc.value = ecma_make_number_value (num_p); + + array_item_prop_desc.is_writable_defined = true; + array_item_prop_desc.is_writable = true; + + array_item_prop_desc.is_enumerable_defined = true; + array_item_prop_desc.is_enumerable = true; + + array_item_prop_desc.is_configurable_defined = true; + array_item_prop_desc.is_configurable = true; + + ecma_op_object_define_own_property (array_obj_p, + result_prop_str_p, + &array_item_prop_desc, + true); + + /* The temporary number is released once the property has been defined. */ + ecma_dealloc_number (num_p); + } + ecma_deref_ecma_string (result_prop_str_p); + + /* Set input property of the result array */ + result_prop_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_INPUT); + { + ecma_property_descriptor_t array_item_prop_desc = ecma_make_empty_property_descriptor (); + + array_item_prop_desc.is_value_defined = true; + ecma_string_t *input_str_p = ecma_new_ecma_string (re_ctx_p->input_start_p); + array_item_prop_desc.value = ecma_make_string_value (input_str_p); + + array_item_prop_desc.is_writable_defined = true; + array_item_prop_desc.is_writable = true; + + array_item_prop_desc.is_enumerable_defined = true; + array_item_prop_desc.is_enumerable = true; + + array_item_prop_desc.is_configurable_defined = true; + array_item_prop_desc.is_configurable = true; + +
ecma_op_object_define_own_property (array_obj_p, + result_prop_str_p, + &array_item_prop_desc, + true); + + ecma_deref_ecma_string (input_str_p); + } + ecma_deref_ecma_string (result_prop_str_p); + + /* Set length property of the result array */ + result_prop_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_LENGTH); + { + + ecma_property_descriptor_t array_item_prop_desc = ecma_make_empty_property_descriptor (); + array_item_prop_desc.is_value_defined = true; + + ecma_number_t *num_p = ecma_alloc_number (); + *num_p = (ecma_number_t) (re_ctx_p->num_of_captures / 2); + array_item_prop_desc.value = ecma_make_number_value (num_p); + + array_item_prop_desc.is_writable_defined = false; + array_item_prop_desc.is_enumerable_defined = false; + array_item_prop_desc.is_configurable_defined = false; + + ecma_op_object_define_own_property (array_obj_p, + result_prop_str_p, + &array_item_prop_desc, + true); + + ecma_dealloc_number (num_p); + } + ecma_deref_ecma_string (result_prop_str_p); +} /* re_set_result_array_properties */ + +/** + * RegExp helper function to start the recursive matching algorithm + * and create the result Array object + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value + */ +ecma_completion_value_t +ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */ + re_bytecode_t *bc_p, /**< start of the RegExp bytecode */ + const ecma_char_t *str_p) /**< start of the input string */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + int32_t input_length = ecma_zt_string_length (str_p); + re_matcher_ctx_t re_ctx; + re_ctx.input_start_p = str_p; + re_ctx.input_end_p = str_p + strlen ((char *) str_p); + re_ctx.match_limit = 0; + re_ctx.recursion_depth = 0; + + /* 1. Read bytecode header and init regexp matcher context. 
*/ + re_ctx.flags = (uint8_t) re_get_value (&bc_p); + JERRY_DDLOG ("Exec with flags [global: %d, ignoreCase: %d, multiline: %d]\n", + re_ctx.flags & RE_FLAG_GLOBAL, + re_ctx.flags & RE_FLAG_IGNORE_CASE, + re_ctx.flags & RE_FLAG_MULTILINE); + + re_ctx.num_of_captures = re_get_value (&bc_p); + JERRY_ASSERT (re_ctx.num_of_captures % 2 == 0); + re_ctx.num_of_non_captures = re_get_value (&bc_p); + + MEM_DEFINE_LOCAL_ARRAY (saved_p, re_ctx.num_of_captures + re_ctx.num_of_non_captures, const ecma_char_t*); + for (uint32_t i = 0; i < re_ctx.num_of_captures + re_ctx.num_of_non_captures; i++) + { + saved_p[i] = NULL; + } + re_ctx.saved_p = saved_p; + + uint32_t num_of_iter_length = (re_ctx.num_of_captures / 2) + (re_ctx.num_of_non_captures - 1); + MEM_DEFINE_LOCAL_ARRAY (num_of_iter_p, num_of_iter_length, uint32_t); + for (uint32_t i = 0; i < num_of_iter_length; i++) + { + num_of_iter_p[i] = 0u; + } + + bool is_match = false; + re_ctx.num_of_iterations = num_of_iter_p; + int32_t index = 0; + + if (re_ctx.flags & RE_FLAG_GLOBAL) + { + ecma_string_t *magic_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_LASTINDEX_UL); + ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (obj_p, magic_str_p); + ecma_number_t *lastindex_num_p = ecma_get_number_from_value (lastindex_prop_p->u.named_data_property.value); + index = ecma_number_to_int32 (*lastindex_num_p); + JERRY_ASSERT (str_p != NULL); + str_p += ecma_number_to_int32 (*lastindex_num_p); + ecma_deref_ecma_string (magic_str_p); + } + + /* 2. 
Try to match */ + const ecma_char_t *sub_str_p; + while (str_p && str_p <= re_ctx.input_end_p && ecma_is_completion_value_empty (ret_value)) + { + if (index < 0 || index > input_length) + { + ecma_string_t *magic_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_LASTINDEX_UL); + ecma_number_t *lastindex_num_p = ecma_alloc_number (); + *lastindex_num_p = ECMA_NUMBER_ZERO; + ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true); + ecma_dealloc_number (lastindex_num_p); + ecma_deref_ecma_string (magic_str_p); + + is_match = false; + break; + } + else + { + sub_str_p = NULL; + ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, str_p, &sub_str_p), ret_value); + if (ecma_is_value_true (match_value)) + { + is_match = true; + break; + } + str_p++; + index++; + ECMA_FINALIZE (match_value); + } + } + + if (re_ctx.flags & RE_FLAG_GLOBAL) + { + ecma_string_t *magic_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING_LASTINDEX_UL); + ecma_number_t *lastindex_num_p = ecma_alloc_number (); + *lastindex_num_p = ((ecma_number_t) (sub_str_p - re_ctx.input_start_p)); + ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true); + ecma_dealloc_number (lastindex_num_p); + ecma_deref_ecma_string (magic_str_p); + } + + /* 3. 
Fill the result array or return with 'undefined' */ + if (ecma_is_completion_value_empty (ret_value)) + { + if (is_match) + { + ecma_completion_value_t result_array = ecma_op_create_array_object (0, 0, false); + ecma_object_t *result_array_obj_p = ecma_get_object_from_completion_value (result_array); + + re_set_result_array_properties (result_array_obj_p, &re_ctx, index); + + for (uint32_t i = 0; i < re_ctx.num_of_captures; i += 2) + { + ecma_string_t *index_str_p = ecma_new_ecma_string_from_uint32 (i / 2); + + if (re_ctx.saved_p[i] && re_ctx.saved_p[i + 1] && re_ctx.saved_p[i + 1] >= re_ctx.saved_p[i]) + { + ecma_length_t capture_str_len = static_cast (re_ctx.saved_p[i + 1] - re_ctx.saved_p[i]); + ecma_string_t *capture_str_p; + + if (capture_str_len > 0) + { + capture_str_p = ecma_new_ecma_string (re_ctx.saved_p[i], capture_str_len); + } + else + { + capture_str_p = ecma_get_magic_string (ECMA_MAGIC_STRING__EMPTY); + } + ecma_op_object_put (result_array_obj_p, index_str_p, ecma_make_string_value (capture_str_p), true); + ecma_deref_ecma_string (capture_str_p); + } + else + { + ecma_op_object_put (result_array_obj_p, + index_str_p, + ecma_make_simple_value (ECMA_SIMPLE_VALUE_UNDEFINED), + true); + } + ecma_deref_ecma_string (index_str_p); + } + ret_value = result_array; + } + else + { + ret_value = ecma_make_normal_completion_value (ecma_make_simple_value (ECMA_SIMPLE_VALUE_UNDEFINED)); + } + } + MEM_FINALIZE_LOCAL_ARRAY (num_of_iter_p); + MEM_FINALIZE_LOCAL_ARRAY (saved_p); + + return ret_value; +} /* ecma_regexp_exec_helper */ + +/** + * @} + * @} + */ + +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ diff --git a/jerry-core/ecma/operations/ecma-regexp-object.h b/jerry-core/ecma/operations/ecma-regexp-object.h new file mode 100644 index 0000000000..d9fc30062a --- /dev/null +++ b/jerry-core/ecma/operations/ecma-regexp-object.h @@ -0,0 +1,66 @@ +/* Copyright 2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ECMA_REGEXP_OBJECT_H +#define ECMA_REGEXP_OBJECT_H + +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + +#include "ecma-globals.h" +#include "re-compiler.h" + +/** \addtogroup ecma ECMA + * @{ + * + * \addtogroup ecmaregexpobject ECMA RegExp object related routines + * @{ + */ + +#define RE_EXECUTE_RECURSION_LIMIT 1000 /* Limit of RegExp executor recursion depth */ +#define RE_EXECUTE_MATCH_LIMIT 10000 /* Limit of RegExp executor matching steps */ + +/** + * RegExp executor context + * + * FIXME: + * Add comments with description of the structure members + */ +typedef struct +{ + const ecma_char_t **saved_p; + const ecma_char_t *input_start_p; + const ecma_char_t *input_end_p; + uint32_t match_limit; + uint32_t recursion_depth; + uint32_t num_of_captures; + uint32_t num_of_non_captures; + uint32_t *num_of_iterations; + uint8_t flags; +} re_matcher_ctx_t; + +extern ecma_completion_value_t +ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str_p); + +extern ecma_completion_value_t +ecma_regexp_exec_helper (ecma_object_t *obj_p, re_bytecode_t *bc_p, const ecma_char_t *str_p); + +/** + * @} + * @} + */ + +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ +#endif /* !ECMA_REGEXP_OBJECT_H */ From e027b4d65d09adf25b5054efae882c78d4896a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Lang=C3=B3?= Date: Fri, 26 Jun 2015 00:03:20 +0300
Subject: [PATCH 18/18] Enable regular expressions. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - add regular expressions support to JS parser and interpreter; - add tests for regular expressions. JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com --- jerry-core/parser/js/lexer.cpp | 99 ++++++++- jerry-core/parser/js/lexer.h | 6 +- jerry-core/parser/js/opcodes-dumper.cpp | 28 +++ jerry-core/parser/js/opcodes-dumper.h | 2 + jerry-core/parser/js/parser.cpp | 5 + jerry-core/parser/js/scopes-tree.cpp | 2 + jerry-core/vm/opcodes-ecma-support.h | 1 + jerry-core/vm/opcodes.cpp | 60 ++++++ jerry-core/vm/opcodes.h | 3 +- tests/jerry/regexp-alternatives.js | 61 ++++++ tests/jerry/regexp-assertions.js | 152 +++++++++++++ tests/jerry/regexp-backreference.js | 27 +++ tests/jerry/regexp-capture-groups.js | 199 ++++++++++++++++++ tests/jerry/regexp-character-class.js | 33 +++ tests/jerry/regexp-construct.js | 88 ++++++++ tests/jerry/regexp-literal.js | 25 +++ tests/jerry/regexp-non-capture-groups.js | 197 +++++++++++++++++ tests/jerry/regexp-routines.js | 50 +++++ .../regexp-simple-atom-and-iterations.js | 55 +++++ 19 files changed, 1087 insertions(+), 6 deletions(-) create mode 100644 tests/jerry/regexp-alternatives.js create mode 100644 tests/jerry/regexp-assertions.js create mode 100644 tests/jerry/regexp-backreference.js create mode 100644 tests/jerry/regexp-capture-groups.js create mode 100644 tests/jerry/regexp-character-class.js create mode 100644 tests/jerry/regexp-construct.js create mode 100644 tests/jerry/regexp-literal.js create mode 100644 tests/jerry/regexp-non-capture-groups.js create mode 100644 tests/jerry/regexp-routines.js create mode 100644 tests/jerry/regexp-simple-atom-and-iterations.js diff --git a/jerry-core/parser/js/lexer.cpp b/jerry-core/parser/js/lexer.cpp index a3926cc55e..c1f1d20ce7 100644 
--- a/jerry-core/parser/js/lexer.cpp +++ b/jerry-core/parser/js/lexer.cpp @@ -15,9 +15,14 @@ */ #include "ecma-helpers.h" +#include "ecma-exceptions.h" #include "jrt-libc-includes.h" #include "jsp-mm.h" #include "lexer.h" +#include "mem-allocator.h" +#include "opcodes.h" +#include "parser.h" +#include "stack.h" #include "syntax-errors.h" static token saved_token, prev_token, sent_token, empty_token; @@ -961,6 +966,76 @@ parse_string (void) return ret; } /* parse_string */ +/** + * Parse regular expression literal (ECMA-262 v5, 7.8.5) + */ +static token +parse_regexp (void) +{ + token result; + bool is_char_class = false; + + /* Eat up '/' */ + JERRY_ASSERT ((ecma_char_t) LA (0) == '/'); + consume_char (); + new_token (); + + while (true) + { + ecma_char_t c = (ecma_char_t) LA (0); + + if (c == '\0') + { + PARSE_ERROR ("Unclosed string", token_start - buffer_start); + } + else if (c == '\n') + { + PARSE_ERROR ("RegExp literal shall not contain newline character", token_start - buffer_start); + } + else if (c == '\\') + { + consume_char (); + } + else if (c == '[') + { + is_char_class = true; + } + else if (c == ']') + { + is_char_class = false; + } + else if (c == '/' && !is_char_class) + { + /* Eat up '/' */ + consume_char (); + break; + } + + consume_char (); + } + + /* Try to parse RegExp flags */ + while (true) + { + ecma_char_t c = (ecma_char_t) LA (0); + + if (c == '\0' + || !ecma_char_is_word_char (c) + || ecma_char_is_line_terminator (c)) + { + break; + } + consume_char (); + } + + result = convert_string_to_token (TOK_REGEXP, + (const ecma_char_t*) token_start, + static_cast (buffer - token_start)); + + token_start = NULL; + return result; +} /* parse_regexp */ + static void grobble_whitespaces (void) { @@ -1084,10 +1159,27 @@ lexer_next_token_private (void) } } - if (c == '/' && LA (1) == '/') + + if (c == '/') { - replace_comment_by_newline (); - return lexer_next_token_private (); + if (LA (1) == '/') + { + replace_comment_by_newline (); + return
lexer_next_token_private (); + } + else if (!(sent_token.type == TOK_NAME + || sent_token.type == TOK_NULL + || sent_token.type == TOK_BOOL + || sent_token.type == TOK_CLOSE_BRACE + || sent_token.type == TOK_CLOSE_SQUARE + || sent_token.type == TOK_CLOSE_PAREN + || sent_token.type == TOK_SMALL_INT + || sent_token.type == TOK_NUMBER + || sent_token.type == TOK_STRING + || sent_token.type == TOK_REGEXP)) + { + return parse_regexp (); + } } switch (c) @@ -1203,7 +1295,6 @@ lexer_next_token (void) prev_token = sent_token; sent_token = lexer_next_token_private (); - if (sent_token.type == TOK_NEWLINE) { dump_current_line (); diff --git a/jerry-core/parser/js/lexer.h b/jerry-core/parser/js/lexer.h index f67b6a9914..3d09d0df83 100644 --- a/jerry-core/parser/js/lexer.h +++ b/jerry-core/parser/js/lexer.h @@ -99,7 +99,7 @@ typedef enum __attr_packed___ TOK_OPEN_PAREN, // ( TOK_CLOSE_PAREN, //) TOK_OPEN_SQUARE, // [ - TOK_CLOSE_SQUARE, // [ + TOK_CLOSE_SQUARE, // ] TOK_DOT, // . TOK_SEMICOLON, // ; @@ -152,6 +152,7 @@ typedef enum __attr_packed___ TOK_DIV, // / TOK_DIV_EQ, // /= TOK_EMPTY, + TOK_REGEXP, // RegularExpressionLiteral (/.../gim) } token_type; typedef size_t locus; @@ -170,6 +171,9 @@ typedef struct #define TOKEN_EMPTY_INITIALIZER {0, TOK_EMPTY, 0} void lexer_init (const char *, size_t, bool); +void lexer_init_source (const char *, size_t); + +void lexer_free (void); token lexer_next_token (void); void lexer_save_token (token); diff --git a/jerry-core/parser/js/opcodes-dumper.cpp b/jerry-core/parser/js/opcodes-dumper.cpp index 16ce7bfece..abc733033b 100644 --- a/jerry-core/parser/js/opcodes-dumper.cpp +++ b/jerry-core/parser/js/opcodes-dumper.cpp @@ -843,6 +843,34 @@ dump_number_assignment_res (lit_cpointer_t lit_id) return op; } +void +dump_regexp_assignment (operand op, lit_cpointer_t lit_id) +{ + switch (op.type) + { + case OPERAND_LITERAL: + { + const opcode_t opcode = getop_assignment (LITERAL_TO_REWRITE, OPCODE_ARG_TYPE_REGEXP, LITERAL_TO_REWRITE); + 
serializer_dump_op_meta (create_op_meta_101 (opcode, op.data.lit_id, lit_id)); + break; + } + case OPERAND_TMP: + { + const opcode_t opcode = getop_assignment (op.data.uid, OPCODE_ARG_TYPE_REGEXP, LITERAL_TO_REWRITE); + serializer_dump_op_meta (create_op_meta_001 (opcode, lit_id)); + break; + } + } +} + +operand +dump_regexp_assignment_res (lit_cpointer_t lit_id) +{ + operand op = tmp_operand (); + dump_regexp_assignment (op, lit_id); + return op; +} + void dump_smallint_assignment (operand op, idx_t uid) { diff --git a/jerry-core/parser/js/opcodes-dumper.h b/jerry-core/parser/js/opcodes-dumper.h index 94719a6e73..72ed8573cd 100644 --- a/jerry-core/parser/js/opcodes-dumper.h +++ b/jerry-core/parser/js/opcodes-dumper.h @@ -69,6 +69,8 @@ void dump_string_assignment (operand, lit_cpointer_t); operand dump_string_assignment_res (lit_cpointer_t); void dump_number_assignment (operand, lit_cpointer_t); operand dump_number_assignment_res (lit_cpointer_t); +void dump_regexp_assignment (operand, lit_cpointer_t); +operand dump_regexp_assignment_res (lit_cpointer_t); void dump_smallint_assignment (operand, idx_t); operand dump_smallint_assignment_res (idx_t); void dump_undefined_assignment (operand); diff --git a/jerry-core/parser/js/parser.cpp b/jerry-core/parser/js/parser.cpp index e6014f0c1d..a2696256cf 100644 --- a/jerry-core/parser/js/parser.cpp +++ b/jerry-core/parser/js/parser.cpp @@ -1,4 +1,5 @@ /* Copyright 2014-2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +23,7 @@ #include "opcodes-dumper.h" #include "opcodes-native-call.h" #include "parser.h" +#include "re-parser.h" #include "scopes-tree.h" #include "serializer.h" #include "stack.h" @@ -745,6 +747,7 @@ parse_object_literal (void) | 'false' | number_literal | string_literal + | regexp_literal ; */ static operand parse_literal (void) @@ -753,6 +756,7 @@ parse_literal (void) { case TOK_NUMBER: return dump_number_assignment_res (token_data_as_lit_cp ()); case TOK_STRING: return dump_string_assignment_res (token_data_as_lit_cp ()); + case TOK_REGEXP: return dump_regexp_assignment_res (token_data_as_lit_cp ()); case TOK_NULL: return dump_null_assignment_res (); case TOK_BOOL: return dump_boolean_assignment_res ((bool) token_data ()); case TOK_SMALL_INT: return dump_smallint_assignment_res ((idx_t) token_data ()); @@ -786,6 +790,7 @@ parse_primary_expression (void) case TOK_BOOL: case TOK_SMALL_INT: case TOK_NUMBER: + case TOK_REGEXP: case TOK_STRING: return parse_literal (); case TOK_NAME: return literal_operand (token_data_as_lit_cp ()); case TOK_OPEN_SQUARE: return parse_array_literal (); diff --git a/jerry-core/parser/js/scopes-tree.cpp b/jerry-core/parser/js/scopes-tree.cpp index ce083e868f..7a00d93992 100644 --- a/jerry-core/parser/js/scopes-tree.cpp +++ b/jerry-core/parser/js/scopes-tree.cpp @@ -291,6 +291,7 @@ generate_opcode (scopes_tree tree, opcode_counter_t opc_index, lit_id_hash_table } case OPCODE_ARG_TYPE_NUMBER: case OPCODE_ARG_TYPE_NUMBER_NEGATE: + case OPCODE_ARG_TYPE_REGEXP: case OPCODE_ARG_TYPE_STRING: case OPCODE_ARG_TYPE_VARIABLE: { @@ -430,6 +431,7 @@ count_new_literals_in_opcode (scopes_tree tree, opcode_counter_t opc_index) } case OPCODE_ARG_TYPE_NUMBER: case OPCODE_ARG_TYPE_NUMBER_NEGATE: + case OPCODE_ARG_TYPE_REGEXP: case OPCODE_ARG_TYPE_STRING: case OPCODE_ARG_TYPE_VARIABLE: { diff --git a/jerry-core/vm/opcodes-ecma-support.h b/jerry-core/vm/opcodes-ecma-support.h index 8fd39c9dab..9826c63d25 100644 --- 
a/jerry-core/vm/opcodes-ecma-support.h +++ b/jerry-core/vm/opcodes-ecma-support.h @@ -29,6 +29,7 @@ #include "ecma-objects.h" #include "ecma-objects-general.h" #include "ecma-reference.h" +#include "ecma-regexp-object.h" #include "ecma-try-catch-macro.h" #include "serializer.h" diff --git a/jerry-core/vm/opcodes.cpp b/jerry-core/vm/opcodes.cpp index 04e04948fa..d1e3a6fc59 100644 --- a/jerry-core/vm/opcodes.cpp +++ b/jerry-core/vm/opcodes.cpp @@ -1,4 +1,5 @@ /* Copyright 2014-2015 Samsung Electronics Co., Ltd. + * Copyright 2015 University of Szeged. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -162,6 +163,65 @@ opfunc_assignment (opcode_t opdata, /**< operation data */ dst_var_idx, ecma_make_number_value (num_p)); } + else if (type_value_right == OPCODE_ARG_TYPE_REGEXP) + { +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN + lit_cpointer_t lit_cp = serializer_get_literal_cp_by_uid (src_val_descr, + int_data->opcodes_p, + int_data->pos); + ecma_string_t *string_p = ecma_new_ecma_string_from_lit_cp (lit_cp); + + int32_t re_str_len = ecma_string_get_length (string_p); + MEM_DEFINE_LOCAL_ARRAY (re_str_p, re_str_len + 1, ecma_char_t); + + ssize_t zt_str_size = (ssize_t) sizeof (ecma_char_t) * (re_str_len + 1); + ecma_string_to_zt_string (string_p, re_str_p, zt_str_size); + + ecma_char_t *ch_p = re_str_p; + ecma_char_t *last_slash_p = NULL; + while (*ch_p) + { + if (*ch_p == '/') + { + last_slash_p = ch_p; + } + ch_p++; + } + + JERRY_ASSERT (last_slash_p != NULL); + JERRY_ASSERT ((re_str_p < last_slash_p) && (last_slash_p < ch_p)); + JERRY_ASSERT ((last_slash_p - re_str_p) > 0); + ecma_string_t *pattern_p = ecma_new_ecma_string (re_str_p, (ecma_length_t) (last_slash_p - re_str_p)); + ecma_string_t *flags_p = NULL; + + if ((ch_p - last_slash_p) > 1) + { + flags_p = ecma_new_ecma_string (last_slash_p + 1, (ecma_length_t) ((ch_p - last_slash_p - 1))); + } + + 
ECMA_TRY_CATCH (regexp_obj_value, + ecma_op_create_regexp_object (pattern_p, flags_p), + ret_value); + + ret_value = set_variable_value (int_data, + int_data->pos, + dst_var_idx, + regexp_obj_value); + + ECMA_FINALIZE (regexp_obj_value); + + ecma_deref_ecma_string (pattern_p); + if (flags_p != NULL) + { + ecma_deref_ecma_string (flags_p); + } + + MEM_FINALIZE_LOCAL_ARRAY (re_str_p) + ecma_deref_ecma_string (string_p); +#else + JERRY_UNIMPLEMENTED ("Regular Expressions are not supported in compact profile!"); +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */ + } else { JERRY_ASSERT (type_value_right == OPCODE_ARG_TYPE_SMALLINT_NEGATE); diff --git a/jerry-core/vm/opcodes.h b/jerry-core/vm/opcodes.h index 1afea172c0..ff7d8594aa 100644 --- a/jerry-core/vm/opcodes.h +++ b/jerry-core/vm/opcodes.h @@ -50,7 +50,8 @@ typedef enum OPCODE_ARG_TYPE_NUMBER, /**< index of number literal */ OPCODE_ARG_TYPE_NUMBER_NEGATE, /**< index of number literal with negation */ OPCODE_ARG_TYPE_STRING, /**< index of string literal */ - OPCODE_ARG_TYPE_VARIABLE /**< index of variable name */ + OPCODE_ARG_TYPE_VARIABLE, /**< index of string literal with variable name */ + OPCODE_ARG_TYPE_REGEXP /**< index of string literal with regular expression */ } opcode_arg_type_operand; /** diff --git a/tests/jerry/regexp-alternatives.js b/tests/jerry/regexp-alternatives.js new file mode 100644 index 0000000000..dcb102ad86 --- /dev/null +++ b/tests/jerry/regexp-alternatives.js @@ -0,0 +1,61 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var r; + +r = new RegExp ("a|b"); +assert (r.exec("a") == "a"); + +r = new RegExp ("a|b"); +assert (r.exec("b") == "b"); + +r = new RegExp ("a|b|c"); +assert (r.exec("b") == "b"); + +r = new RegExp ("a|b|c"); +assert (r.exec("c") == "c"); + +r = new RegExp ("a|b|c|d"); +assert (r.exec("") == undefined); + +r = new RegExp ("a|b|c|d"); +assert (r.exec("a") == "a"); + +r = new RegExp ("a|b|c|d"); +assert (r.exec("b") == "b"); + +r = new RegExp ("a|b|c|d"); +assert (r.exec("c") == "c"); + +r = new RegExp ("a|b|c|d"); +assert (r.exec("d") == "d"); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("e") == undefined); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("bb") == "bb"); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("bba") == "bb"); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("bbbb") == "bb"); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("a") == "a"); + +r = new RegExp ("a|bb|c|d"); +assert (r.exec("b") == undefined); diff --git a/tests/jerry/regexp-assertions.js b/tests/jerry/regexp-assertions.js new file mode 100644 index 0000000000..a8656b691d --- /dev/null +++ b/tests/jerry/regexp-assertions.js @@ -0,0 +1,152 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var t; + +t = new RegExp ("^alma$").exec("alma"); +assert (t == "alma"); + +t = new RegExp ("^alma$").exec("almaa"); +assert (t == undefined); + +t = new RegExp ("^alma$").exec("aalma"); +assert (t == undefined); + +t = new RegExp ("^alma").exec("alma"); +assert (t == "alma"); + +t = new RegExp ("^alma").exec("almaa"); +assert (t == "alma"); + +t = new RegExp ("^alma").exec("aalma"); +assert (t == undefined); + +t = new RegExp ("alma$").exec("alma"); +assert (t == "alma"); + +t = new RegExp ("alma$").exec("almaa"); +assert (t == undefined); + +t = new RegExp ("alma$").exec("aalma"); +assert (t == "alma"); + +t = new RegExp ("\\bis\\b").exec("This island is beautiful"); +assert (t == "is"); + +t = new RegExp ("\\Bis\\B").exec("This island is beautiful"); +assert (t == undefined); + +t = new RegExp ("\\Bis").exec("This island is beautiful"); +assert (t == "is"); + +t = new RegExp ("is\\B").exec("This island is beautiful"); +assert (t == "is"); + +t = new RegExp ("\\Bis\\b").exec("This island is beautiful"); +assert (t == "is"); + +t = new RegExp ("\\bis\\B").exec("This island is beautiful"); +assert (t == "is"); + +t = new RegExp ("al(?=(ma))").exec("al"); +assert (t == undefined); + +t = new RegExp ("al(?!(ma))").exec("ala"); +assert (t[0] == "al"); +assert (t[1] == undefined); + +t = new RegExp ("al(?=(ma))").exec("alma"); +assert (t[0] == "al"); +assert (t[1] == "ma"); + +t = new RegExp ("al(?=(ma))").exec("almama"); +assert (t[0] == "al"); +assert (t[1] == "ma"); + +t = new RegExp ("(al)(?=(ma))ma").exec("al"); +assert (t == undefined); + +t = 
new RegExp ("(al)(?=(ma)ma)").exec("al"); +assert (t == undefined); + +t = new RegExp ("al(?=(ma))*ma").exec("alma"); +assert (t[0] == "alma"); +assert (t[1] == undefined); + +t = new RegExp ("al(?!(ma))*ma").exec("alma"); +assert (t[0] == "alma"); +assert (t[1] == undefined); + +t = new RegExp ("al(?=(ma))ma").exec("alma"); +assert (t[0] == "alma"); +assert (t[1] == "ma"); + +t = new RegExp ("al(?!(ma))ma").exec("alma"); +assert (t == undefined); + +t = new RegExp ("(al)(?=(ma))ma").exec("almama"); +t = new RegExp ("(al)(?=(ma)ma)").exec("almama"); + +t = new RegExp ("al(?=(ma))ma").exec("almama"); +assert (t[0] == "alma"); +assert (t[1] == "ma"); + +t = new RegExp ("al(?=(ma)ma)").exec("almama"); +assert (t[0] == "al"); +assert (t[1] == "ma"); + +t = new RegExp ("al(?!(ma))ma").exec("almama"); +assert (t == undefined); + +t = new RegExp ("a(?=(a)(a))aab|aaac").exec("aaac"); +t = new RegExp ("a(?=(a)(a))aab|aaac").exec("aaab"); + +t = new RegExp ("(?!(a)b)|ab").exec("ab"); +assert (t[0] == "ab"); +assert (t[1] == undefined); + +t = new RegExp ("(?=(a)b)|ab").exec("ab"); +assert (t[0] == ""); +assert (t[1] == "a"); + +t = new RegExp ("(?=a|.)Dt").exec("Dt"); +assert (t == "Dt"); + +t = new RegExp ("(?=.|a)Dt").exec("Dt"); +assert (t == "Dt"); + +t = new RegExp ("(?=a|b)Dt").exec("Dt"); +assert (t == undefined); + +t = new RegExp ("(?=.|P)").exec("a"); +assert (t == ""); + +t = new RegExp ("(?=.)").exec("a"); +assert (t == ""); + +t = new RegExp ("(?!a|.)Dt").exec("Dt"); +assert (t == undefined); + +t = new RegExp ("(?!.|a)Dt").exec("Dt"); +assert (t == undefined); + +t = new RegExp ("(?!a|b)Dt").exec("Dt"); +assert (t == "Dt"); + +t = new RegExp ("(?!.|P)").exec("a"); +assert (t == ""); + +t = new RegExp ("(?!.)").exec("a"); +assert (t == ""); diff --git a/tests/jerry/regexp-backreference.js b/tests/jerry/regexp-backreference.js new file mode 100644 index 0000000000..d638ab5dad --- /dev/null +++ b/tests/jerry/regexp-backreference.js @@ -0,0 +1,27 @@ +// Copyright 
2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var r; + +r = new RegExp ("(a)b\\1").exec("aba"); +assert (r[0] == "aba"); +assert (r[1] == "a"); + +r = new RegExp ("(a)b\\1").exec("b"); +assert (r == undefined); + +r = new RegExp ("(a)*b\\1").exec("b"); +assert (r[0] == "b"); +assert (r[1] == undefined); diff --git a/tests/jerry/regexp-capture-groups.js b/tests/jerry/regexp-capture-groups.js new file mode 100644 index 0000000000..85bc8d21bb --- /dev/null +++ b/tests/jerry/regexp-capture-groups.js @@ -0,0 +1,199 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +var r; + +// Simple test cases +r = new RegExp ("()"); +assert (r.exec ("a") == ","); + +r = new RegExp ("(a)"); +assert (r.exec ("a") == "a,a"); + +r = new RegExp ("((a)b)c"); +assert (r.exec ("abc") == "abc,ab,a"); + +r = new RegExp ("(a)*"); +assert (r.exec ("b")[0] == ""); +assert (r.exec ("b")[1] == undefined); +assert (r.exec ("aaaa") == "aaaa,a"); + +r = new RegExp ("(a)+"); +assert (r.exec ("aaaa") == "aaaa,a"); + +r = new RegExp ("(a){4}"); +assert (r.exec ("aaaa") == "aaaa,a"); + +r = new RegExp ("(a){1,2}"); +assert (r.exec ("a") == "a,a"); +assert (r.exec ("aa") == "aa,a"); +assert (r.exec ("aaaa") == "aa,a"); + +r = new RegExp ("(a)?"); +assert (r.exec ("a") == "a,a"); +assert (r.exec ("b")[0] == ""); +assert (r.exec ("b")[1] == undefined); + +// Test greedy iterations +r = new RegExp ("(a){1,3}a"); +assert (r.exec("aa") == "aa,a"); + +r = new RegExp ("(a){1,3}a"); +assert (r.exec("aaa") == "aaa,a"); + +r = new RegExp ("(a){1,3}"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a){1,3}"); +assert (r.exec("aaa") == "aaa,a"); + +r = new RegExp ("(a){1,3}"); +assert (r.exec("aaaa") == "aaa,a"); + +r = new RegExp ("(a){1,5}"); +assert (r.exec("aaaa") == "aaaa,a"); + +r = new RegExp ("(a|b){1,2}"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a|b){1,3}a"); +assert (r.exec("aaa") == "aaa,a"); + +r = new RegExp ("(a|b){1,3}a"); +assert (r.exec("aba") == "aba,b"); + +r = new RegExp ("(a|b){1,3}a"); +assert (r.exec("b") == undefined); + +r = new RegExp ("(a|b){1,3}a"); +assert (r.exec("bbb") == undefined); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("aa") == "aa,a"); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("aaa") == "aaa,a"); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("ab") == "ab,b"); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("aba") == "aba,a"); + +r = new RegExp ("(a|b){1,3}"); +assert (r.exec("bab") == "bab,b"); + +r = new RegExp ("(a|b){1,3}"); 
+assert (r.exec("bbb") == "bbb,b"); + +r = new RegExp ("(a|b){1,4}a"); +assert (r.exec("bbb") == undefined); + +r = new RegExp ("(a|b){1,4}"); +assert (r.exec("ab") == "ab,b"); + +r = new RegExp ("(a|b){1,4}"); +assert (r.exec("aba") == "aba,a"); + +r = new RegExp ("(a|b){1,4}"); +assert (r.exec("bbb") == "bbb,b"); + +r = new RegExp ("(a|b){1,5}"); +assert (r.exec("aba") == "aba,a"); + +r = new RegExp ("(a|b){1,5}"); +assert (r.exec("abab") == "abab,b"); + +r = new RegExp ("(a|b){1,5}"); +assert (r.exec("bbb") == "bbb,b"); + +r = new RegExp ("(aba)*"); +assert (r.exec("aaaa") == ","); + +r = new RegExp ("(aba)+"); +assert (r.exec("aaaa") == undefined); + +r = new RegExp ("(a|bb|c|d)"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a|b)"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a|b)+"); +assert (r.exec("aba") == "aba,a"); + +r = new RegExp ("(a|b)"); +assert (r.exec("b") == "b,b"); + +r = new RegExp ("(a)"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a)*"); +assert (r.exec("a") == "a,a"); + +r = new RegExp ("(a)*"); +assert (r.exec("aaaa") == "aaaa,a"); + +r = new RegExp ("(a)+"); +assert (r.exec("aaaa") == "aaaa,a"); + +r = new RegExp ("(a|aa){0,3}b"); +assert (r.exec("aaaaaab") == "aaaaaab,aa"); + +r = new RegExp ("((a){2,3}){4}b"); +assert (r.exec("aaaaaaaab") == "aaaaaaaab,aa,a"); + +// Test non-greedy iterations +r = new RegExp ("(a)+?"); +assert (r.exec("aaaa") == "a,a"); + +r = new RegExp ("(a)*?aa"); +assert (r.exec("aaaa") == "aa,"); + +r = new RegExp ("(aaa|aa)*?aa"); +assert (r.exec("aaaa")[0] == "aa"); +assert (r.exec("aaaa")[1] == undefined); + +r = new RegExp ("(a)??aa"); +assert (r.exec("aaaa")[0] == "aa"); +assert (r.exec("aaaa")[1] == undefined); + +r = new RegExp ("(a)?aa"); +assert (r.exec("aaaa") == "aaa,a"); + +r = new RegExp ("(()*?)*?a"); +assert (r.exec("ba")[0] == "a"); +assert (r.exec("ba")[1] == undefined); +assert (r.exec("ba")[2] == undefined); + +r = new RegExp ("((bb?)*)*a"); +assert (r.exec("bbba") ==
"bbba,bbb,b"); + +r = new RegExp ("((bb?)*)*bbb\\Ba"); +assert (r.exec("bbba")[0] == "bbba"); +assert (r.exec("bbba")[1] == undefined); +assert (r.exec("bbba")[2] == undefined); + +r = new RegExp ("(a??){0,1}a"); +assert (r.exec("aa") == "aa,a"); + +r = new RegExp ("(a?){0,1}a"); +assert (r.exec("aa") == "aa,a"); + +r = new RegExp ("(a{0,1}?){0,1}a"); +assert (r.exec("aa") == "aa,a"); diff --git a/tests/jerry/regexp-character-class.js b/tests/jerry/regexp-character-class.js new file mode 100644 index 0000000000..aaa744deef --- /dev/null +++ b/tests/jerry/regexp-character-class.js @@ -0,0 +1,33 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var r; + +r = new RegExp ("[abc]*").exec("aaabbcccabcacbacabacbacab"); +assert (r == "aaabbcccabcacbacabacbacab"); + +r = new RegExp ("[abc]*").exec("aaabbcccabdcacb"); +assert (r == "aaabbcccab"); + +r = new RegExp ("[abc]*").exec("defghjklmnopqrstuvwxyz"); +assert (r == ""); + +r = new RegExp ("[a-z]*").exec("abcdefghjklmnopqrstuvwxyz"); +assert (r == "abcdefghjklmnopqrstuvwxyz"); + +r = new RegExp ("[A-Z]*").exec("abcdefghjklmnopqrstuvwxyz"); +assert (r == ""); + +// FIXME: Add more test cases when Unicode support is finished!
diff --git a/tests/jerry/regexp-construct.js b/tests/jerry/regexp-construct.js new file mode 100644 index 0000000000..88faa9e771 --- /dev/null +++ b/tests/jerry/regexp-construct.js @@ -0,0 +1,88 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var r; + +r = new RegExp (); +assert (r.source == "(?:)"); +assert (r.global == false); +assert (r.ignoreCase == false); +assert (r.multiline == false); + +r = new RegExp ("a"); +assert (r.source == "a"); +assert (r.global == false); +assert (r.ignoreCase == false); +assert (r.multiline == false); + +r = new RegExp ("a","gim"); +assert (r.source == "a"); +assert (r.global == true); +assert (r.ignoreCase == true); +assert (r.multiline == true); + +r = RegExp ("a"); +assert (r.source == "a"); +assert (r.global == false); +assert (r.ignoreCase == false); +assert (r.multiline == false); + +r = RegExp ("a","gim"); +assert (r.source == "a"); +assert (r.global == true); +assert (r.ignoreCase == true); +assert (r.multiline == true); + +var r2; +try { + r2 = RegExp (r,"gim"); + assert(false); +} +catch ( e ) +{ + assert (e instanceof TypeError); +} + +r2 = RegExp (r); +assert (r2.source == "a"); +assert (r2.global == true); +assert (r2.ignoreCase == true); +assert (r2.multiline == true); + +r2 = RegExp (r, undefined); +assert (r2.source == "a"); +assert (r2.global == true); +assert (r2.ignoreCase == true); +assert (r2.multiline == 
true); + +r = /(?:)/; +assert (r.source == "(?:)"); +assert (r.global == false); +assert (r.ignoreCase == false); +assert (r.multiline == false); + +r = /a/; +assert (r.source == "a"); +assert (r.global == false); +assert (r.ignoreCase == false); +assert (r.multiline == false); + +r = /a/gim; +assert (r.source == "a"); +assert (r.global == true); +assert (r.ignoreCase == true); +assert (r.multiline == true); + +assert(Object.prototype.toString.call(RegExp.prototype) === '[object RegExp]'); diff --git a/tests/jerry/regexp-literal.js b/tests/jerry/regexp-literal.js new file mode 100644 index 0000000000..70124e9463 --- /dev/null +++ b/tests/jerry/regexp-literal.js @@ -0,0 +1,25 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var t; + +t = /\//.exec("/"); +assert (t == "/"); + +t = /[/]/.exec("/"); +assert ("a"+/x/+"b" == "a/x/b"); + +t = /\/\[[\]/]/.exec("/[/"); +assert (t == "/[/"); diff --git a/tests/jerry/regexp-non-capture-groups.js b/tests/jerry/regexp-non-capture-groups.js new file mode 100644 index 0000000000..55bbcc9ded --- /dev/null +++ b/tests/jerry/regexp-non-capture-groups.js @@ -0,0 +1,197 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var r; + +// Simple test cases +r = new RegExp ("(?:)"); +assert (r.exec ("a") == ""); + +r = new RegExp ("(?:a)"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:(?:a)b)c"); +assert (r.exec ("abc") == "abc"); + +r = new RegExp ("(?:a)*"); +assert (r.exec ("b") == ""); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a)+"); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a){4}"); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a){1,2}"); +assert (r.exec ("a") == "a"); +assert (r.exec ("aa") == "aa"); +assert (r.exec ("aaaa") == "aa"); + +r = new RegExp ("(?:a)?"); +assert (r.exec ("a") == "a"); +assert (r.exec ("b") == ""); + +// Test greedy iterations +r = new RegExp ("(?:a){1,3}a"); +assert (r.exec ("aa") == "aa"); + +r = new RegExp ("(?:a){1,3}a"); +assert (r.exec ("aaa") == "aaa"); + +r = new RegExp ("(?:a){1,3}"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a){1,3}"); +assert (r.exec ("aaa") == "aaa"); + +r = new RegExp ("(?:a){1,3}"); +assert (r.exec ("aaaa") == "aaa"); + +r = new RegExp ("(?:a){1,5}"); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a|b){1,2}"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a|b){1,3}a"); +assert (r.exec ("aaa") == "aaa"); + +r = new RegExp ("(?:a|b){1,3}a"); +assert (r.exec ("aba") == "aba"); + +r = new RegExp ("(?:a|b){1,3}a"); +assert (r.exec ("b") == undefined); + +r = new RegExp ("(?:a|b){1,3}a"); +assert (r.exec ("bbb") == undefined); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("a") == "a"); + +r = new RegExp 
("(?:a|b){1,3}"); +assert (r.exec ("aa") == "aa"); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("aaa") == "aaa"); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("ab") == "ab"); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("aba") == "aba"); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("bab") == "bab"); + +r = new RegExp ("(?:a|b){1,3}"); +assert (r.exec ("bbb") == "bbb"); + +r = new RegExp ("(?:a|b){1,4}a"); +assert (r.exec ("bbb") == undefined); + +r = new RegExp ("(?:a|b){1,4}"); +assert (r.exec ("ab") == "ab"); + +r = new RegExp ("(?:a|b){1,4}"); +assert (r.exec ("aba") == "aba"); + +r = new RegExp ("(?:a|b){1,4}"); +assert (r.exec ("bbb") == "bbb"); + +r = new RegExp ("(?:a|b){1,5}"); +assert (r.exec ("abab") == "abab"); + +r = new RegExp ("(?:aba)*"); +assert (r.exec ("aaaa") == ""); + +r = new RegExp ("(?:aba)+"); +assert (r.exec ("aaaa") == undefined); + +r = new RegExp ("(?:a|bb|c|d)"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a|b|c|d)"); +assert (r.exec ("") == undefined); + +r = new RegExp ("(?:a|b|c|d)"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a|b|c|d)"); +assert (r.exec ("b") == "b"); + +r = new RegExp ("(?:a|b|c|d)"); +assert (r.exec ("c") == "c"); + +r = new RegExp ("(?:a|b|c|d)"); +assert (r.exec ("d") == "d"); + +r = new RegExp ("(?:a|b)+"); +assert (r.exec ("aba") == "aba"); + +r = new RegExp ("(?:a|b)"); +assert (r.exec ("b") == "b"); + +r = new RegExp ("(?:a)"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a)*"); +assert (r.exec ("a") == "a"); + +r = new RegExp ("(?:a)*"); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a)+"); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("(?:a)?aa"); +assert (r.exec ("aaaa") == "aaa"); + +r = new RegExp ("(?:a?){0,1}a"); +assert (r.exec ("aa") == "aa"); + +r = new RegExp ("(?:a|aa){0,3}b"); +assert (r.exec ("aaaaaab") == "aaaaaab"); + +r = new RegExp ("(?:(?:a){2,3}){4}b"); +assert (r.exec ("aaaaaaaab") == "aaaaaaaab"); + +// 
Test non-greedy iterations +r = new RegExp ("(?:a)+?"); +assert (r.exec ("aaaa") == "a"); + +r = new RegExp ("(?:a)*?aa"); +assert (r.exec ("aaaa") == "aa"); + +r = new RegExp ("(?:aaa|aa)*?aa"); +assert (r.exec ("aaaa") == "aa"); + +r = new RegExp ("(?:a)??aa"); +assert (r.exec ("aaaa") == "aa"); + +r = new RegExp ("(?:(?:)*?)*?a"); +assert (r.exec ("ba") == "a"); + +r = new RegExp ("(?:(?:bb?)*)*a"); +assert (r.exec ("bbba") == "bbba"); + +r = new RegExp ("(?:(?:bb?)*)*bbb\\Ba"); +assert (r.exec ("bbba") == "bbba"); + +r = new RegExp ("(?:a??){0,1}a"); +assert (r.exec ("aa") == "aa"); + +r = new RegExp ("(?:a{0,1}?){0,1}a"); +assert (r.exec ("aa") == "aa"); diff --git a/tests/jerry/regexp-routines.js b/tests/jerry/regexp-routines.js new file mode 100644 index 0000000000..df3653ca50 --- /dev/null +++ b/tests/jerry/regexp-routines.js @@ -0,0 +1,50 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +var r; + +r = new RegExp ("a"); +assert (r.exec ("a") == "a"); +assert (r.exec ("b") == undefined); +try { + r.exec.call({}, "a"); + assert (false) +} +catch (e) +{ + assert (e instanceof TypeError); +} + +assert (r.test ("a") == true); +assert (r.test ("b") == false); +try { + r.test.call({}, "a"); + assert (false) +} +catch (e) +{ + assert (e instanceof TypeError); +} + +r = new RegExp ("a", "mig"); +assert (r.toString () == "/a/gim"); +try { + r.toString.call({}, "a"); + assert (false) +} +catch (e) +{ + assert (e instanceof TypeError); +} diff --git a/tests/jerry/regexp-simple-atom-and-iterations.js b/tests/jerry/regexp-simple-atom-and-iterations.js new file mode 100644 index 0000000000..71d2aafe9b --- /dev/null +++ b/tests/jerry/regexp-simple-atom-and-iterations.js @@ -0,0 +1,55 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// Copyright 2015 University of Szeged. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +var r; + +r = new RegExp ("a"); +assert (r.exec ("a") == "a"); +assert (r.exec ("b") == undefined); + +r = new RegExp ("abc"); +assert (r.exec ("abc") == "abc"); + +r = new RegExp ("a*"); +assert (r.exec ("aaa") == "aaa"); +assert (r.exec ("b") == ""); + +r = new RegExp ("a+"); +assert (r.exec ("aaa") == "aaa"); +assert (r.exec ("b") == undefined); + +r = new RegExp ("ab*"); +assert (r.exec ("a") == "a"); +assert (r.exec ("ab") == "ab"); +assert (r.exec ("abbbb") == "abbbb"); +assert (r.exec ("bbb") == undefined); + +r = new RegExp ("a?"); +assert (r.exec ("a") == "a"); +assert (r.exec ("b") == ""); + +r = new RegExp ("a{4}"); +assert (r.exec ("aaa") == undefined); +assert (r.exec ("aaaaa") == "aaaa"); +assert (r.exec ("aaaa") == "aaaa"); + +r = new RegExp ("a{2,6}"); +assert (r.exec ("a") == undefined); +assert (r.exec ("aa") == "aa"); +assert (r.exec ("aaaaaa") == "aaaaaa"); +assert (r.exec ("aaaaaaa") == "aaaaaa"); + +r = new RegExp (".*"); +assert (r.exec ("abcdefghijkl") == "abcdefghijkl");