Skip to content

Commit aa6ba24

Browse files
committed
refactor(ecmascript): improve string to number conversion (#6577)
1 parent b9d7c5f commit aa6ba24

File tree

1 file changed

+67
-1
lines changed

1 file changed

+67
-1
lines changed

crates/oxc_ecmascript/src/string_to_number.rs

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,74 @@ pub trait StringToNumber {
22
fn string_to_number(&self) -> f64;
33
}
44

5+
/// `StringToNumber`
6+
///
7+
/// <https://tc39.es/ecma262/#sec-stringtonumber>
58
impl StringToNumber for &str {
69
fn string_to_number(&self) -> f64 {
7-
self.parse::<f64>().unwrap_or(f64::NAN)
10+
let s = self.trim_matches(is_trimmable_whitespace);
11+
12+
match s {
13+
"" => return 0.0,
14+
"-Infinity" => return f64::NEG_INFINITY,
15+
"Infinity" | "+Infinity" => return f64::INFINITY,
16+
// Make sure that no further variants of "infinity" are parsed.
17+
"inf" | "-inf" | "+inf" => return f64::NAN,
18+
_ => {}
19+
}
20+
21+
let mut bytes = s.bytes();
22+
23+
if s.len() > 2 && bytes.next() == Some(b'0') {
24+
let radix: u32 = match bytes.next() {
25+
Some(b'x' | b'X') => 16,
26+
Some(b'o' | b'O') => 8,
27+
Some(b'b' | b'B') => 2,
28+
_ => 0,
29+
};
30+
31+
if radix != 0 {
32+
let s = &s[2..];
33+
34+
// Fast path
35+
if let Ok(value) = u32::from_str_radix(s, radix) {
36+
return f64::from(value);
37+
}
38+
39+
// Slow path
40+
let mut value: f64 = 0.0;
41+
for c in bytes {
42+
if let Some(digit) = char::from(c).to_digit(radix) {
43+
value = value.mul_add(f64::from(radix), f64::from(digit));
44+
} else {
45+
return f64::NAN;
46+
}
47+
}
48+
return value;
49+
}
50+
}
51+
52+
s.parse::<f64>().unwrap_or(f64::NAN)
853
}
954
}
55+
56+
// Adapted from boa:
// <https://github.com/boa-dev/boa/blob/94d08fe4e68791ceca3c4b7d94ccc5f3588feeb3/core/string/src/lib.rs#L55>
/// Returns `true` if `c` is an ECMAScript white-space or line-terminator code point,
/// i.e. a character that may be trimmed from the ends of a string.
///
/// Rust's own `trim` cannot be used for this because it follows `\p{White_Space}`,
/// which treats `'\u{0085}'` (next line) as white space and does not treat
/// `'\u{FEFF}'` (zero width non-breaking space) as white space, whereas the
/// ECMAScript standard does the opposite for both.
pub(crate) const fn is_trimmable_whitespace(c: char) -> bool {
    // White space: <https://tc39.es/ecma262/#sec-white-space>
    // (explicit code points plus the Unicode Space_Separator category).
    let is_white_space = matches!(
        c,
        '\u{0009}'
            | '\u{000B}'
            | '\u{000C}'
            | '\u{0020}'
            | '\u{00A0}'
            | '\u{FEFF}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200A}'
            | '\u{202F}'
            | '\u{205F}'
            | '\u{3000}'
    );

    // Line terminators: <https://tc39.es/ecma262/#sec-line-terminators>
    let is_line_terminator = matches!(c, '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}');

    is_white_space || is_line_terminator
}

0 commit comments

Comments
 (0)