diff --git a/crates/oxc_parser/src/lexer/unicode.rs b/crates/oxc_parser/src/lexer/unicode.rs index ed98d70a72720..e3c379e4e6e5b 100644 --- a/crates/oxc_parser/src/lexer/unicode.rs +++ b/crates/oxc_parser/src/lexer/unicode.rs @@ -363,32 +363,31 @@ impl<'a> Lexer<'a> { // Section 12.9.4 String Literals // LegacyOctalEscapeSequence // NonOctalDecimalEscapeSequence - a @ '0'..='7' if !in_template => { - let mut num = String::new_in(self.allocator); - num.push(a); - match a { - '4'..='7' => { - if matches!(self.peek_byte(), Some(b'0'..=b'7')) { - let b = self.consume_char(); - num.push(b); + c @ '0'..='7' if !in_template => { + let first_digit = c as u8 - b'0'; + let mut value = first_digit; + + if matches!(self.peek_byte(), Some(b'0'..=b'7')) { + let digit = self.consume_char() as u8 - b'0'; + value = value * 8 + digit; + if first_digit < 4 && matches!(self.peek_byte(), Some(b'0'..=b'7')) { + let digit = self.consume_char() as u8 - b'0'; + value = value * 8 + digit; + + if value >= 128 { + // `value` is between 128 and 255. UTF-8 representation is: + // 128-191: `0xC2`, followed by code point value. + // 192-255: `0xC3`, followed by code point value - 64. + let bytes = [0xC0 + first_digit, value & 0b1011_1111]; + // SAFETY: `bytes` is a valid 2-byte UTF-8 sequence + unsafe { text.as_mut_vec().extend_from_slice(&bytes) }; + return; } } - '0'..='3' => { - if matches!(self.peek_byte(), Some(b'0'..=b'7')) { - let b = self.consume_char(); - num.push(b); - if matches!(self.peek_byte(), Some(b'0'..=b'7')) { - let c = self.consume_char(); - num.push(c); - } - } - } - _ => {} } - let value = - char::from_u32(u32::from_str_radix(num.as_str(), 8).unwrap()).unwrap(); - text.push(value); + // SAFETY: `value` is in range 0 to `((1 * 8) + 7) * 8 + 7` (127) i.e. ASCII + unsafe { text.as_mut_vec().push(value) }; } '0' if in_template && self.peek_byte().is_some_and(|b| b.is_ascii_digit()) => { self.consume_char(); diff --git a/crates/oxc_transformer/src/jsx/refresh.rs b/crates/oxc_transformer/src/jsx/refresh.rs index 6cb156a3c8e5a..102de59dfe9ee 100644 --- a/crates/oxc_transformer/src/jsx/refresh.rs +++ b/crates/oxc_transformer/src/jsx/refresh.rs @@ -559,7 +559,9 @@ impl<'a> ReactRefresh<'a, '_> { let mut hashed_key = ArenaVec::from_array_in([0; ENCODED_LEN], ctx.ast.allocator); let encoded_bytes = BASE64_STANDARD.encode_slice(hash, &mut hashed_key).unwrap(); debug_assert_eq!(encoded_bytes, ENCODED_LEN); - let hashed_key = ArenaString::from_utf8(hashed_key).unwrap(); + // SAFETY: Base64 encoding only produces ASCII bytes. Even if our assumptions are incorrect, + // and Base64 bytes do not fill `hashed_key` completely, the remaining bytes are 0, so also ASCII + let hashed_key = unsafe { ArenaString::from_utf8_unchecked(hashed_key) }; Atom::from(hashed_key) };