Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions crates/oxc_codegen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,12 @@ impl<'a> Codegen<'a> {
self.code.print_str(s);
}

/// Push a single `char` into the buffer.
///
/// Forwards `ch` unchanged to `self.code.print_char`; this method itself
/// applies no escaping or quoting to the character.
#[inline]
pub fn print_char(&mut self, ch: char) {
self.code.print_char(ch);
}

/// Print a single [`Expression`], adding it to the code generator's
/// internal buffer. Unlike [`Codegen::build`], this does not consume `self`.
#[inline]
Expand Down Expand Up @@ -578,14 +584,7 @@ impl<'a> Codegen<'a> {

/// Print a string literal `s`, recording a source mapping for its span first.
///
/// When `s.lone_surrogates` is set, the literal's raw source text is printed
/// verbatim and no re-quoting happens. Otherwise printing is delegated to
/// `print_quoted_utf16`, which receives `allow_backtick` unchanged.
fn print_string_literal(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
self.add_source_mapping(s.span);
if s.lone_surrogates {
// NOTE(review): assumes `s.raw` is always present when `lone_surrogates`
// is set; the `unwrap` panics otherwise. Confirm against the parser.
self.print_str(s.raw.unwrap().as_str());
return;
}
self.print_quoted_utf16(s, allow_backtick);
}

fn print_quoted_utf16(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
let quote = if self.options.minify {
let mut single_cost: i32 = 0;
let mut double_cost: i32 = 0;
Expand Down Expand Up @@ -680,6 +679,26 @@ impl<'a> Codegen<'a> {
}
self.print_ascii_byte(b'$');
}
'\u{FFFD}' if s.lone_surrogates => {
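// If `lone_surrogates` is set, the string contains lone surrogates, which are escaped
// using the lossy replacement character (U+FFFD) as an escape marker.
// A lone surrogate is encoded as `\u{FFFD}XXXX`, where `XXXX` is its code point as 4 hex digits.
// A genuine U+FFFD in such a string is itself encoded as `\u{FFFD}fffd`, so the 4 characters
// following the marker must always be consumed and inspected.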
let hex1 = chars.next().unwrap();
let hex2 = chars.next().unwrap();
let hex3 = chars.next().unwrap();
let hex4 = chars.next().unwrap();
if [hex1, hex2, hex3, hex4] == ['f', 'f', 'f', 'd'] {
// Actual lossy replacement character
self.print_char('\u{FFFD}');
} else {
// Lossy replacement character representing a lone surrogate
self.print_str("\\u");
self.print_char(hex1);
self.print_char(hex2);
self.print_char(hex3);
self.print_char(hex4);
}
}
_ => self.print_str(c.encode_utf8([0; 4].as_mut())),
}
}
Expand Down
10 changes: 5 additions & 5 deletions crates/oxc_codegen/tests/integration/esbuild.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,15 +363,15 @@ fn test_string() {
test("let x = '\\U000123AB'", "let x = \"U000123AB\";\n");
test("let x = '\\u{123AB}'", "let x = \"\u{123ab}\";\n");
test("let x = '\\uD808\\uDFAB'", "let x = \"\u{123ab}\";\n");
test("let x = '\\uD808'", "let x = '\\uD808';\n"); // lone surrogate
test("let x = '\\uD808X'", "let x = '\\uD808X';\n");
test("let x = '\\uDFAB'", "let x = '\\uDFAB';\n");
test("let x = '\\uDFABX'", "let x = '\\uDFABX';\n");
test("let x = '\\uD808'", "let x = \"\\ud808\";\n"); // lone surrogate
test("let x = '\\uD808X'", "let x = \"\\ud808X\";\n");
test("let x = '\\uDFAB'", "let x = \"\\udfab\";\n");
test("let x = '\\uDFABX'", "let x = \"\\udfabX\";\n");

test("let x = '\\x80'", "let x = \"\u{80}\";\n");
test("let x = '\\xFF'", "let x = \"ÿ\";\n");
test("let x = '\\xF0\\x9F\\x8D\\x95'", "let x = \"ð\u{9f}\u{8d}\u{95}\";\n");
test("let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x = '\\uD801\\uDC02\\uDC03\\uD804';\n"); // lossy
test("let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x = \"𐐂\\udc03\\ud804\";\n"); // surrogates
}

#[test]
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_codegen/tests/integration/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ fn unicode_escape() {
test("console.log('こんにちは');", "console.log(\"こんにちは\");\n");
test("console.log('안녕하세요');", "console.log(\"안녕하세요\");\n");
test("console.log('🧑‍🤝‍🧑');", "console.log(\"🧑‍🤝‍🧑\");\n");
test("console.log(\"\\uD800\\uD801\")", "console.log(\"\\uD800\\uD801\");\n");
test("console.log(\"\\uD800\\uD801\")", "console.log(\"\\ud800\\ud801\");\n");
}

#[test]
Expand Down