diff --git a/crates/oxc_ast/src/ast/literal.rs b/crates/oxc_ast/src/ast/literal.rs index 9f60d207dd182..3911784fa7683 100644 --- a/crates/oxc_ast/src/ast/literal.rs +++ b/crates/oxc_ast/src/ast/literal.rs @@ -83,6 +83,11 @@ pub struct StringLiteral<'a> { /// `None` when this ast node is not constructed from the parser. #[content_eq(skip)] pub raw: Option>, + + /// The string value contains replacement character (U+FFFD). + #[builder(default)] + #[estree(skip)] + pub lossy: bool, } /// BigInt literal diff --git a/crates/oxc_ast/src/generated/assert_layouts.rs b/crates/oxc_ast/src/generated/assert_layouts.rs index 028b94c6a9971..f45db57d750b2 100644 --- a/crates/oxc_ast/src/generated/assert_layouts.rs +++ b/crates/oxc_ast/src/generated/assert_layouts.rs @@ -295,11 +295,11 @@ const _: () = { assert!(size_of::() == 16); assert!(align_of::() == 8); - assert!(size_of::() == 64); + assert!(size_of::() == 72); assert!(align_of::() == 8); assert!(offset_of!(Directive, span) == 0); assert!(offset_of!(Directive, expression) == 8); - assert!(offset_of!(Directive, directive) == 48); + assert!(offset_of!(Directive, directive) == 56); assert!(size_of::() == 24); assert!(align_of::() == 8); @@ -666,14 +666,14 @@ const _: () = { assert!(offset_of!(ImportExpression, options) == 24); assert!(offset_of!(ImportExpression, phase) == 56); - assert!(size_of::() == 104); + assert!(size_of::() == 112); assert!(align_of::() == 8); assert!(offset_of!(ImportDeclaration, span) == 0); assert!(offset_of!(ImportDeclaration, specifiers) == 8); assert!(offset_of!(ImportDeclaration, source) == 40); - assert!(offset_of!(ImportDeclaration, phase) == 80); - assert!(offset_of!(ImportDeclaration, with_clause) == 88); - assert!(offset_of!(ImportDeclaration, import_kind) == 96); + assert!(offset_of!(ImportDeclaration, phase) == 88); + assert!(offset_of!(ImportDeclaration, with_clause) == 96); + assert!(offset_of!(ImportDeclaration, import_kind) == 104); assert!(size_of::() == 1); assert!(align_of::() == 1); 
@@ -681,12 +681,12 @@ const _: () = { assert!(size_of::() == 16); assert!(align_of::() == 8); - assert!(size_of::() == 96); + assert!(size_of::() == 104); assert!(align_of::() == 8); assert!(offset_of!(ImportSpecifier, span) == 0); assert!(offset_of!(ImportSpecifier, imported) == 8); - assert!(offset_of!(ImportSpecifier, local) == 56); - assert!(offset_of!(ImportSpecifier, import_kind) == 88); + assert!(offset_of!(ImportSpecifier, local) == 64); + assert!(offset_of!(ImportSpecifier, import_kind) == 96); assert!(size_of::() == 40); assert!(align_of::() == 8); @@ -704,49 +704,49 @@ const _: () = { assert!(offset_of!(WithClause, attributes_keyword) == 8); assert!(offset_of!(WithClause, with_entries) == 32); - assert!(size_of::() == 96); + assert!(size_of::() == 112); assert!(align_of::() == 8); assert!(offset_of!(ImportAttribute, span) == 0); assert!(offset_of!(ImportAttribute, key) == 8); - assert!(offset_of!(ImportAttribute, value) == 56); + assert!(offset_of!(ImportAttribute, value) == 64); - assert!(size_of::() == 48); + assert!(size_of::() == 56); assert!(align_of::() == 8); - assert!(size_of::() == 112); + assert!(size_of::() == 120); assert!(align_of::() == 8); assert!(offset_of!(ExportNamedDeclaration, span) == 0); assert!(offset_of!(ExportNamedDeclaration, declaration) == 8); assert!(offset_of!(ExportNamedDeclaration, specifiers) == 24); assert!(offset_of!(ExportNamedDeclaration, source) == 56); - assert!(offset_of!(ExportNamedDeclaration, export_kind) == 96); - assert!(offset_of!(ExportNamedDeclaration, with_clause) == 104); + assert!(offset_of!(ExportNamedDeclaration, export_kind) == 104); + assert!(offset_of!(ExportNamedDeclaration, with_clause) == 112); - assert!(size_of::() == 72); + assert!(size_of::() == 80); assert!(align_of::() == 8); assert!(offset_of!(ExportDefaultDeclaration, span) == 0); assert!(offset_of!(ExportDefaultDeclaration, exported) == 8); - assert!(offset_of!(ExportDefaultDeclaration, declaration) == 56); + 
assert!(offset_of!(ExportDefaultDeclaration, declaration) == 64); - assert!(size_of::() == 112); + assert!(size_of::() == 128); assert!(align_of::() == 8); assert!(offset_of!(ExportAllDeclaration, span) == 0); assert!(offset_of!(ExportAllDeclaration, exported) == 8); - assert!(offset_of!(ExportAllDeclaration, source) == 56); - assert!(offset_of!(ExportAllDeclaration, with_clause) == 96); - assert!(offset_of!(ExportAllDeclaration, export_kind) == 104); + assert!(offset_of!(ExportAllDeclaration, source) == 64); + assert!(offset_of!(ExportAllDeclaration, with_clause) == 112); + assert!(offset_of!(ExportAllDeclaration, export_kind) == 120); - assert!(size_of::() == 112); + assert!(size_of::() == 128); assert!(align_of::() == 8); assert!(offset_of!(ExportSpecifier, span) == 0); assert!(offset_of!(ExportSpecifier, local) == 8); - assert!(offset_of!(ExportSpecifier, exported) == 56); - assert!(offset_of!(ExportSpecifier, export_kind) == 104); + assert!(offset_of!(ExportSpecifier, exported) == 64); + assert!(offset_of!(ExportSpecifier, export_kind) == 120); assert!(size_of::() == 16); assert!(align_of::() == 8); - assert!(size_of::() == 48); + assert!(size_of::() == 56); assert!(align_of::() == 8); assert!(size_of::() == 64); @@ -771,11 +771,12 @@ const _: () = { assert!(offset_of!(NumericLiteral, raw) == 16); assert!(offset_of!(NumericLiteral, base) == 32); - assert!(size_of::() == 40); + assert!(size_of::() == 48); assert!(align_of::() == 8); assert!(offset_of!(StringLiteral, span) == 0); assert!(offset_of!(StringLiteral, value) == 8); assert!(offset_of!(StringLiteral, raw) == 24); + assert!(offset_of!(StringLiteral, lossy) == 40); assert!(size_of::() == 32); assert!(align_of::() == 8); @@ -1211,19 +1212,19 @@ const _: () = { assert!(size_of::() == 16); assert!(align_of::() == 8); - assert!(size_of::() == 80); + assert!(size_of::() == 88); assert!(align_of::() == 8); assert!(offset_of!(TSModuleDeclaration, span) == 0); assert!(offset_of!(TSModuleDeclaration, id) == 8); - 
assert!(offset_of!(TSModuleDeclaration, body) == 56); - assert!(offset_of!(TSModuleDeclaration, kind) == 72); - assert!(offset_of!(TSModuleDeclaration, declare) == 73); - assert!(offset_of!(TSModuleDeclaration, scope_id) == 76); + assert!(offset_of!(TSModuleDeclaration, body) == 64); + assert!(offset_of!(TSModuleDeclaration, kind) == 80); + assert!(offset_of!(TSModuleDeclaration, declare) == 81); + assert!(offset_of!(TSModuleDeclaration, scope_id) == 84); assert!(size_of::() == 1); assert!(align_of::() == 1); - assert!(size_of::() == 48); + assert!(size_of::() == 56); assert!(align_of::() == 8); assert!(size_of::() == 16); @@ -1327,7 +1328,7 @@ const _: () = { assert!(size_of::() == 16); assert!(align_of::() == 8); - assert!(size_of::() == 48); + assert!(size_of::() == 56); assert!(align_of::() == 8); assert!(offset_of!(TSExternalModuleReference, span) == 0); assert!(offset_of!(TSExternalModuleReference, expression) == 8); @@ -1685,11 +1686,11 @@ const _: () = { assert!(size_of::() == 8); assert!(align_of::() == 4); - assert!(size_of::() == 40); + assert!(size_of::() == 44); assert!(align_of::() == 4); assert!(offset_of!(Directive, span) == 0); assert!(offset_of!(Directive, expression) == 8); - assert!(offset_of!(Directive, directive) == 32); + assert!(offset_of!(Directive, directive) == 36); assert!(size_of::() == 16); assert!(align_of::() == 4); @@ -2056,14 +2057,14 @@ const _: () = { assert!(offset_of!(ImportExpression, options) == 16); assert!(offset_of!(ImportExpression, phase) == 32); - assert!(size_of::() == 60); + assert!(size_of::() == 64); assert!(align_of::() == 4); assert!(offset_of!(ImportDeclaration, span) == 0); assert!(offset_of!(ImportDeclaration, specifiers) == 8); assert!(offset_of!(ImportDeclaration, source) == 24); - assert!(offset_of!(ImportDeclaration, phase) == 48); - assert!(offset_of!(ImportDeclaration, with_clause) == 52); - assert!(offset_of!(ImportDeclaration, import_kind) == 56); + assert!(offset_of!(ImportDeclaration, phase) == 52); + 
assert!(offset_of!(ImportDeclaration, with_clause) == 56); + assert!(offset_of!(ImportDeclaration, import_kind) == 60); assert!(size_of::() == 1); assert!(align_of::() == 1); @@ -2071,12 +2072,12 @@ const _: () = { assert!(size_of::() == 8); assert!(align_of::() == 4); - assert!(size_of::() == 60); + assert!(size_of::() == 64); assert!(align_of::() == 4); assert!(offset_of!(ImportSpecifier, span) == 0); assert!(offset_of!(ImportSpecifier, imported) == 8); - assert!(offset_of!(ImportSpecifier, local) == 36); - assert!(offset_of!(ImportSpecifier, import_kind) == 56); + assert!(offset_of!(ImportSpecifier, local) == 40); + assert!(offset_of!(ImportSpecifier, import_kind) == 60); assert!(size_of::() == 28); assert!(align_of::() == 4); @@ -2094,49 +2095,49 @@ const _: () = { assert!(offset_of!(WithClause, attributes_keyword) == 8); assert!(offset_of!(WithClause, with_entries) == 24); - assert!(size_of::() == 60); + assert!(size_of::() == 68); assert!(align_of::() == 4); assert!(offset_of!(ImportAttribute, span) == 0); assert!(offset_of!(ImportAttribute, key) == 8); - assert!(offset_of!(ImportAttribute, value) == 36); + assert!(offset_of!(ImportAttribute, value) == 40); - assert!(size_of::() == 28); + assert!(size_of::() == 32); assert!(align_of::() == 4); - assert!(size_of::() == 64); + assert!(size_of::() == 68); assert!(align_of::() == 4); assert!(offset_of!(ExportNamedDeclaration, span) == 0); assert!(offset_of!(ExportNamedDeclaration, declaration) == 8); assert!(offset_of!(ExportNamedDeclaration, specifiers) == 16); assert!(offset_of!(ExportNamedDeclaration, source) == 32); - assert!(offset_of!(ExportNamedDeclaration, export_kind) == 56); - assert!(offset_of!(ExportNamedDeclaration, with_clause) == 60); + assert!(offset_of!(ExportNamedDeclaration, export_kind) == 60); + assert!(offset_of!(ExportNamedDeclaration, with_clause) == 64); - assert!(size_of::() == 44); + assert!(size_of::() == 48); assert!(align_of::() == 4); assert!(offset_of!(ExportDefaultDeclaration, 
span) == 0); assert!(offset_of!(ExportDefaultDeclaration, exported) == 8); - assert!(offset_of!(ExportDefaultDeclaration, declaration) == 36); + assert!(offset_of!(ExportDefaultDeclaration, declaration) == 40); - assert!(size_of::() == 68); + assert!(size_of::() == 76); assert!(align_of::() == 4); assert!(offset_of!(ExportAllDeclaration, span) == 0); assert!(offset_of!(ExportAllDeclaration, exported) == 8); - assert!(offset_of!(ExportAllDeclaration, source) == 36); - assert!(offset_of!(ExportAllDeclaration, with_clause) == 60); - assert!(offset_of!(ExportAllDeclaration, export_kind) == 64); + assert!(offset_of!(ExportAllDeclaration, source) == 40); + assert!(offset_of!(ExportAllDeclaration, with_clause) == 68); + assert!(offset_of!(ExportAllDeclaration, export_kind) == 72); - assert!(size_of::() == 68); + assert!(size_of::() == 76); assert!(align_of::() == 4); assert!(offset_of!(ExportSpecifier, span) == 0); assert!(offset_of!(ExportSpecifier, local) == 8); - assert!(offset_of!(ExportSpecifier, exported) == 36); - assert!(offset_of!(ExportSpecifier, export_kind) == 64); + assert!(offset_of!(ExportSpecifier, exported) == 40); + assert!(offset_of!(ExportSpecifier, export_kind) == 72); assert!(size_of::() == 8); assert!(align_of::() == 4); - assert!(size_of::() == 28); + assert!(size_of::() == 32); assert!(align_of::() == 4); assert!(size_of::() == 40); @@ -2161,11 +2162,12 @@ const _: () = { assert!(offset_of!(NumericLiteral, raw) == 16); assert!(offset_of!(NumericLiteral, base) == 24); - assert!(size_of::() == 24); + assert!(size_of::() == 28); assert!(align_of::() == 4); assert!(offset_of!(StringLiteral, span) == 0); assert!(offset_of!(StringLiteral, value) == 8); assert!(offset_of!(StringLiteral, raw) == 16); + assert!(offset_of!(StringLiteral, lossy) == 24); assert!(size_of::() == 20); assert!(align_of::() == 4); @@ -2601,19 +2603,19 @@ const _: () = { assert!(size_of::() == 12); assert!(align_of::() == 4); - assert!(size_of::() == 52); + assert!(size_of::() == 
56); assert!(align_of::() == 4); assert!(offset_of!(TSModuleDeclaration, span) == 0); assert!(offset_of!(TSModuleDeclaration, id) == 8); - assert!(offset_of!(TSModuleDeclaration, body) == 36); - assert!(offset_of!(TSModuleDeclaration, kind) == 44); - assert!(offset_of!(TSModuleDeclaration, declare) == 45); - assert!(offset_of!(TSModuleDeclaration, scope_id) == 48); + assert!(offset_of!(TSModuleDeclaration, body) == 40); + assert!(offset_of!(TSModuleDeclaration, kind) == 48); + assert!(offset_of!(TSModuleDeclaration, declare) == 49); + assert!(offset_of!(TSModuleDeclaration, scope_id) == 52); assert!(size_of::() == 1); assert!(align_of::() == 1); - assert!(size_of::() == 28); + assert!(size_of::() == 32); assert!(align_of::() == 4); assert!(size_of::() == 8); @@ -2717,7 +2719,7 @@ const _: () = { assert!(size_of::() == 8); assert!(align_of::() == 4); - assert!(size_of::() == 32); + assert!(size_of::() == 36); assert!(align_of::() == 4); assert!(offset_of!(TSExternalModuleReference, span) == 0); assert!(offset_of!(TSExternalModuleReference, expression) == 8); diff --git a/crates/oxc_ast/src/generated/ast_builder.rs b/crates/oxc_ast/src/generated/ast_builder.rs index c26c008ef8a99..061302d1cf1bf 100644 --- a/crates/oxc_ast/src/generated/ast_builder.rs +++ b/crates/oxc_ast/src/generated/ast_builder.rs @@ -276,6 +276,29 @@ impl<'a> AstBuilder<'a> { Expression::StringLiteral(self.alloc_string_literal(span, value, raw)) } + /// Build an [`Expression::StringLiteral`] with `lossy`. + /// + /// This node contains a [`StringLiteral`] that will be stored in the memory arena. + /// + /// ## Parameters + /// * `span`: Node location in source code + /// * `value`: The value of the string. + /// * `raw`: The raw string as it appears in source code. 
+ /// * `lossy` + #[inline] + pub fn expression_string_literal_with_lossy( + self, + span: Span, + value: A, + raw: Option>, + lossy: bool, + ) -> Expression<'a> + where + A: IntoIn<'a, Atom<'a>>, + { + Expression::StringLiteral(self.alloc_string_literal_with_lossy(span, value, raw, lossy)) + } + /// Build an [`Expression::TemplateLiteral`]. /// /// This node contains a [`TemplateLiteral`] that will be stored in the memory arena. @@ -7820,6 +7843,27 @@ impl<'a> AstBuilder<'a> { ImportAttributeKey::StringLiteral(self.string_literal(span, value, raw)) } + /// Build an [`ImportAttributeKey::StringLiteral`] with `lossy`. + /// + /// ## Parameters + /// * `span`: Node location in source code + /// * `value`: The value of the string. + /// * `raw`: The raw string as it appears in source code. + /// * `lossy` + #[inline] + pub fn import_attribute_key_string_literal_with_lossy( + self, + span: Span, + value: A, + raw: Option>, + lossy: bool, + ) -> ImportAttributeKey<'a> + where + A: IntoIn<'a, Atom<'a>>, + { + ImportAttributeKey::StringLiteral(self.string_literal_with_lossy(span, value, raw, lossy)) + } + /// Build an [`ExportNamedDeclaration`]. /// /// If you want the built node to be allocated in the memory arena, use [`AstBuilder::alloc_export_named_declaration`] instead. @@ -8398,6 +8442,27 @@ impl<'a> AstBuilder<'a> { ModuleExportName::StringLiteral(self.string_literal(span, value, raw)) } + /// Build a [`ModuleExportName::StringLiteral`] with `lossy`. + /// + /// ## Parameters + /// * `span`: Node location in source code + /// * `value`: The value of the string. + /// * `raw`: The raw string as it appears in source code. + /// * `lossy` + #[inline] + pub fn module_export_name_string_literal_with_lossy( + self, + span: Span, + value: A, + raw: Option>, + lossy: bool, + ) -> ModuleExportName<'a> + where + A: IntoIn<'a, Atom<'a>>, + { + ModuleExportName::StringLiteral(self.string_literal_with_lossy(span, value, raw, lossy)) + } + /// Build a [`V8IntrinsicExpression`]. 
/// /// If you want the built node to be allocated in the memory arena, use [`AstBuilder::alloc_v_8_intrinsic_expression`] instead. @@ -8533,7 +8598,7 @@ impl<'a> AstBuilder<'a> { where A: IntoIn<'a, Atom<'a>>, { - StringLiteral { span, value: value.into_in(self.allocator), raw } + StringLiteral { span, value: value.into_in(self.allocator), raw, lossy: Default::default() } } /// Build a [`StringLiteral`], and store it in the memory arena. @@ -8557,6 +8622,52 @@ impl<'a> AstBuilder<'a> { Box::new_in(self.string_literal(span, value, raw), self.allocator) } + /// Build a [`StringLiteral`] with `lossy`. + /// + /// If you want the built node to be allocated in the memory arena, use [`AstBuilder::alloc_string_literal_with_lossy`] instead. + /// + /// ## Parameters + /// * `span`: Node location in source code + /// * `value`: The value of the string. + /// * `raw`: The raw string as it appears in source code. + /// * `lossy` + #[inline] + pub fn string_literal_with_lossy( + self, + span: Span, + value: A, + raw: Option>, + lossy: bool, + ) -> StringLiteral<'a> + where + A: IntoIn<'a, Atom<'a>>, + { + StringLiteral { span, value: value.into_in(self.allocator), raw, lossy } + } + + /// Build a [`StringLiteral`] with `lossy`, and store it in the memory arena. + /// + /// Returns a [`Box`] containing the newly-allocated node. If you want a stack-allocated node, use [`AstBuilder::string_literal_with_lossy`] instead. + /// + /// ## Parameters + /// * `span`: Node location in source code + /// * `value`: The value of the string. + /// * `raw`: The raw string as it appears in source code. + /// * `lossy` + #[inline] + pub fn alloc_string_literal_with_lossy( + self, + span: Span, + value: A, + raw: Option>, + lossy: bool, + ) -> Box<'a, StringLiteral<'a>> + where + A: IntoIn<'a, Atom<'a>>, + { + Box::new_in(self.string_literal_with_lossy(span, value, raw, lossy), self.allocator) + } + /// Build a [`BigIntLiteral`]. 
/// /// If you want the built node to be allocated in the memory arena, use [`AstBuilder::alloc_big_int_literal`] instead. @@ -9333,6 +9444,31 @@ impl<'a> AstBuilder<'a> { JSXAttributeValue::StringLiteral(self.alloc_string_literal(span, value, raw)) } + /// Build a [`JSXAttributeValue::StringLiteral`] with `lossy`. + /// + /// This node contains a [`StringLiteral`] that will be stored in the memory arena. + /// + /// ## Parameters + /// * `span`: Node location in source code + /// * `value`: The value of the string. + /// * `raw`: The raw string as it appears in source code. + /// * `lossy` + #[inline] + pub fn jsx_attribute_value_string_literal_with_lossy( + self, + span: Span, + value: A, + raw: Option>, + lossy: bool, + ) -> JSXAttributeValue<'a> + where + A: IntoIn<'a, Atom<'a>>, + { + JSXAttributeValue::StringLiteral( + self.alloc_string_literal_with_lossy(span, value, raw, lossy), + ) + } + /// Build a [`JSXAttributeValue::ExpressionContainer`]. /// /// This node contains a [`JSXExpressionContainer`] that will be stored in the memory arena. @@ -9813,6 +9949,29 @@ impl<'a> AstBuilder<'a> { TSEnumMemberName::String(self.alloc_string_literal(span, value, raw)) } + /// Build a [`TSEnumMemberName::String`] with `lossy`. + /// + /// This node contains a [`StringLiteral`] that will be stored in the memory arena. + /// + /// ## Parameters + /// * `span`: Node location in source code + /// * `value`: The value of the string. + /// * `raw`: The raw string as it appears in source code. + /// * `lossy` + #[inline] + pub fn ts_enum_member_name_string_with_lossy( + self, + span: Span, + value: A, + raw: Option>, + lossy: bool, + ) -> TSEnumMemberName<'a> + where + A: IntoIn<'a, Atom<'a>>, + { + TSEnumMemberName::String(self.alloc_string_literal_with_lossy(span, value, raw, lossy)) + } + /// Build a [`TSTypeAnnotation`]. /// /// If you want the built node to be allocated in the memory arena, use [`AstBuilder::alloc_ts_type_annotation`] instead. 
@@ -9947,6 +10106,29 @@ impl<'a> AstBuilder<'a> { TSLiteral::StringLiteral(self.alloc_string_literal(span, value, raw)) } + /// Build a [`TSLiteral::StringLiteral`] with `lossy`. + /// + /// This node contains a [`StringLiteral`] that will be stored in the memory arena. + /// + /// ## Parameters + /// * `span`: Node location in source code + /// * `value`: The value of the string. + /// * `raw`: The raw string as it appears in source code. + /// * `lossy` + #[inline] + pub fn ts_literal_string_literal_with_lossy( + self, + span: Span, + value: A, + raw: Option>, + lossy: bool, + ) -> TSLiteral<'a> + where + A: IntoIn<'a, Atom<'a>>, + { + TSLiteral::StringLiteral(self.alloc_string_literal_with_lossy(span, value, raw, lossy)) + } + /// Build a [`TSLiteral::TemplateLiteral`]. /// /// This node contains a [`TemplateLiteral`] that will be stored in the memory arena. @@ -13205,6 +13387,29 @@ impl<'a> AstBuilder<'a> { TSModuleDeclarationName::StringLiteral(self.string_literal(span, value, raw)) } + /// Build a [`TSModuleDeclarationName::StringLiteral`] with `lossy`. + /// + /// ## Parameters + /// * `span`: Node location in source code + /// * `value`: The value of the string. + /// * `raw`: The raw string as it appears in source code. + /// * `lossy` + #[inline] + pub fn ts_module_declaration_name_string_literal_with_lossy( + self, + span: Span, + value: A, + raw: Option>, + lossy: bool, + ) -> TSModuleDeclarationName<'a> + where + A: IntoIn<'a, Atom<'a>>, + { + TSModuleDeclarationName::StringLiteral( + self.string_literal_with_lossy(span, value, raw, lossy), + ) + } + /// Build a [`TSModuleDeclarationBody::TSModuleDeclaration`]. /// /// This node contains a [`TSModuleDeclaration`] that will be stored in the memory arena. 
diff --git a/crates/oxc_ast/src/generated/derive_clone_in.rs b/crates/oxc_ast/src/generated/derive_clone_in.rs
index 6c77cbcb942e8..dfe09366a8e8c 100644
--- a/crates/oxc_ast/src/generated/derive_clone_in.rs
+++ b/crates/oxc_ast/src/generated/derive_clone_in.rs
@@ -2535,6 +2535,7 @@ impl<'new_alloc> CloneIn<'new_alloc> for StringLiteral<'_> {
             span: CloneIn::clone_in(&self.span, allocator),
             value: CloneIn::clone_in(&self.value, allocator),
             raw: CloneIn::clone_in(&self.raw, allocator),
+            lossy: CloneIn::clone_in(&self.lossy, allocator),
         }
     }
 }
diff --git a/crates/oxc_ast/src/generated/derive_content_eq.rs b/crates/oxc_ast/src/generated/derive_content_eq.rs
index 8296b11df136d..f21c825d68a08 100644
--- a/crates/oxc_ast/src/generated/derive_content_eq.rs
+++ b/crates/oxc_ast/src/generated/derive_content_eq.rs
@@ -1471,6 +1471,7 @@ impl ContentEq for NumericLiteral<'_> {
 impl ContentEq for StringLiteral<'_> {
     fn content_eq(&self, other: &Self) -> bool {
         ContentEq::content_eq(&self.value, &other.value)
+            && ContentEq::content_eq(&self.lossy, &other.lossy)
     }
 }
 
diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs
index 67b2daf90276b..b677465fb2034 100644
--- a/crates/oxc_codegen/src/gen.rs
+++ b/crates/oxc_codegen/src/gen.rs
@@ -1675,9 +1675,7 @@ impl Gen for PropertyKey<'_> {
         match self {
             Self::StaticIdentifier(ident) => ident.print(p, ctx),
             Self::PrivateIdentifier(ident) => ident.print(p, ctx),
-            Self::StringLiteral(s) => {
-                p.print_quoted_utf16(s.value.as_str(), /* allow_backtick */ false);
-            }
+            Self::StringLiteral(s) => p.print_string_literal(s, /* allow_backtick */ false),
             _ => self.to_expression().print_expr(p, Precedence::Comma, Context::empty()),
         }
     }
diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs
index 390f27f5461b9..b74a3db51e4f0 100644
--- a/crates/oxc_codegen/src/lib.rs
+++ b/crates/oxc_codegen/src/lib.rs
@@ -578,16 +578,28 @@ impl<'a> Codegen<'a> {
 
+    /// Print a string literal, recording a source mapping for its span.
+    ///
+    /// When `s.lossy` is set, `s.value` contains U+FFFD in place of lone surrogates,
+    /// so the original source text in `s.raw` is emitted verbatim instead of
+    /// re-quoting `s.value`.
     fn print_string_literal(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
         self.add_source_mapping(s.span);
-        let s = s.value.as_str();
+        if s.lossy {
+            // `raw` is `None` for nodes not constructed by the parser; fall through and
+            // print the (lossy) value rather than panicking on `unwrap`.
+            if let Some(raw) = &s.raw {
+                self.print_str(raw.as_str());
+                return;
+            }
+        }
         self.print_quoted_utf16(s, allow_backtick);
     }
 
-    fn print_quoted_utf16(&mut self, s: &str, allow_backtick: bool) {
+    fn print_quoted_utf16(&mut self, s: &StringLiteral<'_>, allow_backtick: bool) {
         let quote = if self.options.minify {
             let mut single_cost: i32 = 0;
             let mut double_cost: i32 = 0;
             let mut backtick_cost: i32 = 0;
-            let mut bytes = s.as_bytes().iter().peekable();
+            let mut bytes = s.value.as_bytes().iter().peekable();
             while let Some(b) = bytes.next() {
                 match b {
                     b'\n' if self.options.minify => backtick_cost = backtick_cost.saturating_sub(1),
@@ -621,8 +633,8 @@ impl<'a> Codegen<'a> {
         self.print_ascii_byte(quote);
     }
 
-    fn print_unquoted_utf16(&mut self, s: &str, quote: u8) {
-        let mut chars = s.chars().peekable();
+    fn print_unquoted_utf16(&mut self, s: &StringLiteral<'_>, quote: u8) {
+        let mut chars = s.value.chars().peekable();
 
         while let Some(c) = chars.next() {
             match c {
diff --git a/crates/oxc_codegen/tests/integration/esbuild.rs b/crates/oxc_codegen/tests/integration/esbuild.rs
index 4c13cba602673..172f0290475be 100644
--- a/crates/oxc_codegen/tests/integration/esbuild.rs
+++ b/crates/oxc_codegen/tests/integration/esbuild.rs
@@ -360,18 +360,18 @@ fn test_string() {
     test("let x = '\\x1B'", "let x = \"\\x1B\";\n");
     test("let x = '\u{ABCD}'", "let x = \"\u{ABCD}\";\n");
     test("let x = '\\uABCD'", "let x = \"\u{ABCD}\";\n");
-    // test( "let x = '\U000123AB'", "let x = \"\U000123AB\";\n");
-    // test( "let x = '\\u{123AB}'", "let x = \"\U000123AB\";\n");
-    // test( "let x = '\\uD808\\uDFAB'", "let x = \"\U000123AB\";\n");
-    // test( "let x = '\\uD808'", "let x = \"\\uD808\";\n");
-    // test( "let x = '\\uD808X'", "let x = \"\\uD808X\";\n");
-    // test( "let x = '\\uDFAB'", "let x = \"\\uDFAB\";\n");
-    // test( "let x = '\\uDFABX'", "let x = \"\\uDFABX\";\n");
-
-    // test( "let x = '\\x80'", "let x = \"\U00000080\";\n");
-    // test( "let x = '\\xFF'", "let x = \"\U000000FF\";\n");
-    // test( "let x = '\\xF0\\x9F\\x8D\\x95'", "let x = \"\U000000F0\U0000009F\U0000008D\U00000095\";\n");
-    // test( "let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x = \"\U00010402\\uDC03\\uD804\";\n");
+    test("let x = '\\U000123AB'", "let x = \"U000123AB\";\n");
+    test("let x = '\\u{123AB}'", "let x = \"\u{123ab}\";\n");
+    test("let x = '\\uD808\\uDFAB'", "let x = \"\u{123ab}\";\n");
+    test("let x = '\\uD808'", "let x = '\\uD808';\n"); // lone surrogate
+    test("let x = '\\uD808X'", "let x = '\\uD808X';\n");
+    test("let x = '\\uDFAB'", "let x = '\\uDFAB';\n");
+    test("let x = '\\uDFABX'", "let x = '\\uDFABX';\n");
+
+    test("let x = '\\x80'", "let x = \"\u{80}\";\n");
+    test("let x = '\\xFF'", "let x = \"ÿ\";\n");
+    test("let x = '\\xF0\\x9F\\x8D\\x95'", "let x = \"ð\u{9f}\u{8d}\u{95}\";\n");
+    test("let x = '\\uD801\\uDC02\\uDC03\\uD804'", "let x = '\\uD801\\uDC02\\uDC03\\uD804';\n"); // lossy
 }
 
 #[test]
diff --git a/crates/oxc_codegen/tests/integration/tester.rs b/crates/oxc_codegen/tests/integration/tester.rs
index 85d386c803907..57a4f370ee90f 100644
--- a/crates/oxc_codegen/tests/integration/tester.rs
+++ b/crates/oxc_codegen/tests/integration/tester.rs
@@ -3,6 +3,7 @@ use oxc_codegen::{CodeGenerator, CodegenOptions};
 use oxc_parser::{ParseOptions, Parser};
 use oxc_span::SourceType;
 
+#[track_caller]
 pub fn test_with_parse_options(source_text: &str, expected: &str, parse_options: ParseOptions) {
     let allocator = Allocator::default();
     let ret =
@@ -11,18 +12,22 @@ pub fn test_with_parse_options(source_text: &str, expected: &str, parse_options:
     assert_eq!(result, expected, "\nfor source: {source_text}");
 }
 
+#[track_caller]
 pub fn test(source_text: &str, expected: &str) {
     test_options(source_text, expected, CodegenOptions::default());
 }
 
+#[track_caller]
 pub fn test_same(source_text: &str) {
     test(source_text, source_text);
 }
 
+#[track_caller]
 pub fn test_options(source_text: &str, expected: &str, options: CodegenOptions) {
     test_options_with_source_type(source_text, expected, SourceType::jsx(), options);
 }
 
+#[track_caller]
 pub fn test_tsx(source_text: &str, expected: &str) {
     test_options_with_source_type(
         source_text,
@@ -32,6 +37,7 @@ pub fn test_tsx(source_text: &str, expected: &str) {
     );
 }
 
+#[track_caller]
 pub fn test_options_with_source_type(
     source_text: &str,
     expected: &str,
@@ -44,6 +50,7 @@ pub fn test_options_with_source_type(
     assert_eq!(result, expected, "\nfor source: {source_text:?}");
 }
 
+#[track_caller]
 pub fn test_minify(source_text: &str, expected: &str) {
     let source_type = SourceType::jsx();
     let allocator = Allocator::default();
@@ -55,6 +62,7 @@ pub fn test_minify(source_text: &str, expected: &str) {
     assert_eq!(result, expected, "\nfor minify source: {source_text}");
 }
 
+#[track_caller]
 pub fn test_minify_same(source_text: &str) {
     test_minify(source_text, source_text);
 }
diff --git a/crates/oxc_parser/src/js/expression.rs b/crates/oxc_parser/src/js/expression.rs
index 52d5c400dacce..7d8c04301de51 100644
--- a/crates/oxc_parser/src/js/expression.rs
+++ b/crates/oxc_parser/src/js/expression.rs
@@ -406,6 +406,7 @@ impl<'a> ParserImpl<'a> {
         }
         let value = self.cur_string();
         let span = self.start_span();
+        let lossy = self.cur_token().lossy;
         self.bump_any();
         let span = self.end_span(span);
         // SAFETY:
@@ -413,7 +414,7 @@ impl<'a> ParserImpl<'a> {
         let raw = Atom::from(unsafe {
             self.source_text.get_unchecked(span.start as usize..span.end as usize)
         });
-        Ok(self.ast.string_literal(span, value, Some(raw)))
+        Ok(self.ast.string_literal_with_lossy(span, value, Some(raw), lossy))
     }
 
     /// Section [Array Expression](https://tc39.es/ecma262/#prod-ArrayLiteral)
diff --git a/crates/oxc_parser/src/js/module.rs b/crates/oxc_parser/src/js/module.rs
index 9af384be3209f..35526c1d74313 100644
--- a/crates/oxc_parser/src/js/module.rs
+++ b/crates/oxc_parser/src/js/module.rs
@@ -503,7 +503,7 @@ impl<'a> ParserImpl<'a> {
                 let literal = self.parse_literal_string()?;
                 // ModuleExportName : StringLiteral
                 // It is a Syntax Error if IsStringWellFormedUnicode(the SV of StringLiteral) is false.
-                if !literal.is_string_well_formed_unicode() {
+                if literal.lossy || !literal.is_string_well_formed_unicode() {
                     self.error(diagnostics::export_lone_surrogate(literal.span));
                 };
                 Ok(ModuleExportName::StringLiteral(literal))
diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs
index 63e3a774b6e6c..21a0bd6879e46 100644
--- a/crates/oxc_parser/src/lexer/token.rs
+++ b/crates/oxc_parser/src/lexer/token.rs
@@ -26,6 +26,9 @@ pub struct Token {
     /// [Lexer::escaped_templates]: [super::Lexer::escaped_templates]
     pub escaped: bool,
 
+    /// True if a string's value contains a lossy replacement character (U+FFFD).
+    pub lossy: bool,
+
     /// True if for numeric literal tokens that contain separator characters (`_`).
     ///
     /// Numeric literals are defined in Section 12.9.3 of the ECMAScript
@@ -36,7 +39,7 @@ pub struct Token {
     // Padding to fill to 16 bytes.
     // This makes copying a `Token` 1 x xmmword load & store, rather than 1 x dword + 1 x qword
     // and `Token::default()` is 1 x xmmword store, rather than 1 x dword + 1 x qword.
-    _padding2: u32,
+    _padding2: u16,
 }
 
 impl Token {
diff --git a/crates/oxc_parser/src/lexer/unicode.rs b/crates/oxc_parser/src/lexer/unicode.rs
index 5b63fc8b875ec..dab7a096df828 100644
--- a/crates/oxc_parser/src/lexer/unicode.rs
+++ b/crates/oxc_parser/src/lexer/unicode.rs
@@ -124,15 +124,16 @@ impl<'a> Lexer<'a> {
             return;
         };
 
-        // For strings and templates, surrogate pairs are valid grammar, e.g. `"\uD83D\uDE00" === 😀`
-        // values are interpreted as is if they fall out of range
+        // For strings and templates, surrogate pairs are valid grammar, e.g. `"\uD83D\uDE00" === 😀`.
match value { SurrogatePair::CodePoint(code_point) | SurrogatePair::Astral(code_point) => { if let Ok(ch) = char::try_from(code_point) { text.push(ch); } else { - text.push_str("\\u"); - text.push_str(format!("{code_point:x}").as_str()); + // Turns lone surrogate into lossy replacement character (U+FFFD). + // A lone surrogate '\u{df06}' is not a valid UTF8 string. + text.push_str("\u{FFFD}"); + self.token.lossy = true; } } SurrogatePair::HighLow(high, low) => { diff --git a/napi/parser/deserialize-js.js b/napi/parser/deserialize-js.js index cad4a76f40c29..141df593ef1b0 100644 --- a/napi/parser/deserialize-js.js +++ b/napi/parser/deserialize-js.js @@ -449,7 +449,7 @@ function deserializeDirective(pos) { start: deserializeU32(pos), end: deserializeU32(pos + 4), expression: deserializeStringLiteral(pos + 8), - directive: deserializeStr(pos + 48), + directive: deserializeStr(pos + 56), }; } @@ -907,7 +907,7 @@ function deserializeImportExpression(pos) { function deserializeImportDeclaration(pos) { let specifiers = deserializeOptionVecImportDeclarationSpecifier(pos + 8); if (specifiers === null) specifiers = []; - const withClause = deserializeOptionBoxWithClause(pos + 88); + const withClause = deserializeOptionBoxWithClause(pos + 96); return { type: 'ImportDeclaration', start: deserializeU32(pos), @@ -924,7 +924,7 @@ function deserializeImportSpecifier(pos) { start: deserializeU32(pos), end: deserializeU32(pos + 4), imported: deserializeModuleExportName(pos + 8), - local: deserializeBindingIdentifier(pos + 56), + local: deserializeBindingIdentifier(pos + 64), }; } @@ -962,12 +962,12 @@ function deserializeImportAttribute(pos) { start: deserializeU32(pos), end: deserializeU32(pos + 4), key: deserializeImportAttributeKey(pos + 8), - value: deserializeStringLiteral(pos + 56), + value: deserializeStringLiteral(pos + 64), }; } function deserializeExportNamedDeclaration(pos) { - const withClause = deserializeOptionBoxWithClause(pos + 104); + const withClause = 
deserializeOptionBoxWithClause(pos + 112); return { type: 'ExportNamedDeclaration', start: deserializeU32(pos), @@ -984,18 +984,18 @@ function deserializeExportDefaultDeclaration(pos) { type: 'ExportDefaultDeclaration', start: deserializeU32(pos), end: deserializeU32(pos + 4), - declaration: deserializeExportDefaultDeclarationKind(pos + 56), + declaration: deserializeExportDefaultDeclarationKind(pos + 64), }; } function deserializeExportAllDeclaration(pos) { - const withClause = deserializeOptionBoxWithClause(pos + 96); + const withClause = deserializeOptionBoxWithClause(pos + 112); return { type: 'ExportAllDeclaration', start: deserializeU32(pos), end: deserializeU32(pos + 4), exported: deserializeOptionModuleExportName(pos + 8), - source: deserializeStringLiteral(pos + 56), + source: deserializeStringLiteral(pos + 64), attributes: withClause === null ? [] : withClause.withEntries, }; } @@ -1006,7 +1006,7 @@ function deserializeExportSpecifier(pos) { start: deserializeU32(pos), end: deserializeU32(pos + 4), local: deserializeModuleExportName(pos + 8), - exported: deserializeModuleExportName(pos + 56), + exported: deserializeModuleExportName(pos + 64), }; } @@ -1724,9 +1724,9 @@ function deserializeTSModuleDeclaration(pos) { start: deserializeU32(pos), end: deserializeU32(pos + 4), id: deserializeTSModuleDeclarationName(pos + 8), - body: deserializeOptionTSModuleDeclarationBody(pos + 56), - kind: deserializeTSModuleDeclarationKind(pos + 72), - declare: deserializeBool(pos + 73), + body: deserializeOptionTSModuleDeclarationBody(pos + 64), + kind: deserializeTSModuleDeclarationKind(pos + 80), + declare: deserializeBool(pos + 81), }; } @@ -4304,7 +4304,7 @@ function deserializeVecDirective(pos) { pos = uint32[pos32]; for (let i = 0; i < len; i++) { arr.push(deserializeDirective(pos)); - pos += 64; + pos += 72; } return arr; } @@ -5075,7 +5075,7 @@ function deserializeVecImportAttribute(pos) { pos = uint32[pos32]; for (let i = 0; i < len; i++) { 
arr.push(deserializeImportAttribute(pos)); - pos += 96; + pos += 112; } return arr; } @@ -5092,13 +5092,13 @@ function deserializeVecExportSpecifier(pos) { pos = uint32[pos32]; for (let i = 0; i < len; i++) { arr.push(deserializeExportSpecifier(pos)); - pos += 112; + pos += 128; } return arr; } function deserializeOptionStringLiteral(pos) { - if (uint32[(pos + 8) >> 2] === 0 && uint32[(pos + 12) >> 2] === 0) return null; + if (uint8[pos + 40] === 2) return null; return deserializeStringLiteral(pos); } diff --git a/napi/parser/deserialize-ts.js b/napi/parser/deserialize-ts.js index a967fe8920021..7fd0e885fe398 100644 --- a/napi/parser/deserialize-ts.js +++ b/napi/parser/deserialize-ts.js @@ -458,7 +458,7 @@ function deserializeDirective(pos) { start: deserializeU32(pos), end: deserializeU32(pos + 4), expression: deserializeStringLiteral(pos + 8), - directive: deserializeStr(pos + 48), + directive: deserializeStr(pos + 56), }; } @@ -962,7 +962,7 @@ function deserializeImportExpression(pos) { function deserializeImportDeclaration(pos) { let specifiers = deserializeOptionVecImportDeclarationSpecifier(pos + 8); if (specifiers === null) specifiers = []; - const withClause = deserializeOptionBoxWithClause(pos + 88); + const withClause = deserializeOptionBoxWithClause(pos + 96); return { type: 'ImportDeclaration', start: deserializeU32(pos), @@ -970,7 +970,7 @@ function deserializeImportDeclaration(pos) { specifiers, source: deserializeStringLiteral(pos + 40), attributes: withClause === null ? 
[] : withClause.withEntries, - importKind: deserializeImportOrExportKind(pos + 96), + importKind: deserializeImportOrExportKind(pos + 104), }; } @@ -980,8 +980,8 @@ function deserializeImportSpecifier(pos) { start: deserializeU32(pos), end: deserializeU32(pos + 4), imported: deserializeModuleExportName(pos + 8), - local: deserializeBindingIdentifier(pos + 56), - importKind: deserializeImportOrExportKind(pos + 88), + local: deserializeBindingIdentifier(pos + 64), + importKind: deserializeImportOrExportKind(pos + 96), }; } @@ -1019,12 +1019,12 @@ function deserializeImportAttribute(pos) { start: deserializeU32(pos), end: deserializeU32(pos + 4), key: deserializeImportAttributeKey(pos + 8), - value: deserializeStringLiteral(pos + 56), + value: deserializeStringLiteral(pos + 64), }; } function deserializeExportNamedDeclaration(pos) { - const withClause = deserializeOptionBoxWithClause(pos + 104); + const withClause = deserializeOptionBoxWithClause(pos + 112); return { type: 'ExportNamedDeclaration', start: deserializeU32(pos), @@ -1033,7 +1033,7 @@ function deserializeExportNamedDeclaration(pos) { specifiers: deserializeVecExportSpecifier(pos + 24), source: deserializeOptionStringLiteral(pos + 56), attributes: withClause === null ? 
[] : withClause.withEntries, - exportKind: deserializeImportOrExportKind(pos + 96), + exportKind: deserializeImportOrExportKind(pos + 104), }; } @@ -1042,20 +1042,20 @@ function deserializeExportDefaultDeclaration(pos) { type: 'ExportDefaultDeclaration', start: deserializeU32(pos), end: deserializeU32(pos + 4), - declaration: deserializeExportDefaultDeclarationKind(pos + 56), + declaration: deserializeExportDefaultDeclarationKind(pos + 64), }; } function deserializeExportAllDeclaration(pos) { - const withClause = deserializeOptionBoxWithClause(pos + 96); + const withClause = deserializeOptionBoxWithClause(pos + 112); return { type: 'ExportAllDeclaration', start: deserializeU32(pos), end: deserializeU32(pos + 4), exported: deserializeOptionModuleExportName(pos + 8), - source: deserializeStringLiteral(pos + 56), + source: deserializeStringLiteral(pos + 64), attributes: withClause === null ? [] : withClause.withEntries, - exportKind: deserializeImportOrExportKind(pos + 104), + exportKind: deserializeImportOrExportKind(pos + 120), }; } @@ -1065,8 +1065,8 @@ function deserializeExportSpecifier(pos) { start: deserializeU32(pos), end: deserializeU32(pos + 4), local: deserializeModuleExportName(pos + 8), - exported: deserializeModuleExportName(pos + 56), - exportKind: deserializeImportOrExportKind(pos + 104), + exported: deserializeModuleExportName(pos + 64), + exportKind: deserializeImportOrExportKind(pos + 120), }; } @@ -1785,9 +1785,9 @@ function deserializeTSModuleDeclaration(pos) { start: deserializeU32(pos), end: deserializeU32(pos + 4), id: deserializeTSModuleDeclarationName(pos + 8), - body: deserializeOptionTSModuleDeclarationBody(pos + 56), - kind: deserializeTSModuleDeclarationKind(pos + 72), - declare: deserializeBool(pos + 73), + body: deserializeOptionTSModuleDeclarationBody(pos + 64), + kind: deserializeTSModuleDeclarationKind(pos + 80), + declare: deserializeBool(pos + 81), }; } @@ -4365,7 +4365,7 @@ function deserializeVecDirective(pos) { pos = 
uint32[pos32]; for (let i = 0; i < len; i++) { arr.push(deserializeDirective(pos)); - pos += 64; + pos += 72; } return arr; } @@ -5136,7 +5136,7 @@ function deserializeVecImportAttribute(pos) { pos = uint32[pos32]; for (let i = 0; i < len; i++) { arr.push(deserializeImportAttribute(pos)); - pos += 96; + pos += 112; } return arr; } @@ -5153,13 +5153,13 @@ function deserializeVecExportSpecifier(pos) { pos = uint32[pos32]; for (let i = 0; i < len; i++) { arr.push(deserializeExportSpecifier(pos)); - pos += 112; + pos += 128; } return arr; } function deserializeOptionStringLiteral(pos) { - if (uint32[(pos + 8) >> 2] === 0 && uint32[(pos + 12) >> 2] === 0) return null; + if (uint8[pos + 40] === 2) return null; return deserializeStringLiteral(pos); } diff --git a/tasks/coverage/snapshots/parser_test262.snap b/tasks/coverage/snapshots/parser_test262.snap index 7b905d85bada0..e7b1a8ec20e71 100644 --- a/tasks/coverage/snapshots/parser_test262.snap +++ b/tasks/coverage/snapshots/parser_test262.snap @@ -23905,7 +23905,7 @@ Negative Passed: 4519/4519 (100.00%) 24 β”‚ ╰──── - Γ— Duplicated export '\ud83c' + Γ— Duplicated export 'οΏ½' ╭─[test262/test/language/module-code/early-export-ill-formed-string.js:21:17] 20 β”‚ // πŸŒ™ is '\uD83C\uDF19' 21 β”‚ export {Moon as "\uD83C",} from "./early-export-ill-formed-string.js";