Skip to content

Commit dc1afcb

Browse files
committed
refactor(parser): use StringBuilder instead of String
1 parent 07d28d7 commit dc1afcb

File tree

4 files changed

+25
-21
lines changed

4 files changed

+25
-21
lines changed

crates/oxc_parser/src/lexer/identifier.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::cmp::max;
22

3-
use oxc_allocator::String;
3+
use oxc_allocator::StringBuilder;
44
use oxc_span::Span;
55
use oxc_syntax::identifier::{
66
is_identifier_part, is_identifier_part_unicode, is_identifier_start_unicode,
@@ -136,7 +136,7 @@ impl<'a> Lexer<'a> {
136136
pub fn identifier_backslash_handler(&mut self) -> Kind {
137137
// Create arena string to hold unescaped identifier.
138138
// We don't know how long identifier will end up being, so guess.
139-
let str = String::with_capacity_in(MIN_ESCAPED_STR_LEN, self.allocator);
139+
let str = StringBuilder::with_capacity_in(MIN_ESCAPED_STR_LEN, self.allocator);
140140

141141
// Process escape and get rest of identifier
142142
let id = self.identifier_on_backslash(str, true);
@@ -153,7 +153,7 @@ impl<'a> Lexer<'a> {
153153
// will be double what we've seen so far, or `MIN_ESCAPED_STR_LEN` minimum.
154154
let so_far = self.source.str_from_pos_to_current(start_pos);
155155
let capacity = max(so_far.len() * 2, MIN_ESCAPED_STR_LEN);
156-
let mut str = String::with_capacity_in(capacity, self.allocator);
156+
let mut str = StringBuilder::with_capacity_in(capacity, self.allocator);
157157

158158
// Push identifier up to this point into `str`
159159
str.push_str(so_far);
@@ -167,7 +167,11 @@ impl<'a> Lexer<'a> {
167167
/// `self.source` should be positioned *on* the `\` (i.e. `\` has not been consumed yet).
168168
/// `str` should contain the identifier up to before the escape.
169169
/// `is_start` should be `true` if this is first char in the identifier, `false` otherwise.
170-
fn identifier_on_backslash(&mut self, mut str: String<'a>, mut is_start: bool) -> &'a str {
170+
fn identifier_on_backslash(
171+
&mut self,
172+
mut str: StringBuilder<'a>,
173+
mut is_start: bool,
174+
) -> &'a str {
171175
'outer: loop {
172176
// Consume `\`
173177
self.consume_char();
@@ -201,7 +205,7 @@ impl<'a> Lexer<'a> {
201205
}
202206

203207
// Convert `str` to arena slice and save to `escaped_strings`
204-
let id = str.into_bump_str();
208+
let id = str.into_str();
205209
self.save_string(true, id);
206210
id
207211
}

crates/oxc_parser/src/lexer/string.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::cmp::max;
22

3-
use oxc_allocator::String;
3+
use oxc_allocator::StringBuilder;
44

55
use crate::diagnostics;
66

@@ -107,7 +107,7 @@ macro_rules! handle_string_literal_escape {
107107
// will be double what we've seen so far, or `MIN_ESCAPED_STR_LEN` minimum.
108108
let so_far = $lexer.source.str_from_pos_to_current($after_opening_quote);
109109
let capacity = max(so_far.len() * 2, MIN_ESCAPED_STR_LEN);
110-
let mut str = String::with_capacity_in(capacity, $lexer.allocator);
110+
let mut str = StringBuilder::with_capacity_in(capacity, $lexer.allocator);
111111

112112
// Push chunk before `\` into `str`.
113113
str.push_str(so_far);
@@ -193,7 +193,7 @@ macro_rules! handle_string_literal_escape {
193193
}
194194

195195
// Convert `str` to arena slice and save to `escaped_strings`
196-
$lexer.save_string(true, str.into_bump_str());
196+
$lexer.save_string(true, str.into_str());
197197

198198
Kind::Str
199199
}};

crates/oxc_parser/src/lexer/template.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::{cmp::max, str};
22

3-
use oxc_allocator::String;
3+
use oxc_allocator::StringBuilder;
44

55
use crate::diagnostics;
66

@@ -194,14 +194,14 @@ impl<'a> Lexer<'a> {
194194
///
195195
/// # SAFETY
196196
/// `pos` must not be before `self.source.position()`
197-
unsafe fn template_literal_create_string(&self, pos: SourcePosition<'a>) -> String<'a> {
197+
unsafe fn template_literal_create_string(&self, pos: SourcePosition<'a>) -> StringBuilder<'a> {
198198
// Create arena string to hold modified template literal.
199199
// We don't know how long template literal will end up being. Take a guess that total length
200200
// will be double what we've seen so far, or `MIN_ESCAPED_TEMPLATE_LIT_LEN` minimum.
201201
// SAFETY: Caller guarantees `pos` is not before `self.source.position()`.
202202
let so_far = unsafe { self.source.str_from_current_to_pos_unchecked(pos) };
203203
let capacity = max(so_far.len() * 2, MIN_ESCAPED_TEMPLATE_LIT_LEN);
204-
let mut str = String::with_capacity_in(capacity, self.allocator);
204+
let mut str = StringBuilder::with_capacity_in(capacity, self.allocator);
205205
str.push_str(so_far);
206206
str
207207
}
@@ -212,7 +212,7 @@ impl<'a> Lexer<'a> {
212212
/// `chunk_start` must not be after `pos`.
213213
unsafe fn template_literal_escaped(
214214
&mut self,
215-
mut str: String<'a>,
215+
mut str: StringBuilder<'a>,
216216
pos: SourcePosition<'a>,
217217
mut chunk_start: SourcePosition<'a>,
218218
mut is_valid_escape_sequence: bool,
@@ -379,7 +379,7 @@ impl<'a> Lexer<'a> {
379379
},
380380
};
381381

382-
self.save_template_string(is_valid_escape_sequence, str.into_bump_str());
382+
self.save_template_string(is_valid_escape_sequence, str.into_str());
383383

384384
ret
385385
}

crates/oxc_parser/src/lexer/unicode.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::{borrow::Cow, fmt::Write};
22

33
use cow_utils::CowUtils;
44

5-
use oxc_allocator::String;
5+
use oxc_allocator::StringBuilder;
66
use oxc_syntax::identifier::{
77
CR, FF, LF, LS, PS, TAB, VT, is_identifier_part, is_identifier_start,
88
is_identifier_start_unicode, is_irregular_line_terminator, is_irregular_whitespace,
@@ -61,7 +61,7 @@ impl<'a> Lexer<'a> {
6161
/// \u{ `CodePoint` }
6262
pub(super) fn identifier_unicode_escape_sequence(
6363
&mut self,
64-
str: &mut String<'a>,
64+
str: &mut StringBuilder<'a>,
6565
check_identifier_start: bool,
6666
) {
6767
let start = self.offset();
@@ -115,7 +115,7 @@ impl<'a> Lexer<'a> {
115115
/// \u{ `CodePoint` }
116116
fn string_unicode_escape_sequence(
117117
&mut self,
118-
text: &mut String<'a>,
118+
text: &mut StringBuilder<'a>,
119119
is_valid_escape_sequence: &mut bool,
120120
) {
121121
let value = match self.peek_byte() {
@@ -153,7 +153,7 @@ impl<'a> Lexer<'a> {
153153
}
154154

155155
/// Lone surrogate found in string.
156-
fn string_lone_surrogate(&mut self, code_point: u32, text: &mut String<'a>) {
156+
fn string_lone_surrogate(&mut self, code_point: u32, text: &mut StringBuilder<'a>) {
157157
debug_assert!(code_point <= 0xFFFF);
158158

159159
if !self.token.lone_surrogates() {
@@ -167,7 +167,7 @@ impl<'a> Lexer<'a> {
167167
// But strings containing both lone surrogates and lossy replacement characters
168168
// should be vanishingly rare, so don't bother.
169169
if let Cow::Owned(replaced) = text.cow_replace("\u{FFFD}", "\u{FFFD}fffd") {
170-
*text = String::from_str_in(&replaced, self.allocator);
170+
*text = StringBuilder::from_str_in(&replaced, self.allocator);
171171
}
172172
}
173173

@@ -307,7 +307,7 @@ impl<'a> Lexer<'a> {
307307
// EscapeSequence ::
308308
pub(super) fn read_string_escape_sequence(
309309
&mut self,
310-
text: &mut String<'a>,
310+
text: &mut StringBuilder<'a>,
311311
in_template: bool,
312312
is_valid_escape_sequence: &mut bool,
313313
) {
@@ -380,14 +380,14 @@ impl<'a> Lexer<'a> {
380380
// 192-255: `0xC3`, followed by code point value - 64.
381381
let bytes = [0xC0 + first_digit, value & 0b1011_1111];
382382
// SAFETY: `bytes` is a valid 2-byte UTF-8 sequence
383-
unsafe { text.as_mut_vec().extend_from_slice(&bytes) };
383+
unsafe { text.push_bytes_unchecked(&bytes) };
384384
return;
385385
}
386386
}
387387
}
388388

389389
// SAFETY: `value` is in range 0 to `((1 * 8) + 7) * 8 + 7` (127) i.e. ASCII
390-
unsafe { text.as_mut_vec().push(value) };
390+
unsafe { text.push_byte_unchecked(value) };
391391
}
392392
'0' if in_template && self.peek_byte().is_some_and(|b| b.is_ascii_digit()) => {
393393
self.consume_char();

0 commit comments

Comments
 (0)