Skip to content

Commit 25679e6

Browse files
committed
perf(parser): optimize Lexer::hex_digit (#4572)
Optimize `Lexer::hex_digit`. Rather than checking for `A-F` and `a-f` separately, we can check for both ranges in one go. `b'A' | 32 == b'a'` (and the same holds for every other ASCII letter), so matching against `b | 32` tests for a letter in either lower or upper case with a single comparison.
1 parent 247b2af commit 25679e6

File tree

1 file changed

+23
-7
lines changed

1 file changed

+23
-7
lines changed

crates/oxc_parser/src/lexer/unicode.rs

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -160,14 +160,30 @@ impl<'a> Lexer<'a> {
160160
}
161161

162162
fn hex_digit(&mut self) -> Option<u32> {
163-
let value = match self.peek_byte() {
164-
Some(b @ b'0'..=b'9') => u32::from(b) - '0' as u32,
165-
Some(b @ b'a'..=b'f') => 10 + (u32::from(b) - 'a' as u32),
166-
Some(b @ b'A'..=b'F') => 10 + (u32::from(b) - 'A' as u32),
167-
_ => return None,
163+
// Reduce instructions and remove 1 branch by comparing against `A-F` and `a-f` simultaneously
164+
// https://godbolt.org/z/9caMMzvP3
165+
let value = if let Some(b) = self.peek_byte() {
166+
if b.is_ascii_digit() {
167+
b - b'0'
168+
} else {
169+
// Match `A-F` or `a-f`. `b | 32` converts uppercase letters to lowercase,
170+
// but leaves lowercase as they are
171+
let lower_case = b | 32;
172+
if matches!(lower_case, b'a'..=b'f') {
173+
lower_case + 10 - b'a'
174+
} else {
175+
return None;
176+
}
177+
}
178+
} else {
179+
return None;
168180
};
169-
self.consume_char();
170-
Some(value)
181+
// Because of `b | 32` above, compiler cannot deduce that next byte is definitely ASCII
182+
// so `next_byte_unchecked` is necessary to produce compact assembly, rather than `consume_char`.
183+
// SAFETY: This code is only reachable if there is a byte remaining, and it's ASCII.
184+
// Therefore it's safe to consume that byte, and will leave position on a UTF-8 char boundary.
185+
unsafe { self.source.next_byte_unchecked() };
186+
Some(u32::from(value))
171187
}
172188

173189
fn code_point(&mut self) -> Option<u32> {

0 commit comments

Comments
 (0)