Skip to content

Commit 7fcbdd0

Browse files
committed
slight consolidation/optimization of common isascii(c), c % UInt8 codepath
1 parent 5a69b0f commit 7fcbdd0

File tree

2 files changed

+14
-6
lines changed

2 files changed

+14
-6
lines changed

src/tokenize.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,9 +1282,9 @@ function lex_identifier(l::Lexer, c)
12821282
graphemestate_peek = Ref(zero(Int32))
12831283
while true
12841284
pc, ppc = dpeekchar(l)
1285-
ascii = ascii && isascii(pc)
1285+
pc_byte = Unicode.ascii_byte(pc)
1286+
ascii = ascii && pc_byte != 0xff
12861287
if ascii # fast path
1287-
pc_byte = pc % UInt8
12881288
@inbounds if (pc_byte == UInt8('!') && ppc == '=') || !ascii_is_identifier_char[pc_byte+1]
12891289
break
12901290
end

src/unicode.jl

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,18 @@ function _is_identifier_start_char(c::UInt32, cat::Integer)
153153
(c >= 0x1D7CE && c <= 0x1D7E1)) # 𝟎 through 𝟗 (inclusive), 𝟘 through 𝟡 (inclusive)
154154
end
155155

156+
# utility function to return the ASCII byte if isascii(c),
157+
# and otherwise (for non-ASCII or invalid chars) return 0xff,
158+
# based on the isascii source code.
159+
@inline function ascii_byte(c::Char)
160+
x = bswap(reinterpret(UInt32, c))
161+
return x < 0x80 ? x % UInt8 : 0xff
162+
end
163+
156164
# from jl_id_start_char in julia/src/flisp/julia_extensions.c
157165
function is_identifier_start_char(c::Char)
158-
if isascii(c)
159-
a = c % UInt8
166+
a = ascii_byte(c)
167+
if a != 0xff
160168
return (a >= UInt8('A') && a <= UInt8('Z')) || (a >= UInt8('a') && a <= UInt8('z')) || a == UInt8('_')
161169
end
162170
if c < Char(0xA1) || !isvalid(c)
@@ -168,8 +176,8 @@ end
168176

169177
# from jl_id_char in julia/src/flisp/julia_extensions.c
170178
function is_identifier_char(c::Char)
171-
if isascii(c)
172-
a = c % UInt8
179+
a = ascii_byte(c)
180+
if a != 0xff
173181
return (a >= UInt8('A') && a <= UInt8('Z')) || (a >= UInt8('a') && a <= UInt8('z')) ||
174182
a == UInt8('_') || (a >= UInt8('0') && a <= UInt8('9')) || a == UInt8('!')
175183
end

0 commit comments

Comments
 (0)