Skip to content

Commit 4615e28

Browse files
committed
Improve performance of clean::{ellipsis, guillemets, dashes}
1 parent 04ecfb5 commit 4615e28

File tree

1 file changed

+14
-7
lines changed

1 file changed

+14
-7
lines changed

src/lib/clean.rs

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,9 @@ fn char_class(c: char) -> CharClass {
8989
pub fn ellipsis<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
9090
lazy_static! {
9191
static ref REGEX: Regex = Regex::new(r"\.\.\.|\. \. \. ").unwrap();
92+
static ref UNICODE_ELLIPSIS: &'static [u8] = "…".as_bytes();
93+
static ref NB_ELLIPSIS: &'static [u8] = ". . . ".as_bytes();
94+
static ref FULL_NB_ELLIPSIS: &'static [u8] = ". . . ".as_bytes();
9295
}
9396
let input = input.into();
9497
let first = REGEX.find(&input);
@@ -100,13 +103,13 @@ pub fn ellipsis<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
100103
let mut i = 0;
101104
while i < len {
102105
if i + 3 <= len && &rest[i..(i + 3)] == &[b'.', b'.', b'.'] {
103-
output.extend_from_slice("…".as_bytes());
106+
output.extend_from_slice(*UNICODE_ELLIPSIS);
104107
i += 3;
105108
} else if i + 6 <= len && &rest[i..(i + 6)] == &[b'.', b' ', b'.', b' ', b'.', b' '] {
106109
if i + 6 == len || rest[i + 6] != b'.' {
107-
output.extend_from_slice(". . . ".as_bytes());
110+
output.extend_from_slice(*NB_ELLIPSIS);
108111
} else {
109-
output.extend_from_slice(". . . ".as_bytes());
112+
output.extend_from_slice(*FULL_NB_ELLIPSIS);
110113
}
111114
i += 6;
112115
} else {
@@ -267,6 +270,8 @@ pub fn quotes<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
267270
pub fn dashes<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
268271
lazy_static! {
269272
static ref REGEX: Regex = Regex::new(r"\x2D\x2D").unwrap();
273+
static ref EN_SPACE: &'static [u8] = "–".as_bytes();
274+
static ref EM_SPACE: &'static [u8] = "—".as_bytes();
270275
}
271276
let input = input.into();
272277
let first = REGEX.find(&input);
@@ -279,10 +284,10 @@ pub fn dashes<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
279284
while i < len {
280285
if i + 2 <= len && &rest[i..(i + 2)] == &[b'-', b'-'] {
281286
if i + 2 < len && rest[i + 2] == b'-' {
282-
output.extend_from_slice("—".as_bytes());
287+
output.extend_from_slice(*EM_SPACE);
283288
i += 3;
284289
} else {
285-
output.extend_from_slice("–".as_bytes());
290+
output.extend_from_slice(*EN_SPACE);
286291
i += 2;
287292
}
288293
} else {
@@ -312,6 +317,8 @@ pub fn dashes<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
312317
pub fn guillemets<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
313318
lazy_static! {
314319
static ref REGEX: Regex = Regex::new(r"<<|>>").unwrap();
320+
static ref OPENING_GUILLEMET: &'static [u8] = "«".as_bytes();
321+
static ref CLOSING_GUILLEMET: &'static [u8] = "»".as_bytes();
315322
}
316323
let input = input.into();
317324
let first = REGEX.find(&input);
@@ -323,10 +330,10 @@ pub fn guillemets<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
323330
let mut i = 0;
324331
while i < len {
325332
if i + 2 <= len && &rest[i..(i + 2)] == &[b'<', b'<'] {
326-
output.extend_from_slice("«".as_bytes());
333+
output.extend_from_slice(*OPENING_GUILLEMET);
327334
i += 2;
328335
} else if i+2 <= len && &rest[i..(i + 2)] == &[b'>', b'>'] {
329-
output.extend_from_slice("»".as_bytes());
336+
output.extend_from_slice(*CLOSING_GUILLEMET);
330337
i += 2;
331338
} else {
332339
output.push(rest[i]);

0 commit comments

Comments
 (0)