only split by newlines

To reduce the overhead of the Extractor itself, we can chunk the work by lines instead of by every whitespace-separated chunk. This seems to reduce the overall cost even further! Co-authored-by: Jordan Pittman <[email protected]>
tailwindlabs · adamwathan · Dec 2, 2024 · Dec 2, 2024 · Dec 2, 2024 · Dec 2, 2024
commit 8fe397717d11e85737220049ebc8fcdc02744628
diff --git a/crates/oxide/src/lib.rs b/crates/oxide/src/lib.rs
@@ -456,7 +456,7 @@ fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
 fn parse_all_blobs(blobs: Vec<Vec<u8>>) -> Vec<String> {
     let mut result: Vec<_> = blobs
         .par_iter()
-        .flat_map(|blob| blob.par_split(|x| x.is_ascii_whitespace()))
+        .flat_map(|blob| blob.par_split(|x| matches!(x, b'\n' | b'\r')))
         .map(|blob| Extractor::unique(blob, Default::default()))
         .reduce(Default::default, |mut a, b| {
             a.extend(b);