From 788d61baa29b6e2d6bcb94a6576dad820a6388ba Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Fri, 25 Oct 2024 21:45:23 +0200 Subject: [PATCH 01/17] Update crates/oxide/src/lib.rs Co-authored-by: Jordan Pittman From 9e9c5a259506b3cb9014921b0dfd54f9784cbc08 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 15:34:25 +0100 Subject: [PATCH 02/17] add failing test This proves that right now when using a glob, that files that should be git ignored are _not_ git ignored. --- integrations/cli/index.test.ts | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/integrations/cli/index.test.ts b/integrations/cli/index.test.ts index b36ca4c07688..cd0c20a5dfaa 100644 --- a/integrations/cli/index.test.ts +++ b/integrations/cli/index.test.ts @@ -256,7 +256,7 @@ describe.each([ ) }) -test( +test.only( 'source(…) and `@source` can be configured to use auto source detection (build + watch mode)', { fs: { @@ -280,9 +280,7 @@ test( /* Run auto-content detection in ../../project-b */ @import 'tailwindcss/utilities' source('../../project-b'); - /* Additive: */ - /* {my-lib-1,my-lib-2}: expand */ - /* *.html: only look for .html */ + /* Explicitly using node_modules in the @source allows git ignored folders */ @source '../node_modules/{my-lib-1,my-lib-2}/src/**/*.html'; /* We typically ignore these extensions, but now include them explicitly */ @@ -290,6 +288,9 @@ test( /* Project C should apply auto source detection */ @source '../../project-c'; + + /* Project D should apply auto source detection rules, such as ignoring node_modules */ + @source '../../project-d/**/*.{html,js}'; `, // Project A is the current folder, but we explicitly configured @@ -362,6 +363,21 @@ test( class="content-['SHOULD-NOT-EXIST-IN-OUTPUT'] content-['project-c/node_modules/my-lib-1/src/index.html']" > `, + + // Project D should apply auto source detection rules, such as ignoring + // node_modules. 
+ 'project-d/node_modules/my-lib-1/src/index.html': html` +
+ `, + + // Project D should look for files with the extensions html and js. + 'project-d/src/index.html': html` +
+ `, }, }, async ({ fs, exec, spawn, root }) => { @@ -392,6 +408,10 @@ test( --tw-content: 'project-c/src/index.html'; content: var(--tw-content); } + .content-\\[\\'project-d\\/src\\/index\\.html\\'\\] { + --tw-content: 'project-d/src/index.html'; + content: var(--tw-content); + } @supports (-moz-orient: inline) { @layer base { *, ::before, ::after, ::backdrop { From 22af714cc1fd3be5d788e6b661160c637b527e0f Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 18:31:50 +0100 Subject: [PATCH 03/17] add globset --- Cargo.lock | 1 + crates/oxide/Cargo.toml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index eb9602407af0..e339ec660621 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -525,6 +525,7 @@ dependencies = [ "crossbeam", "dunce", "glob-match", + "globset", "globwalk", "ignore", "log", diff --git a/crates/oxide/Cargo.toml b/crates/oxide/Cargo.toml index 763b0c291f69..e873fcc00452 100644 --- a/crates/oxide/Cargo.toml +++ b/crates/oxide/Cargo.toml @@ -14,9 +14,10 @@ tracing = { version = "0.1.40", features = [] } tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } walkdir = "2.5.0" ignore = "0.4.23" -glob-match = "0.2.1" dunce = "1.0.5" bexpand = "1.2.0" +globset = "0.4.15" +glob-match = "0.2.1" [dev-dependencies] tempfile = "3.13.0" From eb7e5e69889093fa8b842b34d31d3e6365d23f5b Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 18:32:30 +0100 Subject: [PATCH 04/17] split static and dynamic part of glob, then resolve --- crates/oxide/src/glob.rs | 203 ++++++++++++---------- crates/oxide/src/lib.rs | 76 ++++---- crates/oxide/src/scanner/allowed_paths.rs | 11 +- 3 files changed, 160 insertions(+), 130 deletions(-) diff --git a/crates/oxide/src/glob.rs b/crates/oxide/src/glob.rs index d74b60b4d824..44e80d411899 100644 --- a/crates/oxide/src/glob.rs +++ b/crates/oxide/src/glob.rs @@ -1,24 +1,64 @@ +use fxhash::{FxHashMap, FxHashSet}; use glob_match::glob_match; use std::iter; 
use std::path::{Path, PathBuf}; +use tracing::event; use crate::GlobEntry; pub fn fast_glob( patterns: &Vec, ) -> Result, std::io::Error> { - Ok(get_fast_patterns(patterns) + Ok(optimize_patterns(patterns) .into_iter() - .flat_map(|(base_path, patterns)| { - globwalk::GlobWalkerBuilder::from_patterns(base_path, &patterns) - .follow_links(true) - .build() - .unwrap() - .filter_map(Result::ok) - .map(|file| file.path().to_path_buf()) + .flat_map(|glob_entry| { + globwalk::GlobWalkerBuilder::from_patterns( + glob_entry.base, + &[glob_entry.pattern.as_str()][..], + ) + .follow_links(true) + .build() + .unwrap() + .filter_map(Result::ok) + .map(|file| file.path().to_path_buf()) })) } +pub fn hoist_static_glob_parts(entries: &Vec) -> Vec { + let mut result = vec![]; + + for entry in entries { + let (static_part, dynamic_part) = split_pattern(&entry.pattern); + + let base: PathBuf = entry.base.clone().into(); + let base = match static_part { + Some(static_part) => base.join(static_part), + None => base, + }; + + let base = match dunce::canonicalize(&base) { + Ok(base) => base, + Err(err) => { + event!(tracing::Level::ERROR, "Failed to resolve glob: {:?}", err); + // If we can't resolve the new base on disk, let's just skip this entry. + continue; + } + }; + + let pattern = match dynamic_part { + Some(dynamic_part) => dynamic_part, + None => "**/*".to_owned(), + }; + + result.push(GlobEntry { + base: base.to_string_lossy().to_string(), + pattern, + }); + } + + result +} + /// This function attempts to optimize the glob patterns to improve performance. 
The problem is /// that if you run the following command: /// ```sh @@ -42,98 +82,83 @@ pub fn fast_glob( /// tailwind --pwd ./project/pages --content "**/*.js" /// tailwind --pwd ./project/components --content "**/*.js" /// ``` -pub fn get_fast_patterns(patterns: &Vec) -> Vec<(PathBuf, Vec)> { - let mut optimized_patterns: Vec<(PathBuf, Vec)> = vec![]; +pub fn optimize_patterns(entries: &Vec) -> Vec { + let entries = hoist_static_glob_parts(entries); - for pattern in patterns { - let base_path = PathBuf::from(&pattern.base); - let pattern = &pattern.pattern; + // Track all base paths and their patterns. Later we will turn them back into `GlobalEntry`s. + let mut pattern_map: FxHashMap> = FxHashMap::default(); - let is_negated = pattern.starts_with('!'); - let mut pattern = pattern.clone(); - if is_negated { - pattern.remove(0); - } + for glob_entry in entries { + let entry = pattern_map.entry(glob_entry.base).or_default(); + entry.insert(glob_entry.pattern.clone()); + } - let mut folders = pattern.split('/').collect::>(); - - if folders.len() <= 1 { - // No paths we can simplify, so let's use it as-is. - optimized_patterns.push((base_path, vec![pattern])); - } else { - // We do have folders because `/` exists. Let's try to simplify the globs! - // Safety: We know that the length is greater than 1, so we can safely unwrap. - let file_pattern = folders.pop().unwrap(); - let all_folders = folders.clone(); - let mut temp_paths = vec![base_path]; - - let mut bail = false; - - for (i, folder) in folders.into_iter().enumerate() { - // There is a wildcard in the folder, so we have to bail now... 😢 But this also - // means that we can skip looking at the rest of the folders, so there is at least - // this small optimization we can apply! - if folder.contains('*') { - // Get all the remaining folders, attach the existing file_pattern so that this - // can now be the final pattern we use. 
- let mut remaining_folders = all_folders[i..].to_vec(); - remaining_folders.push(file_pattern); - - let pattern = remaining_folders.join("/"); - for path in &temp_paths { - optimized_patterns.push((path.to_path_buf(), vec![pattern.to_string()])); - } - - bail = true; - break; - } + // TODO: Optimization, if any of the patterns result in `**/*`, then we can do two things: + // 1. All base paths in the pattern_map, that start with the current base path, can be removed. + // 2. All patterns that are not `**/*` can be removed from the current base path. - // The folder is very likely using an expandable pattern which we can expand! - if folder.contains('{') && folder.contains('}') { - let branches = expand_braces(folder); - - let existing_paths = temp_paths; - temp_paths = branches - .iter() - .flat_map(|branch| { - existing_paths - .clone() - .into_iter() - .map(|path| path.join(branch)) - .collect::>() - }) - .collect::>(); - } - // The folder should just be a simple folder name without any glob magic. We should - // be able to safely add it to the existing paths. - else { - temp_paths = temp_paths - .into_iter() - .map(|path| path.join(folder)) - .collect(); - } + pattern_map + .into_iter() + .map(|(base, patterns)| { + let size = patterns.len(); + let mut patterns = patterns.into_iter().collect::>(); + patterns.sort(); + let combined_patterns = patterns.join(","); + + // TODO: Right now this will generate something like `{**/*.html,**/*.js}`, but maybe + // we want to generate this instead:`**/*.{html,js}`. + + GlobEntry { + base, + pattern: match size { + 1 => combined_patterns, + _ => format!("{{{}}}", combined_patterns), + }, } + }) + .collect::>() +} - // As long as we didn't bail, we can now add the current expanded patterns to the - // optimized patterns. - if !bail { - for path in &temp_paths { - optimized_patterns.push((path.to_path_buf(), vec![file_pattern.to_string()])); - } - } +// Split a glob pattern into a `static` and `dynamic` part. 
+// +// Assumption: we assume that all globs are expanded, which means that the only dynamic parts are +// using `*`. +// +// E.g.: +// Original input: `../project-b/**/*.{html,js}` +// Expanded input: `../project-b/**/*.html` & `../project-b/**/*.js` +// Split on first input: ("../project-b", "**/*.html") +// Split on second input: ("../project-b", "**/*.js") +fn split_pattern(input: &str) -> (Option, Option) { + // No dynamic parts, so we can just return the input as-is. + if !input.contains('*') { + return (Some(input.to_owned()), None); + } + + let mut last_slash_position = None; + + for (i, c) in input.char_indices() { + if c == '/' { + last_slash_position = Some(i); } - // Ensure that we re-add all the `!` signs to the patterns. - if is_negated { - for (_, patterns) in &mut optimized_patterns { - for pattern in patterns { - pattern.insert(0, '!'); - } - } + if c == '*' { + break; } } - optimized_patterns + // Very first character is a `*`, therefore there is no static part, only a dynamic part. 
+ let Some(last_slash_position) = last_slash_position else { + return (None, Some(input.to_owned())); + }; + + let static_part = input[..last_slash_position].to_owned(); + let dynamic_part = input[last_slash_position + 1..].to_owned(); + + let static_part = (!static_part.is_empty()).then_some(static_part); + let dynamic_part = (!dynamic_part.is_empty()).then_some(dynamic_part); + + (static_part, dynamic_part) } pub fn path_matches_globs(path: &Path, globs: &[GlobEntry]) -> bool { diff --git a/crates/oxide/src/lib.rs b/crates/oxide/src/lib.rs index 24eecca8bc0c..4e4e7749f731 100644 --- a/crates/oxide/src/lib.rs +++ b/crates/oxide/src/lib.rs @@ -1,10 +1,12 @@ +use crate::glob::hoist_static_glob_parts; use crate::parser::Extractor; +use crate::scanner::allowed_paths::resolve_paths; use crate::scanner::detect_sources::DetectSources; use bexpand::Expression; use bstr::ByteSlice; use fxhash::{FxHashMap, FxHashSet}; -use glob::fast_glob; -use glob::get_fast_patterns; +use glob::optimize_patterns; +use globset::Glob; use rayon::prelude::*; use std::fs; use std::path::PathBuf; @@ -255,9 +257,6 @@ impl Scanner { false }); - // Turn `Vec<&GlobEntry>` in `Vec` - let glob_sources: Vec<_> = glob_sources.into_iter().cloned().collect(); - for path in auto_sources .iter() .map(|source| PathBuf::from(&source.base).join(source.pattern.trim_end_matches("**/*"))) @@ -269,46 +268,43 @@ impl Scanner { self.globs.extend(globs); } - let resolved_files: Vec<_> = match fast_glob(&glob_sources) { - Ok(matches) => matches - .filter_map(|x| dunce::canonicalize(&x).ok()) - .collect(), - Err(err) => { - event!(tracing::Level::ERROR, "Failed to resolve glob: {:?}", err); - vec![] - } - }; + // Turn `Vec<&GlobEntry>` in `Vec` + let glob_sources: Vec<_> = glob_sources.into_iter().cloned().collect(); + let hoisted = hoist_static_glob_parts(&glob_sources); - self.files.extend(resolved_files); - self.globs.extend(glob_sources); + for source in &hoisted { + let Ok(glob) = Glob::new(&source.base) else { 
+ continue; + }; - // Re-optimize the globs to reduce the number of patterns we have to scan. - self.globs = get_fast_patterns(&self.globs) - .into_iter() - .filter_map(|(root, globs)| { - let root = match dunce::canonicalize(root) { - Ok(root) => root, - Err(error) => { - event!( - tracing::Level::ERROR, - "Failed to canonicalize base path {:?}", - error - ); - return None; - } + let glob = glob.compile_matcher(); + + let base = PathBuf::from(&source.base); + for entry in resolve_paths(&base) { + let Some(file_type) = entry.file_type() else { + continue; }; - Some((root, globs)) - }) - .flat_map(|(root, globs)| { - let base = root.display().to_string(); + if !file_type.is_file() { + continue; + } - globs.into_iter().map(move |glob| GlobEntry { - base: base.clone(), - pattern: glob, - }) - }) - .collect::>(); + let file_path = entry.into_path(); + + let Some(file_path_str) = file_path.to_str() else { + continue; + }; + + if glob.is_match(file_path_str) { + self.files.push(file_path); + } + } + } + + self.globs.extend(hoisted); + + // Re-optimize the globs to reduce the number of patterns we have to scan. 
+ self.globs = optimize_patterns(&self.globs); } } diff --git a/crates/oxide/src/scanner/allowed_paths.rs b/crates/oxide/src/scanner/allowed_paths.rs index 3015e9dd0465..a761cd34b03c 100644 --- a/crates/oxide/src/scanner/allowed_paths.rs +++ b/crates/oxide/src/scanner/allowed_paths.rs @@ -30,7 +30,7 @@ pub fn resolve_allowed_paths(root: &Path) -> impl Iterator { WalkBuilder::new(root) .hidden(false) .require_git(false) - .filter_entry(|entry| match entry.file_type() { + .filter_entry(move |entry| match entry.file_type() { Some(file_type) if file_type.is_dir() => match entry.file_name().to_str() { Some(dir) => !IGNORED_CONTENT_DIRS.contains(&dir), None => false, @@ -44,6 +44,15 @@ pub fn resolve_allowed_paths(root: &Path) -> impl Iterator { .filter_map(Result::ok) } +#[tracing::instrument(skip(root))] +pub fn resolve_paths(root: &Path) -> impl Iterator { + WalkBuilder::new(root) + .hidden(false) + .require_git(false) + .build() + .filter_map(Result::ok) +} + pub fn is_allowed_content_path(path: &Path) -> bool { // Skip known ignored files if path From 0e3c1a721ca2a5275f2aed3f27d43db54117adb2 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 18:33:37 +0100 Subject: [PATCH 05/17] =?UTF-8?q?always=20log=20`exec(=E2=80=A6)`=20result?= =?UTF-8?q?=20when=20using=20`test.only`=20or=20`test.debug`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- integrations/utils.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/integrations/utils.ts b/integrations/utils.ts index 9e11c5a7358a..9389d638b381 100644 --- a/integrations/utils.ts +++ b/integrations/utils.ts @@ -112,8 +112,14 @@ export function test( (error, stdout, stderr) => { if (error) { if (execOptions.ignoreStdErr !== true) console.error(stderr) + if (only || debug) { + console.error(stdout) + } reject(error) } else { + if (only || debug) { + console.log(stdout.toString() + '\n\n' + stderr.toString()) + } resolve(stdout.toString() 
+ '\n\n' + stderr.toString()) } }, @@ -187,14 +193,14 @@ export function test( child.stdout.on('data', (result) => { let content = result.toString() - if (debug) console.log(content) + if (debug || only) console.log(content) combined.push(['stdout', content]) stdoutMessages.push(content) notifyNext(stdoutActors, stdoutMessages) }) child.stderr.on('data', (result) => { let content = result.toString() - if (debug) console.error(content) + if (debug || only) console.error(content) combined.push(['stderr', content]) stderrMessages.push(content) notifyNext(stderrActors, stderrMessages) From 7b25fcd04cb2a896d09482aa6a0a8ddbde4fbe0b Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 18:34:34 +0100 Subject: [PATCH 06/17] =?UTF-8?q?extend=20`@source=20"=E2=80=A6";`=20and?= =?UTF-8?q?=20`source(=E2=80=A6)`=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This test now makes sure that: 1. If a glob is used, and it contains `**`, then it should _not_ resolve ignored files/folders like `node_modules`. 2. If an explicit glob is used that contains `node_modules`, then we will resolve files in this folder. 3. If an explicit glob is used, and that glob includes file extensions we normally ignore then those files should resolve because the glob is explicit. 
--- integrations/cli/index.test.ts | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/integrations/cli/index.test.ts b/integrations/cli/index.test.ts index cd0c20a5dfaa..b1b17eb1af40 100644 --- a/integrations/cli/index.test.ts +++ b/integrations/cli/index.test.ts @@ -256,7 +256,7 @@ describe.each([ ) }) -test.only( +test( 'source(…) and `@source` can be configured to use auto source detection (build + watch mode)', { fs: { @@ -291,6 +291,10 @@ test.only( /* Project D should apply auto source detection rules, such as ignoring node_modules */ @source '../../project-d/**/*.{html,js}'; + @source '../../project-d/**/*.bin'; + + /* Same as above, but my-lib-2 _should_ be includes */ + @source '../../project-d/node_modules/my-lib-2/*.{html,js}'; `, // Project A is the current folder, but we explicitly configured @@ -372,12 +376,27 @@ test.only( > `, + // Project D has an explicit glob containing node_modules, thus should include the html file + 'project-d/node_modules/my-lib-2/src/index.html': html` +
+ `, + // Project D should look for files with the extensions html and js. 'project-d/src/index.html': html`
`, + + // Project D should have a binary file even though we ignore binary files + // by default, but it's explicitly listed. + 'project-d/my-binary-file.bin': html` +
+ `, }, }, async ({ fs, exec, spawn, root }) => { @@ -408,6 +427,14 @@ test.only( --tw-content: 'project-c/src/index.html'; content: var(--tw-content); } + .content-\\[\\'project-d\\/my-binary-file\\.bin\\'\\] { + --tw-content: 'project-d/my-binary-file.bin'; + content: var(--tw-content); + } + .content-\\[\\'project-d\\/node_modules\\/my-lib-2\\/src\\/index\\.html\\'\\] { + --tw-content: 'project-d/node modules/my-lib-2/src/index.html'; + content: var(--tw-content); + } .content-\\[\\'project-d\\/src\\/index\\.html\\'\\] { --tw-content: 'project-d/src/index.html'; content: var(--tw-content); From 0e132dbe94a46a56a6d5c1f4d16abf50f811223c Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 19:45:17 +0100 Subject: [PATCH 07/17] ensure glob entries are sorted This gives us stable results, which is useful during debugging and in tests --- crates/oxide/src/glob.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/oxide/src/glob.rs b/crates/oxide/src/glob.rs index 44e80d411899..94610097caa0 100644 --- a/crates/oxide/src/glob.rs +++ b/crates/oxide/src/glob.rs @@ -97,7 +97,7 @@ pub fn optimize_patterns(entries: &Vec) -> Vec { // 1. All base paths in the pattern_map, that start with the current base path, can be removed. // 2. All patterns that are not `**/*` can be removed from the current base path. - pattern_map + let mut glob_entries = pattern_map .into_iter() .map(|(base, patterns)| { let size = patterns.len(); @@ -116,7 +116,12 @@ pub fn optimize_patterns(entries: &Vec) -> Vec { }, } }) - .collect::>() + .collect::>(); + + // Sort the entries by base path to ensure we have stable results. + glob_entries.sort_by(|a, z| a.base.cmp(&z.base)); + + glob_entries } // Split a glob pattern into a `static` and `dynamic` part. 
From d58a555f4d98f49efa100eb28d9814c1274d34f6 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 19:47:21 +0100 Subject: [PATCH 08/17] rename `input` to `pattern` And make sure that we break once we find a `!`. --- crates/oxide/src/glob.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/oxide/src/glob.rs b/crates/oxide/src/glob.rs index 94610097caa0..2f60fbf7ef45 100644 --- a/crates/oxide/src/glob.rs +++ b/crates/oxide/src/glob.rs @@ -134,31 +134,31 @@ pub fn optimize_patterns(entries: &Vec) -> Vec { // Expanded input: `../project-b/**/*.html` & `../project-b/**/*.js` // Split on first input: ("../project-b", "**/*.html") // Split on second input: ("../project-b", "**/*.js") -fn split_pattern(input: &str) -> (Option, Option) { +fn split_pattern(pattern: &str) -> (Option, Option) { // No dynamic parts, so we can just return the input as-is. - if !input.contains('*') { - return (Some(input.to_owned()), None); + if !pattern.contains('*') { + return (Some(pattern.to_owned()), None); } let mut last_slash_position = None; - for (i, c) in input.char_indices() { + for (i, c) in pattern.char_indices() { if c == '/' { last_slash_position = Some(i); } - if c == '*' { + if c == '*' || c == '!' { break; } } // Very first character is a `*`, therefore there is no static part, only a dynamic part. 
let Some(last_slash_position) = last_slash_position else { - return (None, Some(input.to_owned())); + return (None, Some(pattern.to_owned())); }; - let static_part = input[..last_slash_position].to_owned(); - let dynamic_part = input[last_slash_position + 1..].to_owned(); + let static_part = pattern[..last_slash_position].to_owned(); + let dynamic_part = pattern[last_slash_position + 1..].to_owned(); let static_part = (!static_part.is_empty()).then_some(static_part); let dynamic_part = (!dynamic_part.is_empty()).then_some(dynamic_part); From 1b7a31c0994284a8ae2ba06b131fae63c85ccd17 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 19:48:42 +0100 Subject: [PATCH 09/17] drop unused code, and update tests with new structure --- crates/oxide/src/glob.rs | 579 ++++++++++++++++++++++----------------- 1 file changed, 328 insertions(+), 251 deletions(-) diff --git a/crates/oxide/src/glob.rs b/crates/oxide/src/glob.rs index 2f60fbf7ef45..57199b43fe2f 100644 --- a/crates/oxide/src/glob.rs +++ b/crates/oxide/src/glob.rs @@ -174,167 +174,186 @@ pub fn path_matches_globs(path: &Path, globs: &[GlobEntry]) -> bool { .any(|g| glob_match(&format!("{}/{}", g.base, g.pattern), &path)) } -/// Given this input: a-{b,c}-d-{e,f} -/// We will get: -/// [ -/// a-b-d-e -/// a-b-d-f -/// a-c-d-e -/// a-c-d-f -/// ] -/// TODO: There is probably a way nicer way of doing this, but this works for now. -fn expand_braces(input: &str) -> Vec { - let mut result: Vec = vec![]; - - let mut in_braces = false; - let mut last_char: char = '\0'; - - let mut current = String::new(); - - // Given the input: a-{b,c}-d-{e,f}-g - // The template will look like this: ["a-", "-d-", "g"]. - let mut template: Vec = vec![]; - - // The branches will look like this: [["b", "c"], ["e", "f"]]. 
- let mut branches: Vec> = vec![]; - - for (i, c) in input.char_indices() { - let is_escaped = i > 0 && last_char == '\\'; - last_char = c; - - match c { - '{' if !is_escaped => { - // Ensure that when a new set of braces is opened, that we at least have 1 - // template. - if template.is_empty() { - template.push(String::new()); - } - - in_braces = true; - branches.push(vec![]); - template.push(String::new()); - } - '}' if !is_escaped => { - in_braces = false; - if let Some(last) = branches.last_mut() { - last.push(current.clone()); - } - current.clear(); - } - ',' if !is_escaped && in_braces => { - if let Some(last) = branches.last_mut() { - last.push(current.clone()); - } - current.clear(); - } - _ if in_braces => current.push(c), - _ => { - if template.is_empty() { - template.push(String::new()); - } - - if let Some(last) = template.last_mut() { - last.push(c); - } +#[cfg(test)] +mod tests { + use super::optimize_patterns; + use crate::GlobEntry; + use bexpand::Expression; + use std::process::Command; + use std::{fs, path}; + use tempfile::tempdir; + + fn create_folders(folders: &[&str]) -> String { + // Create a temporary working directory + let dir = tempdir().unwrap().into_path(); + + // Initialize this directory as a git repository + let _ = Command::new("git").arg("init").current_dir(&dir).output(); + + // Create the necessary files + for path in folders { + // Ensure we use the right path separator for the current platform + let path = dir.join(path.replace('/', path::MAIN_SEPARATOR.to_string().as_str())); + let parent = path.parent().unwrap(); + if !parent.exists() { + fs::create_dir_all(parent).unwrap(); } - }; - } - - // Ensure we have a string that we can start adding information too. - if !template.is_empty() && !branches.is_empty() { - result.push("".to_string()); - } - // Let's try to generate everything! - for (i, template) in template.into_iter().enumerate() { - // Append current template string to all existing results. 
- result = result.into_iter().map(|x| x + &template).collect(); - - // Get the results, and copy it for every single branch. - if let Some(branches) = branches.get(i) { - result = branches - .iter() - .flat_map(|branch| { - result - .clone() - .into_iter() - .map(|x| x + branch) - .collect::>() - }) - .collect::>(); + dbg!(&path); + fs::write(path, "").unwrap(); } - } - result -} + let base = format!("{}", dir.display()); -#[cfg(test)] -mod tests { - use super::get_fast_patterns; - use crate::GlobEntry; - use std::path::PathBuf; + base + } + + fn test(base: &str, sources: &[GlobEntry]) -> Vec { + // Resolve all content paths for the (temporary) current working directory + let sources: Vec = sources + .iter() + .map(|x| GlobEntry { + base: format!("{}{}", base, x.base), + pattern: x.pattern.clone(), + }) + .collect(); + + // Expand glob patterns into multiple `GlobEntry`s. + let sources = sources + .iter() + .flat_map(|source| { + let expression: Result = source.pattern[..].try_into(); + let Ok(expression) = expression else { + return vec![source.clone()]; + }; + + expression + .into_iter() + .filter_map(Result::ok) + .map(move |pattern| GlobEntry { + base: source.base.clone(), + pattern: pattern.into(), + }) + .collect::>() + }) + .collect::>(); + + let optimized_sources = optimize_patterns(&sources); + + let parent_dir = format!("{}", fs::canonicalize(base).unwrap().display()); + + // Remove the temporary directory from the base + optimized_sources + .into_iter() + .map(|source| GlobEntry { + // Normalize paths to use unix style separators + base: source.base.replace(&parent_dir, "").replace('\\', "/"), + pattern: source.pattern, + }) + .collect() + } #[test] fn it_should_keep_globs_that_start_with_file_wildcards_as_is() { - let actual = get_fast_patterns(&vec![GlobEntry { + let base = create_folders(&["projects"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "*.html".to_string(), + }], + ); + + let expected = 
vec![GlobEntry { base: "/projects".to_string(), pattern: "*.html".to_string(), - }]); - let expected = vec![(PathBuf::from("/projects"), vec!["*.html".to_string()])]; + }]; - assert_eq!(actual, expected,); + assert_eq!(actual, expected); } #[test] fn it_should_keep_globs_that_start_with_folder_wildcards_as_is() { - let actual = get_fast_patterns(&vec![GlobEntry { + let base = create_folders(&["projects"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "**/*.html".to_string(), + }], + ); + + let expected = vec![GlobEntry { base: "/projects".to_string(), pattern: "**/*.html".to_string(), - }]); - - let expected = vec![(PathBuf::from("/projects"), vec!["**/*.html".to_string()])]; + }]; assert_eq!(actual, expected,); } #[test] fn it_should_move_the_starting_folder_to_the_path() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "example/*.html".to_string(), - }]); - let expected = vec![( - PathBuf::from("/projects/example"), - vec!["*.html".to_string()], - )]; + let base = create_folders(&["projects/example"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "example/*.html".to_string(), + }], + ); + + let expected = vec![GlobEntry { + base: "/projects/example".to_string(), + pattern: "*.html".to_string(), + }]; assert_eq!(actual, expected,); } #[test] fn it_should_move_the_starting_folders_to_the_path() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "example/other/*.html".to_string(), - }]); - let expected = vec![( - PathBuf::from("/projects/example/other"), - vec!["*.html".to_string()], - )]; + let base = create_folders(&["projects/example/other"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "example/other/*.html".to_string(), + }], + ); + + let expected = vec![GlobEntry { + base: "/projects/example/other".to_string(), + pattern: 
"*.html".to_string(), + }]; assert_eq!(actual, expected,); } #[test] fn it_should_branch_expandable_folders() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "{foo,bar}/*.html".to_string(), - }]); + let base = create_folders(&["projects/foo", "projects/bar"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "{foo,bar}/*.html".to_string(), + }], + ); let expected = vec![ - (PathBuf::from("/projects/foo"), vec!["*.html".to_string()]), - (PathBuf::from("/projects/bar"), vec!["*.html".to_string()]), + GlobEntry { + base: "/projects/bar".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/foo".to_string(), + pattern: "*.html".to_string(), + }, ]; assert_eq!(actual, expected,); @@ -342,27 +361,38 @@ mod tests { #[test] fn it_should_expand_multiple_expansions_in_the_same_folder() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "a-{b,c}-d-{e,f}-g/*.html".to_string(), - }]); + let base = create_folders(&[ + "projects/a-b-d-e-g", + "projects/a-b-d-f-g", + "projects/a-c-d-e-g", + "projects/a-c-d-f-g", + ]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "a-{b,c}-d-{e,f}-g/*.html".to_string(), + }], + ); + let expected = vec![ - ( - PathBuf::from("/projects/a-b-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-b-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-d-f-g"), - vec!["*.html".to_string()], - ), + GlobEntry { + base: "/projects/a-b-d-e-g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-b-d-f-g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-d-e-g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: 
"/projects/a-c-d-f-g".to_string(), + pattern: "*.html".to_string(), + }, ]; assert_eq!(actual, expected,); @@ -370,75 +400,98 @@ mod tests { #[test] fn multiple_expansions_per_folder_starting_at_the_root() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "{a,b}-c-{d,e}-f/{b,c}-d-{e,f}-g/*.html".to_string(), - }]); + let base = create_folders(&[ + "projects/a-c-d-f/b-d-e-g", + "projects/a-c-d-f/b-d-f-g", + "projects/a-c-d-f/c-d-e-g", + "projects/a-c-d-f/c-d-f-g", + "projects/a-c-e-f/b-d-e-g", + "projects/a-c-e-f/b-d-f-g", + "projects/a-c-e-f/c-d-e-g", + "projects/a-c-e-f/c-d-f-g", + "projects/b-c-d-f/b-d-e-g", + "projects/b-c-d-f/b-d-f-g", + "projects/b-c-d-f/c-d-e-g", + "projects/b-c-d-f/c-d-f-g", + "projects/b-c-e-f/b-d-e-g", + "projects/b-c-e-f/b-d-f-g", + "projects/b-c-e-f/c-d-e-g", + "projects/b-c-e-f/c-d-f-g", + ]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "{a,b}-c-{d,e}-f/{b,c}-d-{e,f}-g/*.html".to_string(), + }], + ); + let expected = vec![ - ( - PathBuf::from("/projects/a-c-d-f/b-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-d-f/b-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-e-f/b-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-e-f/b-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-d-f/c-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-d-f/c-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-e-f/c-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-e-f/c-d-e-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-d-f/b-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-d-f/b-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-e-f/b-d-f-g"), - vec!["*.html".to_string()], - ), - ( 
- PathBuf::from("/projects/b-c-e-f/b-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-d-f/c-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-d-f/c-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a-c-e-f/c-d-f-g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/b-c-e-f/c-d-f-g"), - vec!["*.html".to_string()], - ), + GlobEntry { + base: "/projects/a-c-d-f/b-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-d-f/b-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-d-f/c-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-d-f/c-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-e-f/b-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-e-f/b-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-e-f/c-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a-c-e-f/c-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-d-f/b-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-d-f/b-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-d-f/c-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-d-f/c-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-e-f/b-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-e-f/b-d-f-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-e-f/c-d-e-g".into(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/b-c-e-f/c-d-f-g".into(), + pattern: "*.html".to_string(), + }, ]; assert_eq!(actual, expected,); @@ 
-446,20 +499,25 @@ mod tests { #[test] fn it_should_stop_expanding_once_we_hit_a_wildcard() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "{foo,bar}/example/**/{baz,qux}/*.html".to_string(), - }]); + let base = create_folders(&["projects/bar/example", "projects/foo/example"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "{foo,bar}/example/**/{baz,qux}/*.html".to_string(), + }], + ); let expected = vec![ - ( - PathBuf::from("/projects/foo/example"), - vec!["**/{baz,qux}/*.html".to_string()], - ), - ( - PathBuf::from("/projects/bar/example"), - vec!["**/{baz,qux}/*.html".to_string()], - ), + GlobEntry { + base: "/projects/bar/example".to_string(), + pattern: "{**/baz/*.html,**/qux/*.html}".to_string(), + }, + GlobEntry { + base: "/projects/foo/example".to_string(), + pattern: "{**/baz/*.html,**/qux/*.html}".to_string(), + }, ]; assert_eq!(actual, expected,); @@ -467,41 +525,60 @@ mod tests { #[test] fn it_should_keep_the_negation_symbol_for_all_new_patterns() { - let actual = get_fast_patterns(&vec![GlobEntry { + let base = create_folders(&["projects"]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "!{foo,bar}/*.html".to_string(), + }], + ); + + let expected = vec![GlobEntry { base: "/projects".to_string(), - pattern: "!{foo,bar}/*.html".to_string(), - }]); - let expected = vec![ - (PathBuf::from("/projects/foo"), vec!["!*.html".to_string()]), - (PathBuf::from("/projects/bar"), vec!["!*.html".to_string()]), - ]; + // TODO: This is wrong, because `!` should be in front. But right now we don't support + // `@source "!../foo/bar";` anyway. 
+ pattern: "{!bar/*.html,!foo/*.html}".to_string(), + }]; assert_eq!(actual, expected,); } #[test] fn it_should_expand_a_complex_example() { - let actual = get_fast_patterns(&vec![GlobEntry { - base: "/projects".to_string(), - pattern: "a/{b,c}/d/{e,f}/g/*.html".to_string(), - }]); + let base = create_folders(&[ + "projects/a/b/d/e/g", + "projects/a/b/d/f/g", + "projects/a/c/d/e/g", + "projects/a/c/d/f/g", + ]); + + let actual = test( + &base, + &[GlobEntry { + base: "/projects".to_string(), + pattern: "a/{b,c}/d/{e,f}/g/*.html".to_string(), + }], + ); + let expected = vec![ - ( - PathBuf::from("/projects/a/b/d/e/g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a/c/d/e/g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a/b/d/f/g"), - vec!["*.html".to_string()], - ), - ( - PathBuf::from("/projects/a/c/d/f/g"), - vec!["*.html".to_string()], - ), + GlobEntry { + base: "/projects/a/b/d/e/g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a/b/d/f/g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a/c/d/e/g".to_string(), + pattern: "*.html".to_string(), + }, + GlobEntry { + base: "/projects/a/c/d/f/g".to_string(), + pattern: "*.html".to_string(), + }, ]; assert_eq!(actual, expected,); From 60839781ca85a6355f3f403a29538844030bb28e Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 19:49:12 +0100 Subject: [PATCH 10/17] ensure we can compare `GlobEntry` instances --- crates/oxide/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/oxide/src/lib.rs b/crates/oxide/src/lib.rs index 4e4e7749f731..1789322ecbc7 100644 --- a/crates/oxide/src/lib.rs +++ b/crates/oxide/src/lib.rs @@ -57,7 +57,7 @@ pub struct ScanResult { pub globs: Vec, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct GlobEntry { pub base: String, pub pattern: String, From d0efb45d33a523401cc444e83e6f32bf77a29855 Mon Sep 17 
00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 19:49:22 +0100 Subject: [PATCH 11/17] ensure the patterns are prepended with the base This is to ensure that matching files is correct. --- crates/oxide/src/lib.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/oxide/src/lib.rs b/crates/oxide/src/lib.rs index 1789322ecbc7..7d6f5caafba8 100644 --- a/crates/oxide/src/lib.rs +++ b/crates/oxide/src/lib.rs @@ -273,7 +273,15 @@ impl Scanner { let hoisted = hoist_static_glob_parts(&glob_sources); for source in &hoisted { - let Ok(glob) = Glob::new(&source.base) else { + // We need to combine the base and the pattern, otherwise a pattern that looks like + // `*.html`, will never match a path that looks like + // `/my-project/project-a/index.html`, because it contains `/`. + // + // We can't prepend `**/`, because then `/my-project/project-a/nested/index.html` would + // match as well. + // + // Instead we combine the base and the pattern as a single glob pattern. 
+ let Ok(glob) = Glob::new(&format!("{}/{}", source.base, source.pattern)) else { continue; }; From 8e68a9a1c408ddb872311ab9a391edd7836ecad3 Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 19:49:53 +0100 Subject: [PATCH 12/17] refactor: move `parent_dir` outside of loop --- crates/oxide/tests/scanner.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/crates/oxide/tests/scanner.rs b/crates/oxide/tests/scanner.rs index fe15be414fb2..14ab350c456b 100644 --- a/crates/oxide/tests/scanner.rs +++ b/crates/oxide/tests/scanner.rs @@ -66,17 +66,16 @@ mod scanner { )); } + let parent_dir = format!( + "{}{}", + fs::canonicalize(&base).unwrap().display(), + path::MAIN_SEPARATOR + ); + paths = paths .into_iter() .map(|x| { - let parent_dir = format!( - "{}{}", - fs::canonicalize(&base).unwrap().display(), - path::MAIN_SEPARATOR - ); - x.replace(&parent_dir, "") - // Normalize paths to use unix style separators - .replace('\\', "/") + x.replace(&parent_dir, "").replace('\\', "/") // Normalize paths to use unix style separators }) .collect(); From b5f25329a20a799bcbda1cc926bbcd0a55cdb87e Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 20:04:22 +0100 Subject: [PATCH 13/17] ensure glob entry without pattern is scanner correctly --- crates/oxide/src/glob.rs | 8 +++++++- crates/oxide/src/lib.rs | 16 +++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/crates/oxide/src/glob.rs b/crates/oxide/src/glob.rs index 57199b43fe2f..f1e714cef1a7 100644 --- a/crates/oxide/src/glob.rs +++ b/crates/oxide/src/glob.rs @@ -47,7 +47,13 @@ pub fn hoist_static_glob_parts(entries: &Vec) -> Vec { let pattern = match dynamic_part { Some(dynamic_part) => dynamic_part, - None => "**/*".to_owned(), + None => { + if base.is_dir() { + "**/*".to_owned() + } else { + "".to_owned() + } + } }; result.push(GlobEntry { diff --git a/crates/oxide/src/lib.rs b/crates/oxide/src/lib.rs index 7d6f5caafba8..9c78ea2fc9d3 
100644 --- a/crates/oxide/src/lib.rs +++ b/crates/oxide/src/lib.rs @@ -271,17 +271,27 @@ impl Scanner { // Turn `Vec<&GlobEntry>` in `Vec` let glob_sources: Vec<_> = glob_sources.into_iter().cloned().collect(); let hoisted = hoist_static_glob_parts(&glob_sources); + dbg!(&glob_sources, &hoisted); for source in &hoisted { - // We need to combine the base and the pattern, otherwise a pattern that looks like - // `*.html`, will never match a path that looks like + // If the pattern is empty, then the base points to a specific file or folder already + // if it doesn't contain any dynamic parts. In that case we can use the base as the + // pattern. + // + // Otherwise we need to combine the base and the pattern, otherwise a pattern that + // looks like `*.html`, will never match a path that looks like // `/my-project/project-a/index.html`, because it contains `/`. // // We can't prepend `**/`, because then `/my-project/project-a/nested/index.html` would // match as well. // // Instead we combine the base and the pattern as a single glob pattern. 
- let Ok(glob) = Glob::new(&format!("{}/{}", source.base, source.pattern)) else { + let mut full_pattern = source.base.clone(); + if !source.pattern.is_empty() { + full_pattern.push('/'); + full_pattern.push_str(&source.pattern); + } + let Ok(glob) = Glob::new(&full_pattern) else { continue; }; From 12a249c0d59ba775be9847c14ea1b53a77738ddc Mon Sep 17 00:00:00 2001 From: Robin Malfait Date: Mon, 28 Oct 2024 20:05:20 +0100 Subject: [PATCH 14/17] remove dbg statements --- crates/oxide/src/glob.rs | 1 - crates/oxide/src/lib.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/crates/oxide/src/glob.rs b/crates/oxide/src/glob.rs index f1e714cef1a7..c391d771c710 100644 --- a/crates/oxide/src/glob.rs +++ b/crates/oxide/src/glob.rs @@ -205,7 +205,6 @@ mod tests { fs::create_dir_all(parent).unwrap(); } - dbg!(&path); fs::write(path, "").unwrap(); } diff --git a/crates/oxide/src/lib.rs b/crates/oxide/src/lib.rs index 9c78ea2fc9d3..0c3cf6e360c1 100644 --- a/crates/oxide/src/lib.rs +++ b/crates/oxide/src/lib.rs @@ -271,7 +271,6 @@ impl Scanner { // Turn `Vec<&GlobEntry>` in `Vec` let glob_sources: Vec<_> = glob_sources.into_iter().cloned().collect(); let hoisted = hoist_static_glob_parts(&glob_sources); - dbg!(&glob_sources, &hoisted); for source in &hoisted { // If the pattern is empty, then the base points to a specific file or folder already From 93b629a6a48765945341bfcdfc59344ca28f3597 Mon Sep 17 00:00:00 2001 From: Jordan Pittman Date: Mon, 28 Oct 2024 16:15:07 -0400 Subject: [PATCH 15/17] Store candidates per-module --- packages/@tailwindcss-vite/src/index.ts | 38 ++++++++++++------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/packages/@tailwindcss-vite/src/index.ts b/packages/@tailwindcss-vite/src/index.ts index b045ab3505ae..e0874d58f0d7 100644 --- a/packages/@tailwindcss-vite/src/index.ts +++ b/packages/@tailwindcss-vite/src/index.ts @@ -35,7 +35,7 @@ export default function tailwindcss(): Plugin[] { // Note: To improve performance, we do not 
remove candidates from this set. // This means a longer-ongoing dev mode session might contain candidates that // are no longer referenced in code. - let moduleGraphCandidates = new Set() + let moduleGraphCandidates = new DefaultMap>(() => new Set()) let moduleGraphScanner = new Scanner({}) let roots: DefaultMap = new DefaultMap( @@ -46,7 +46,7 @@ export default function tailwindcss(): Plugin[] { let updated = false for (let candidate of moduleGraphScanner.scanFiles([{ content, extension }])) { updated = true - moduleGraphCandidates.add(candidate) + moduleGraphCandidates.get(id).add(candidate) } if (updated) { @@ -348,14 +348,9 @@ class Root { // root. private dependencies = new Set() - // Whether to include candidates from the module graph. This is disabled when - // the user provides `source(none)` to essentially disable auto source - // detection. - private includeCandidatesFromModuleGraph = true - constructor( private id: string, - private getSharedCandidates: () => Set, + private getSharedCandidates: () => Map>, private base: string, ) {} @@ -387,20 +382,14 @@ class Root { let sources = (() => { // Disable auto source detection if (this.compiler.root === 'none') { - this.includeCandidatesFromModuleGraph = false return [] } // No root specified, use the module graph if (this.compiler.root === null) { - this.includeCandidatesFromModuleGraph = true - return [] } - // TODO: In a follow up PR we want this filter this against the module graph. - this.includeCandidatesFromModuleGraph = true - // Use the specified root return [this.compiler.root] })().concat(this.compiler.globs) @@ -440,13 +429,24 @@ class Root { this.requiresRebuild = true env.DEBUG && console.time('[@tailwindcss/vite] Build CSS') - let result = this.compiler.build( - this.includeCandidatesFromModuleGraph - ? 
[...this.getSharedCandidates(), ...this.candidates] - : Array.from(this.candidates), - ) + let result = this.compiler.build([...this.sharedCandidates(), ...this.candidates]) env.DEBUG && console.timeEnd('[@tailwindcss/vite] Build CSS') return result } + + private sharedCandidates(): Set { + if (!this.compiler) return new Set() + if (this.compiler.root === 'none') return new Set() + + let shared = new Set() + + for (let [id, candidates] of this.getSharedCandidates()) { + for (let candidate of candidates) { + shared.add(candidate) + } + } + + return shared + } } From 938b3c5f94416a040e45b71fb13be99bec15cf47 Mon Sep 17 00:00:00 2001 From: Jordan Pittman Date: Mon, 28 Oct 2024 17:12:54 -0400 Subject: [PATCH 16/17] Add test for `source(none)` in Vite --- integrations/vite/index.test.ts | 86 +++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/integrations/vite/index.test.ts b/integrations/vite/index.test.ts index 89814ccc6168..c22f4cfb5a41 100644 --- a/integrations/vite/index.test.ts +++ b/integrations/vite/index.test.ts @@ -427,6 +427,92 @@ for (let transformer of ['postcss', 'lightningcss']) { }) }, ) + + test( + `source(none) disables looking at the module graph`, + { + fs: { + 'package.json': json`{}`, + 'pnpm-workspace.yaml': yaml` + # + packages: + - project-a + `, + 'project-a/package.json': txt` + { + "type": "module", + "dependencies": { + "@tailwindcss/vite": "workspace:^", + "tailwindcss": "workspace:^" + }, + "devDependencies": { + ${transformer === 'lightningcss' ? `"lightningcss": "^1.26.0",` : ''} + "vite": "^5.3.5" + } + } + `, + 'project-a/vite.config.ts': ts` + import tailwindcss from '@tailwindcss/vite' + import { defineConfig } from 'vite' + + export default defineConfig({ + css: ${transformer === 'postcss' ? '{}' : "{ transformer: 'lightningcss' }"}, + build: { cssMinify: false }, + plugins: [tailwindcss()], + }) + `, + 'project-a/index.html': html` + + + + +
Hello, world!
+ + `, + 'project-a/src/index.css': css` + @import 'tailwindcss' source(none); + @source '../../project-b/src/**/*.html'; + `, + 'project-b/src/index.html': html` +
+ `, + 'project-b/src/index.js': js` + const className = "content-['project-b/src/index.js']" + module.exports = { className } + `, + }, + }, + async ({ root, fs, exec }) => { + console.log(await exec('pnpm vite build', { cwd: path.join(root, 'project-a') })) + + let files = await fs.glob('project-a/dist/**/*.css') + expect(files).toHaveLength(1) + let [filename] = files[0] + + // `underline` and `m-2` are only present from files in the module graph + // which we've explicitly disabled with source(none) so they should not + // be present + await fs.expectFileNotToContain(filename, [ + // + candidate`underline`, + candidate`m-2`, + ]) + + // The files from `project-b` should be included because there is an + // explicit `@source` directive for it + await fs.expectFileToContain(filename, [ + // + candidate`flex`, + ]) + + // The explicit source directive only covers HTML files, so the JS file + // should not be included + await fs.expectFileNotToContain(filename, [ + // + candidate`content-['project-b/src/index.js']`, + ]) + }, + ) }) } From d2be2152ea8f218afdd2ff9e017f2d8432c09a76 Mon Sep 17 00:00:00 2001 From: Jordan Pittman Date: Mon, 28 Oct 2024 17:13:37 -0400 Subject: [PATCH 17/17] =?UTF-8?q?Filter=20module=20graph=20based=20on=20`s?= =?UTF-8?q?ource(=E2=80=A6)`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- integrations/vite/index.test.ts | 102 ++++++++++++++++++++++++ packages/@tailwindcss-vite/src/index.ts | 15 ++++ 2 files changed, 117 insertions(+) diff --git a/integrations/vite/index.test.ts b/integrations/vite/index.test.ts index c22f4cfb5a41..0cef7ebe1da9 100644 --- a/integrations/vite/index.test.ts +++ b/integrations/vite/index.test.ts @@ -513,6 +513,108 @@ for (let transformer of ['postcss', 'lightningcss']) { ]) }, ) + + test( + `source("…") filters the module graph`, + { + fs: { + 'package.json': json`{}`, + 'pnpm-workspace.yaml': yaml` + # + packages: + - project-a + `, + 'project-a/package.json': 
txt` + { + "type": "module", + "dependencies": { + "@tailwindcss/vite": "workspace:^", + "tailwindcss": "workspace:^" + }, + "devDependencies": { + ${transformer === 'lightningcss' ? `"lightningcss": "^1.26.0",` : ''} + "vite": "^5.3.5" + } + } + `, + 'project-a/vite.config.ts': ts` + import tailwindcss from '@tailwindcss/vite' + import { defineConfig } from 'vite' + + export default defineConfig({ + css: ${transformer === 'postcss' ? '{}' : "{ transformer: 'lightningcss' }"}, + build: { cssMinify: false }, + plugins: [tailwindcss()], + }) + `, + 'project-a/index.html': html` + + + + +
Hello, world!
+ + + `, + 'project-a/app/index.js': js` + const className = "content-['project-a/app/index.js']" + export default { className } + `, + 'project-a/src/index.css': css` + @import 'tailwindcss' source('../app'); + @source '../../project-b/src/**/*.html'; + `, + 'project-b/src/index.html': html` +
+ `, + 'project-b/src/index.js': js` + const className = "content-['project-b/src/index.js']" + module.exports = { className } + `, + }, + }, + async ({ root, fs, exec }) => { + await exec('pnpm vite build', { cwd: path.join(root, 'project-a') }) + + let files = await fs.glob('project-a/dist/**/*.css') + expect(files).toHaveLength(1) + let [filename] = files[0] + + // `underline` and `m-2` are present in files in the module graph but + // we've filtered the module graph such that we only look in + // `./app/**/*` so they should not be present + await fs.expectFileNotToContain(filename, [ + // + candidate`underline`, + candidate`m-2`, + candidate`content-['project-a/index.html']`, + ]) + + // We've filtered the module graph to only look in ./app/**/* so the + // candidates from that project should be present + await fs.expectFileToContain(filename, [ + // + candidate`content-['project-a/app/index.js']`, + ]) + + // Even though we're filtering the module graph, explicit sources are + // additive and as such files from `project-b` should be included + // because there is an explicit `@source` directive for it + await fs.expectFileToContain(filename, [ + // + candidate`content-['project-b/src/index.html']`, + ]) + + // The explicit source directive only covers HTML files, so the JS file + // should not be included + await fs.expectFileNotToContain(filename, [ + // + candidate`content-['project-b/src/index.js']`, + ]) + }, + ) }) } diff --git a/packages/@tailwindcss-vite/src/index.ts b/packages/@tailwindcss-vite/src/index.ts index e0874d58f0d7..b1fffb9d128d 100644 --- a/packages/@tailwindcss-vite/src/index.ts +++ b/packages/@tailwindcss-vite/src/index.ts @@ -439,9 +439,24 @@ class Root { if (!this.compiler) return new Set() if (this.compiler.root === 'none') return new Set() + let root = this.compiler.root + let basePath = root ?
path.resolve(root.base, root.pattern) : null + + function moduleIdIsAllowed(id: string) { + if (basePath === null) return true + + // This is a virtual module that's not on the file system + // TODO: What should we do here? + if (!id.startsWith('/')) return true + + return id.startsWith(basePath) + } + let shared = new Set() for (let [id, candidates] of this.getSharedCandidates()) { + if (!moduleIdIsAllowed(id)) continue + for (let candidate of candidates) { shared.add(candidate) }