diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e3844c3050a6..2ebe99d27c96 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -25,13 +25,13 @@ jobs:
             os: macos-14
 
         # Exclude windows and macos from being built on feature branches
-        on-main-branch:
-          - ${{ github.ref == 'refs/heads/main' }}
+        run-all:
+          - ${{ github.ref == 'refs/heads/main' || contains(github.event.pull_request.body, '[ci-all]') }}
 
         exclude:
-          - on-main-branch: false
+          - run-all: false
            runner:
              name: Windows
-          - on-main-branch: false
+          - run-all: false
            runner:
              name: macOS
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 55fac5cb1cd4..761b769d75e2 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -33,13 +33,13 @@ jobs:
           - workers
 
         # Exclude windows and macos from being built on feature branches
-        on-main-branch:
-          - ${{ github.ref == 'refs/heads/main' }}
+        run-all:
+          - ${{ github.ref == 'refs/heads/main' || contains(github.event.pull_request.body, '[ci-all]') }}
 
         exclude:
-          - on-main-branch: false
+          - run-all: false
            runner:
              name: Windows
-          - on-main-branch: false
+          - run-all: false
            runner:
              name: macOS
diff --git a/Cargo.lock b/Cargo.lock
index 31b5529a6c21..ee6a465fcb69 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -199,9 +199,9 @@ checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6"
 
 [[package]]
 name = "globset"
-version = "0.4.15"
+version = "0.4.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19"
+checksum = "54a1028dfc5f5df5da8a56a73e6c153c9a9708ec57232470703592a3f18e49f5"
 dependencies = [
  "aho-corasick",
  "bstr",
@@ -217,10 +217,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757"
 dependencies = [
  "bitflags",
- "ignore",
+ "ignore 0.4.23 (registry+https://github.com/rust-lang/crates.io-index)",
  "walkdir",
 ]
 
+[[package]]
+name = "ignore"
+version = "0.4.23"
+dependencies = [
+ "bstr",
+ "crossbeam-channel",
+ "crossbeam-deque",
+ "dunce",
+ "globset",
+ "log",
+ "memchr",
+ "regex-automata 0.4.8",
+ "same-file",
+ "walkdir",
+ "winapi-util",
+]
+
 [[package]]
 name = "ignore"
 version = "0.4.23"
@@ -567,7 +584,7 @@ dependencies = [
  "fancy-regex",
  "fast-glob",
  "globwalk",
- "ignore",
+ "ignore 0.4.23",
  "log",
  "rayon",
  "regex",
diff --git a/crates/ignore/COPYING b/crates/ignore/COPYING
new file mode 100644
index 000000000000..bb9c20a094e4
--- /dev/null
+++ b/crates/ignore/COPYING
@@ -0,0 +1,3 @@
+This project is dual-licensed under the Unlicense and MIT licenses.
+
+You may use this code under the terms of either license.
diff --git a/crates/ignore/Cargo.toml b/crates/ignore/Cargo.toml
new file mode 100644
index 000000000000..b8ae1b1bf721
--- /dev/null
+++ b/crates/ignore/Cargo.toml
@@ -0,0 +1,45 @@
+[package]
+name = "ignore"
+version = "0.4.23" #:version
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+description = """
+A fast library for efficiently matching ignore files such as `.gitignore`
+against file paths.
+""" +documentation = "https://docs.rs/ignore" +homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore" +repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore" +readme = "README.md" +keywords = ["glob", "ignore", "gitignore", "pattern", "file"] +license = "Unlicense OR MIT" +edition = "2021" + +[lib] +name = "ignore" +bench = false + +[dependencies] +crossbeam-deque = "0.8.3" +globset = "0.4.16" +log = "0.4.20" +memchr = "2.6.3" +same-file = "1.0.6" +walkdir = "2.4.0" +dunce = "1.0.5" + +[dependencies.regex-automata] +version = "0.4.0" +default-features = false +features = ["std", "perf", "syntax", "meta", "nfa", "hybrid", "dfa-onepass"] + +[target.'cfg(windows)'.dependencies.winapi-util] +version = "0.1.2" + +[dev-dependencies] +bstr = { version = "1.6.2", default-features = false, features = ["std"] } +crossbeam-channel = "0.5.8" + +[features] +# DEPRECATED. It is a no-op. SIMD is done automatically through runtime +# dispatch. +simd-accel = [] diff --git a/crates/ignore/LICENSE-MIT b/crates/ignore/LICENSE-MIT new file mode 100644 index 000000000000..3b0a5dc09c1e --- /dev/null +++ b/crates/ignore/LICENSE-MIT @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Andrew Gallant + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/crates/ignore/README.md b/crates/ignore/README.md new file mode 100644 index 000000000000..a4c34e505cf3 --- /dev/null +++ b/crates/ignore/README.md @@ -0,0 +1,58 @@ +# ignore + +The ignore crate provides a fast recursive directory iterator that respects +various filters such as globs, file types and `.gitignore` files. This crate +also provides lower level direct access to gitignore and file type matchers. + +[![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions) +[![](https://img.shields.io/crates/v/ignore.svg)](https://crates.io/crates/ignore) + +Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/). + +### Documentation + +[https://docs.rs/ignore](https://docs.rs/ignore) + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +ignore = "0.4" +``` + +### Example + +This example shows the most basic usage of this crate. 
This code will +recursively traverse the current directory while automatically filtering out +files and directories according to ignore globs found in files like +`.ignore` and `.gitignore`: + +```rust,no_run +use ignore::Walk; + +for result in Walk::new("./") { + // Each item yielded by the iterator is either a directory entry or an + // error, so either print the path or the error. + match result { + Ok(entry) => println!("{}", entry.path().display()), + Err(err) => println!("ERROR: {}", err), + } +} +``` + +### Example: advanced + +By default, the recursive directory iterator will ignore hidden files and +directories. This can be disabled by building the iterator with `WalkBuilder`: + +```rust,no_run +use ignore::WalkBuilder; + +for result in WalkBuilder::new("./").hidden(false).build() { + println!("{:?}", result); +} +``` + +See the documentation for `WalkBuilder` for many other options. diff --git a/crates/ignore/UNLICENSE b/crates/ignore/UNLICENSE new file mode 100644 index 000000000000..68a49daad8ff --- /dev/null +++ b/crates/ignore/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+
+For more information, please refer to <https://unlicense.org/>
diff --git a/crates/ignore/examples/walk.rs b/crates/ignore/examples/walk.rs
new file mode 100644
index 000000000000..5bbd10f2bc2d
--- /dev/null
+++ b/crates/ignore/examples/walk.rs
@@ -0,0 +1,64 @@
+use std::{env, io::Write, path::Path};
+
+use {bstr::ByteVec, ignore::WalkBuilder, walkdir::WalkDir};
+
+fn main() {
+    let mut path = env::args().nth(1).unwrap();
+    let mut parallel = false;
+    let mut simple = false;
+    let (tx, rx) = crossbeam_channel::bounded::<DirEntry>(100);
+    if path == "parallel" {
+        path = env::args().nth(2).unwrap();
+        parallel = true;
+    } else if path == "walkdir" {
+        path = env::args().nth(2).unwrap();
+        simple = true;
+    }
+
+    let stdout_thread = std::thread::spawn(move || {
+        let mut stdout = std::io::BufWriter::new(std::io::stdout());
+        for dent in rx {
+            stdout.write(&*Vec::from_path_lossy(dent.path())).unwrap();
+            stdout.write(b"\n").unwrap();
+        }
+    });
+
+    if parallel {
+        let walker = WalkBuilder::new(path).threads(6).build_parallel();
+        walker.run(|| {
+            let tx = tx.clone();
+            Box::new(move |result| {
+                use ignore::WalkState::*;
+
+                tx.send(DirEntry::Y(result.unwrap())).unwrap();
+                Continue
+            })
+        });
+    } else if simple {
+        let walker = WalkDir::new(path);
+        for result in walker {
+            tx.send(DirEntry::X(result.unwrap())).unwrap();
+        }
+    } else {
+        let walker = WalkBuilder::new(path).build();
+        for result in walker {
+            tx.send(DirEntry::Y(result.unwrap())).unwrap();
+        }
+    }
+    drop(tx);
+    stdout_thread.join().unwrap();
+}
+
+enum DirEntry {
+    X(walkdir::DirEntry),
+    Y(ignore::DirEntry),
+}
+
+impl DirEntry {
+    fn path(&self) -> &Path {
+        match *self {
+            DirEntry::X(ref x) => x.path(),
+            DirEntry::Y(ref y) => y.path(),
+        }
+    }
+}
diff --git a/crates/ignore/src/default_types.rs b/crates/ignore/src/default_types.rs
new file mode 100644
index 000000000000..2cf8ad80794b
--- /dev/null
+++ b/crates/ignore/src/default_types.rs
@@ -0,0 +1,351 @@
+/// This list represents the default file types that ripgrep ships with. In
+/// general, any file format is fair game, although it should generally be
+/// limited to reasonably popular open formats. For other cases, you can add
+/// types to each invocation of ripgrep with the '--type-add' flag.
+///
+/// If you would like to add or improve this list, please file a PR:
+/// <https://github.com/BurntSushi/ripgrep>.
+///
+/// Please try to keep this list sorted lexicographically and wrapped to 79
+/// columns (inclusive).
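+///
+/// Each entry maps a list of type names (the first is the canonical name,
+/// the rest are aliases) to the globs that select files of that type. For
+/// example, this entry from the list below makes both `py` and `python`
+/// select Python sources:
+///
+/// ```text
+/// (&["py", "python"], &["*.py", "*.pyi"]),
+/// ```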
+#[rustfmt::skip] +pub(crate) const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[ + (&["ada"], &["*.adb", "*.ads"]), + (&["agda"], &["*.agda", "*.lagda"]), + (&["aidl"], &["*.aidl"]), + (&["alire"], &["alire.toml"]), + (&["amake"], &["*.mk", "*.bp"]), + (&["asciidoc"], &["*.adoc", "*.asc", "*.asciidoc"]), + (&["asm"], &["*.asm", "*.s", "*.S"]), + (&["asp"], &[ + "*.aspx", "*.aspx.cs", "*.aspx.vb", "*.ascx", "*.ascx.cs", + "*.ascx.vb", "*.asp" + ]), + (&["ats"], &["*.ats", "*.dats", "*.sats", "*.hats"]), + (&["avro"], &["*.avdl", "*.avpr", "*.avsc"]), + (&["awk"], &["*.awk"]), + (&["bat", "batch"], &["*.bat"]), + (&["bazel"], &[ + "*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "MODULE.bazel", + "WORKSPACE", "WORKSPACE.bazel", + ]), + (&["bitbake"], &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]), + (&["brotli"], &["*.br"]), + (&["buildstream"], &["*.bst"]), + (&["bzip2"], &["*.bz2", "*.tbz2"]), + (&["c"], &["*.[chH]", "*.[chH].in", "*.cats"]), + (&["cabal"], &["*.cabal"]), + (&["candid"], &["*.did"]), + (&["carp"], &["*.carp"]), + (&["cbor"], &["*.cbor"]), + (&["ceylon"], &["*.ceylon"]), + (&["clojure"], &["*.clj", "*.cljc", "*.cljs", "*.cljx"]), + (&["cmake"], &["*.cmake", "CMakeLists.txt"]), + (&["cmd"], &["*.bat", "*.cmd"]), + (&["cml"], &["*.cml"]), + (&["coffeescript"], &["*.coffee"]), + (&["config"], &["*.cfg", "*.conf", "*.config", "*.ini"]), + (&["coq"], &["*.v"]), + (&["cpp"], &[ + "*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl", + "*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in", + ]), + (&["creole"], &["*.creole"]), + (&["crystal"], &["Projectfile", "*.cr", "*.ecr", "shard.yml"]), + (&["cs"], &["*.cs"]), + (&["csharp"], &["*.cs"]), + (&["cshtml"], &["*.cshtml"]), + (&["csproj"], &["*.csproj"]), + (&["css"], &["*.css", "*.scss"]), + (&["csv"], &["*.csv"]), + (&["cuda"], &["*.cu", "*.cuh"]), + (&["cython"], &["*.pyx", "*.pxi", "*.pxd"]), + (&["d"], &["*.d"]), + (&["dart"], &["*.dart"]), + (&["devicetree"], &["*.dts", "*.dtsi"]), + (&["dhall"], &["*.dhall"]), + (&["diff"], &["*.patch", "*.diff"]), + (&["dita"], &["*.dita", "*.ditamap", "*.ditaval"]), + (&["docker"], &["*Dockerfile*"]), + (&["dockercompose"], &["docker-compose.yml", "docker-compose.*.yml"]), + (&["dts"], &["*.dts", "*.dtsi"]), + (&["dvc"], &["Dvcfile", "*.dvc"]), + (&["ebuild"], &["*.ebuild", "*.eclass"]), + (&["edn"], &["*.edn"]), + (&["elisp"], &["*.el"]), + (&["elixir"], &["*.ex", "*.eex", "*.exs", "*.heex", "*.leex", "*.livemd"]), + (&["elm"], &["*.elm"]), + (&["erb"], &["*.erb"]), + (&["erlang"], &["*.erl", "*.hrl"]), + (&["fennel"], &["*.fnl"]), + (&["fidl"], &["*.fidl"]), + (&["fish"], &["*.fish"]), + (&["flatbuffers"], &["*.fbs"]), + (&["fortran"], &[ + "*.f", "*.F", "*.f77", "*.F77", "*.pfo", + "*.f90", "*.F90", "*.f95", "*.F95", + ]), + (&["fsharp"], &["*.fs", "*.fsx", "*.fsi"]), + (&["fut"], &["*.fut"]), + (&["gap"], &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]), + (&["gn"], &["*.gn", "*.gni"]), + (&["go"], &["*.go"]), + (&["gprbuild"], &["*.gpr"]), + (&["gradle"], &[ + "*.gradle", "*.gradle.kts", "gradle.properties", "gradle-wrapper.*", + "gradlew", "gradlew.bat", + ]), + (&["graphql"], &["*.graphql", "*.graphqls"]), + (&["groovy"], &["*.groovy", "*.gradle"]), + (&["gzip"], &["*.gz", "*.tgz"]), + (&["h"], &["*.h", "*.hh", "*.hpp"]), + (&["haml"], &["*.haml"]), + (&["hare"], &["*.ha"]), + (&["haskell"], &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]), + (&["hbs"], &["*.hbs"]), + (&["hs"], &["*.hs", "*.lhs"]), + (&["html"], &["*.htm", "*.html", "*.ejs"]), + (&["hy"], 
&["*.hy"]), + (&["idris"], &["*.idr", "*.lidr"]), + (&["janet"], &["*.janet"]), + (&["java"], &["*.java", "*.jsp", "*.jspx", "*.properties"]), + (&["jinja"], &["*.j2", "*.jinja", "*.jinja2"]), + (&["jl"], &["*.jl"]), + (&["js"], &["*.js", "*.jsx", "*.vue", "*.cjs", "*.mjs"]), + (&["json"], &["*.json", "composer.lock", "*.sarif"]), + (&["jsonl"], &["*.jsonl"]), + (&["julia"], &["*.jl"]), + (&["jupyter"], &["*.ipynb", "*.jpynb"]), + (&["k"], &["*.k"]), + (&["kotlin"], &["*.kt", "*.kts"]), + (&["lean"], &["*.lean"]), + (&["less"], &["*.less"]), + (&["license"], &[ + // General + "COPYING", "COPYING[.-]*", + "COPYRIGHT", "COPYRIGHT[.-]*", + "EULA", "EULA[.-]*", + "licen[cs]e", "licen[cs]e.*", + "LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*", + "NOTICE", "NOTICE[.-]*", + "PATENTS", "PATENTS[.-]*", + "UNLICEN[CS]E", "UNLICEN[CS]E[.-]*", + // GPL (gpl.txt, etc.) + "agpl[.-]*", + "gpl[.-]*", + "lgpl[.-]*", + // Other license-specific (APACHE-2.0.txt, etc.) + "AGPL-*[0-9]*", + "APACHE-*[0-9]*", + "BSD-*[0-9]*", + "CC-BY-*", + "GFDL-*[0-9]*", + "GNU-*[0-9]*", + "GPL-*[0-9]*", + "LGPL-*[0-9]*", + "MIT-*[0-9]*", + "MPL-*[0-9]*", + "OFL-*[0-9]*", + ]), + (&["lilypond"], &["*.ly", "*.ily"]), + (&["lisp"], &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), + (&["lock"], &["*.lock", "package-lock.json"]), + (&["log"], &["*.log"]), + (&["lua"], &["*.lua"]), + (&["lz4"], &["*.lz4"]), + (&["lzma"], &["*.lzma"]), + (&["m4"], &["*.ac", "*.m4"]), + (&["make"], &[ + "[Gg][Nn][Uu]makefile", "[Mm]akefile", + "[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am", + "[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in", + "*.mk", "*.mak" + ]), + (&["mako"], &["*.mako", "*.mao"]), + (&["man"], &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]), + (&["markdown", "md"], &[ + "*.markdown", + "*.md", + "*.mdown", + "*.mdwn", + "*.mkd", + "*.mkdn", + "*.mdx", + ]), + (&["matlab"], &["*.m"]), + (&["meson"], &["meson.build", "meson_options.txt", "meson.options"]), + (&["minified"], &["*.min.html", "*.min.css", "*.min.js"]), + (&["mint"], &["*.mint"]), + (&["mk"], &["mkfile"]), + (&["ml"], &["*.ml"]), + (&["motoko"], &["*.mo"]), + (&["msbuild"], &[ + "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets", + "*.sln", + ]), + (&["nim"], &["*.nim", "*.nimf", "*.nimble", "*.nims"]), + (&["nix"], &["*.nix"]), + (&["objc"], &["*.h", "*.m"]), + (&["objcpp"], &["*.h", "*.mm"]), + (&["ocaml"], &["*.ml", "*.mli", "*.mll", "*.mly"]), + (&["org"], &["*.org", "*.org_archive"]), + (&["pants"], &["BUILD"]), + (&["pascal"], &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]), + (&["pdf"], &["*.pdf"]), + (&["perl"], &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]), + (&["php"], &[ + // note that PHP 6 doesn't exist + // See: https://wiki.php.net/rfc/php6 + "*.php", "*.php3", "*.php4", "*.php5", "*.php7", "*.php8", + "*.pht", "*.phtml" + ]), + (&["po"], &["*.po"]), + (&["pod"], &["*.pod"]), + (&["postscript"], &["*.eps", "*.ps"]), + (&["prolog"], &["*.pl", "*.pro", "*.prolog", "*.P"]), + (&["protobuf"], &["*.proto"]), + (&["ps"], &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]), + (&["puppet"], &["*.epp", "*.erb", "*.pp", "*.rb"]), + (&["purs"], &["*.purs"]), + (&["py", "python"], &["*.py", "*.pyi"]), + (&["qmake"], &["*.pro", "*.pri", "*.prf"]), + (&["qml"], &["*.qml"]), + (&["r"], &["*.R", "*.r", "*.Rmd", "*.Rnw"]), + (&["racket"], &["*.rkt"]), + (&["raku"], &[ + "*.raku", "*.rakumod", "*.rakudoc", "*.rakutest", + "*.p6", "*.pl6", "*.pm6" + ]), + (&["rdoc"], &["*.rdoc"]), + (&["readme"], &["README*", "*README"]), + (&["reasonml"], 
&["*.re", "*.rei"]), + (&["red"], &["*.r", "*.red", "*.reds"]), + (&["rescript"], &["*.res", "*.resi"]), + (&["robot"], &["*.robot"]), + (&["rst"], &["*.rst"]), + (&["ruby"], &[ + // Idiomatic files + "config.ru", "Gemfile", ".irbrc", "Rakefile", + // Extensions + "*.gemspec", "*.rb", "*.rbw" + ]), + (&["rust"], &["*.rs"]), + (&["sass"], &["*.sass", "*.scss"]), + (&["scala"], &["*.scala", "*.sbt"]), + (&["sh"], &[ + // Portable/misc. init files + ".login", ".logout", ".profile", "profile", + // bash-specific init files + ".bash_login", "bash_login", + ".bash_logout", "bash_logout", + ".bash_profile", "bash_profile", + ".bashrc", "bashrc", "*.bashrc", + // csh-specific init files + ".cshrc", "*.cshrc", + // ksh-specific init files + ".kshrc", "*.kshrc", + // tcsh-specific init files + ".tcshrc", + // zsh-specific init files + ".zshenv", "zshenv", + ".zlogin", "zlogin", + ".zlogout", "zlogout", + ".zprofile", "zprofile", + ".zshrc", "zshrc", + // Extensions + "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh", + ]), + (&["slim"], &["*.skim", "*.slim", "*.slime"]), + (&["smarty"], &["*.tpl"]), + (&["sml"], &["*.sml", "*.sig"]), + (&["solidity"], &["*.sol"]), + (&["soy"], &["*.soy"]), + (&["spark"], &["*.spark"]), + (&["spec"], &["*.spec"]), + (&["sql"], &["*.sql", "*.psql"]), + (&["stylus"], &["*.styl"]), + (&["sv"], &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]), + (&["svelte"], &["*.svelte"]), + (&["svg"], &["*.svg"]), + (&["swift"], &["*.swift"]), + (&["swig"], &["*.def", "*.i"]), + (&["systemd"], &[ + "*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path", + "*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target", + "*.timer", + ]), + (&["taskpaper"], &["*.taskpaper"]), + (&["tcl"], &["*.tcl"]), + (&["tex"], &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]), + (&["texinfo"], &["*.texi"]), + (&["textile"], &["*.textile"]), + (&["tf"], &[ + "*.tf", "*.auto.tfvars", "terraform.tfvars", "*.tf.json", + "*.auto.tfvars.json", "terraform.tfvars.json", "*.terraformrc", + "terraform.rc", "*.tfrc", "*.terraform.lock.hcl", + ]), + (&["thrift"], &["*.thrift"]), + (&["toml"], &["*.toml", "Cargo.lock"]), + (&["ts", "typescript"], &["*.ts", "*.tsx", "*.cts", "*.mts"]), + (&["twig"], &["*.twig"]), + (&["txt"], &["*.txt"]), + (&["typoscript"], &["*.typoscript", "*.ts"]), + (&["usd"], &["*.usd", "*.usda", "*.usdc"]), + (&["v"], &["*.v", "*.vsh"]), + (&["vala"], &["*.vala"]), + (&["vb"], &["*.vb"]), + (&["vcl"], &["*.vcl"]), + (&["verilog"], &["*.v", "*.vh", "*.sv", "*.svh"]), + (&["vhdl"], &["*.vhd", "*.vhdl"]), + (&["vim"], &[ + "*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc", + ]), + (&["vimscript"], &[ + "*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc", + ]), + (&["vue"], &["*.vue"]), + (&["webidl"], &["*.idl", "*.webidl", "*.widl"]), + (&["wgsl"], &["*.wgsl"]), + (&["wiki"], &["*.mediawiki", "*.wiki"]), + (&["xml"], &[ + "*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb", + "*.rng", "*.sch", "*.xhtml", + ]), + (&["xz"], &["*.xz", "*.txz"]), + (&["yacc"], &["*.y"]), + (&["yaml"], &["*.yaml", "*.yml"]), + (&["yang"], &["*.yang"]), + (&["z"], &["*.Z"]), + (&["zig"], &["*.zig"]), + (&["zsh"], &[ + ".zshenv", "zshenv", + ".zlogin", "zlogin", + ".zlogout", "zlogout", + ".zprofile", "zprofile", + ".zshrc", "zshrc", + "*.zsh", + ]), + (&["zstd"], &["*.zst", "*.zstd"]), +]; + +#[cfg(test)] +mod tests { + use super::DEFAULT_TYPES; + + #[test] + fn default_types_are_sorted() { + let mut names = 
DEFAULT_TYPES.iter().map(|(aliases, _)| aliases[0]); + let Some(mut previous_name) = names.next() else { + return; + }; + for name in names { + assert!( + name > previous_name, + r#""{}" should be sorted before "{}" in `DEFAULT_TYPES`"#, + name, + previous_name + ); + previous_name = name; + } + } +} diff --git a/crates/ignore/src/dir.rs b/crates/ignore/src/dir.rs new file mode 100644 index 000000000000..9bbf1442b382 --- /dev/null +++ b/crates/ignore/src/dir.rs @@ -0,0 +1,1196 @@ +// This module provides a data structure, `Ignore`, that connects "directory +// traversal" with "ignore matchers." Specifically, it knows about gitignore +// semantics and precedence, and is organized based on directory hierarchy. +// Namely, every matcher logically corresponds to ignore rules from a single +// directory, and points to the matcher for its corresponding parent directory. +// In this sense, `Ignore` is a *persistent* data structure. +// +// This design was specifically chosen to make it possible to use this data +// structure in a parallel directory iterator. +// +// My initial intention was to expose this module as part of this crate's +// public API, but I think the data structure's public API is too complicated +// with non-obvious failure modes. Alas, such things haven't been documented +// well. + +use std::{ + collections::HashMap, + ffi::{OsStr, OsString}, + fs::{File, FileType}, + io::{self, BufRead}, + path::{Path, PathBuf}, + sync::{Arc, RwLock, Weak}, +}; + +use crate::{ + gitignore::{self, Gitignore, GitignoreBuilder}, + overrides::{self, Override}, + pathutil::{is_hidden, strip_prefix}, + types::{self, Types}, + walk::DirEntry, + {Error, Match, PartialErrorBuilder}, +}; + +/// IgnoreMatch represents information about where a match came from when using +/// the `Ignore` matcher. +#[derive(Clone, Debug)] +#[allow(dead_code)] +pub(crate) struct IgnoreMatch<'a>(IgnoreMatchInner<'a>); + +/// IgnoreMatchInner describes precisely where the match information came from. +/// This is private to allow expansion to more matchers in the future. +#[derive(Clone, Debug)] +#[allow(dead_code)] +enum IgnoreMatchInner<'a> { + Override(overrides::Glob<'a>), + Gitignore(&'a gitignore::Glob), + Types(types::Glob<'a>), + Hidden, +} + +impl<'a> IgnoreMatch<'a> { + fn overrides(x: overrides::Glob<'a>) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Override(x)) + } + + fn gitignore(x: &'a gitignore::Glob) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Gitignore(x)) + } + + fn types(x: types::Glob<'a>) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Types(x)) + } + + fn hidden() -> IgnoreMatch<'static> { + IgnoreMatch(IgnoreMatchInner::Hidden) + } +} + +/// Options for the ignore matcher, shared between the matcher itself and the +/// builder. +#[derive(Clone, Copy, Debug)] +struct IgnoreOptions { + /// Whether to ignore hidden file paths or not. + hidden: bool, + /// Whether to read .ignore files. + ignore: bool, + /// Whether to respect any ignore files in parent directories. + parents: bool, + /// Whether to read git's global gitignore file. + git_global: bool, + /// Whether to read .gitignore files. + git_ignore: bool, + /// Whether to read .git/info/exclude files. + git_exclude: bool, + /// Whether to ignore files case insensitively + ignore_case_insensitive: bool, + /// Whether a git repository must be present in order to apply any + /// git-related ignore rules. + require_git: bool, +} + +/// Ignore is a matcher useful for recursively walking one or more directories. 
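+///
+/// Every `Ignore` logically corresponds to the ignore rules of a single
+/// directory and points at the matcher for its parent directory, so the
+/// matchers form a persistent chain that can be shared cheaply across
+/// threads (see the module comment above).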
+#[derive(Clone, Debug)]
+pub(crate) struct Ignore(Arc<IgnoreInner>);
+
+#[derive(Clone, Debug)]
+struct IgnoreInner {
+    /// A map of all existing directories that have already been
+    /// compiled into matchers.
+    ///
+    /// Note that this is never used during matching, only when adding new
+    /// parent directory matchers. This avoids needing to rebuild glob sets for
+    /// parent directories if many paths are being searched.
+    compiled: Arc<RwLock<HashMap<OsString, Weak<IgnoreInner>>>>,
+    /// The path to the directory that this matcher was built from.
+    dir: PathBuf,
+    /// An override matcher (default is empty).
+    overrides: Arc<Override>,
+    /// A file type matcher.
+    types: Arc<Types>,
+    /// The parent directory to match next.
+    ///
+    /// If this is the root directory or there are otherwise no more
+    /// directories to match, then `parent` is `None`.
+    parent: Option<Ignore>,
+    /// Whether this is an absolute parent matcher, as added by `add_parents`.
+    is_absolute_parent: bool,
+    /// The absolute base path of this matcher. Populated only if parent
+    /// directories are added.
+    absolute_base: Option<Arc<PathBuf>>,
+    /// Explicit global ignore matchers specified by the caller.
+    explicit_ignores: Arc<Vec<Gitignore>>,
+    /// Ignore files used in addition to `.ignore`.
+    custom_ignore_filenames: Arc<Vec<OsString>>,
+    /// The matcher for custom ignore files.
+    custom_ignore_matcher: Gitignore,
+    /// The matcher for .ignore files.
+    ignore_matcher: Gitignore,
+    /// A global gitignore matcher, usually from $XDG_CONFIG_HOME/git/ignore.
+    git_global_matcher: Arc<Gitignore>,
+    /// The matcher for .gitignore files.
+    git_ignore_matcher: Gitignore,
+    /// Special matcher for `.git/info/exclude` files.
+    git_exclude_matcher: Gitignore,
+    /// Whether this directory contains a .git sub-directory.
+    has_git: bool,
+    /// Ignore config.
+    opts: IgnoreOptions,
+}
+
+impl Ignore {
+    /// Return the directory path of this matcher.
+    pub(crate) fn path(&self) -> &Path {
+        &self.0.dir
+    }
+
+    /// Return true if this matcher has no parent.
+    pub(crate) fn is_root(&self) -> bool {
+        self.0.parent.is_none()
+    }
+
+    /// Returns true if this matcher was added via the `add_parents` method.
+    pub(crate) fn is_absolute_parent(&self) -> bool {
+        self.0.is_absolute_parent
+    }
+
+    /// Return this matcher's parent, if one exists.
+    pub(crate) fn parent(&self) -> Option<Ignore> {
+        self.0.parent.clone()
+    }
+
+    /// Create a new `Ignore` matcher with the parent directories of `dir`.
+    ///
+    /// Note that this can only be called on an `Ignore` matcher with no
+    /// parents (i.e., `is_root` returns `true`). This will panic otherwise.
+    pub(crate) fn add_parents<P: AsRef<Path>>(&self, path: P) -> (Ignore, Option<Error>) {
+        if !self.0.opts.parents
+            && !self.0.opts.git_ignore
+            && !self.0.opts.git_exclude
+            && !self.0.opts.git_global
+        {
+            // If we never need info from parent directories, then don't do
+            // anything.
+            return (self.clone(), None);
+        }
+        if !self.is_root() {
+            panic!("Ignore::add_parents called on non-root matcher");
+        }
+        // CHANGED: Use `dunce::canonicalize` as we use it everywhere else.
+        let absolute_base = match dunce::canonicalize(path.as_ref()) {
+            Ok(path) => Arc::new(path),
+            Err(_) => {
+                // There's not much we can do here, so just return our
+                // existing matcher. We drop the error to be consistent
+                // with our general pattern of ignoring I/O errors when
+                // processing ignore files.
+                return (self.clone(), None);
+            }
+        };
+        // List of parents, from child to root.
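+        // We then build matchers in reverse (root first) below, so that each
+        // child can hold a reference to its already-built parent. Every
+        // matcher built here is also cached in `compiled` as a weak
+        // reference, so other roots that share ancestors can reuse it.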
+        let mut parents = vec![];
+        let mut path = &**absolute_base;
+        while let Some(parent) = path.parent() {
+            parents.push(parent);
+            path = parent;
+        }
+        let mut errs = PartialErrorBuilder::default();
+        let mut ig = self.clone();
+        for parent in parents.into_iter().rev() {
+            let mut compiled = self.0.compiled.write().unwrap();
+            if let Some(weak) = compiled.get(parent.as_os_str()) {
+                if let Some(prebuilt) = weak.upgrade() {
+                    ig = Ignore(prebuilt);
+                    continue;
+                }
+            }
+            let (mut igtmp, err) = ig.add_child_path(parent);
+            errs.maybe_push(err);
+            igtmp.is_absolute_parent = true;
+            igtmp.absolute_base = Some(absolute_base.clone());
+            igtmp.has_git = if self.0.opts.require_git && self.0.opts.git_ignore {
+                parent.join(".git").exists()
+            } else {
+                false
+            };
+            let ig_arc = Arc::new(igtmp);
+            ig = Ignore(ig_arc.clone());
+            compiled.insert(parent.as_os_str().to_os_string(), Arc::downgrade(&ig_arc));
+        }
+        (ig, errs.into_error_option())
+    }
+
+    /// Create a new `Ignore` matcher for the given child directory.
+    ///
+    /// Since building the matcher may require reading from multiple
+    /// files, it's possible that this method partially succeeds. Therefore,
+    /// a matcher is always returned (which may match nothing) and an error is
+    /// returned if it exists.
+    ///
+    /// Note that all I/O errors are completely ignored.
+    pub(crate) fn add_child<P: AsRef<Path>>(&self, dir: P) -> (Ignore, Option<Error>) {
+        let (ig, err) = self.add_child_path(dir.as_ref());
+        (Ignore(Arc::new(ig)), err)
+    }
+
+    /// Like add_child, but takes a full path and returns an IgnoreInner.
+    fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) {
+        let git_type =
+            if self.0.opts.require_git && (self.0.opts.git_ignore || self.0.opts.git_exclude) {
+                dir.join(".git").metadata().ok().map(|md| md.file_type())
+            } else {
+                None
+            };
+        let has_git = git_type.map(|_| true).unwrap_or(false);
+
+        let mut errs = PartialErrorBuilder::default();
+        let custom_ig_matcher = if self.0.custom_ignore_filenames.is_empty() {
+            Gitignore::empty()
+        } else {
+            let (m, err) = create_gitignore(
+                &dir,
+                &dir,
+                &self.0.custom_ignore_filenames,
+                self.0.opts.ignore_case_insensitive,
+            );
+            errs.maybe_push(err);
+            m
+        };
+        let ig_matcher = if !self.0.opts.ignore {
+            Gitignore::empty()
+        } else {
+            let (m, err) = create_gitignore(
+                &dir,
+                &dir,
+                &[".ignore"],
+                self.0.opts.ignore_case_insensitive,
+            );
+            errs.maybe_push(err);
+            m
+        };
+        let gi_matcher = if !self.0.opts.git_ignore {
+            Gitignore::empty()
+        } else {
+            let (m, err) = create_gitignore(
+                &dir,
+                &dir,
+                &[".gitignore"],
+                self.0.opts.ignore_case_insensitive,
+            );
+            errs.maybe_push(err);
+            m
+        };
+        let gi_exclude_matcher = if !self.0.opts.git_exclude {
+            Gitignore::empty()
+        } else {
+            match resolve_git_commondir(dir, git_type) {
+                Ok(git_dir) => {
+                    let (m, err) = create_gitignore(
+                        &dir,
+                        &git_dir,
+                        &["info/exclude"],
+                        self.0.opts.ignore_case_insensitive,
+                    );
+                    errs.maybe_push(err);
+                    m
+                }
+                Err(err) => {
+                    errs.maybe_push(err);
+                    Gitignore::empty()
+                }
+            }
+        };
+        let ig = IgnoreInner {
+            compiled: self.0.compiled.clone(),
+            dir: dir.to_path_buf(),
+            overrides: self.0.overrides.clone(),
+            types: self.0.types.clone(),
+            parent: Some(self.clone()),
+            is_absolute_parent: false,
+            absolute_base: self.0.absolute_base.clone(),
+            explicit_ignores: self.0.explicit_ignores.clone(),
+            custom_ignore_filenames: self.0.custom_ignore_filenames.clone(),
+            custom_ignore_matcher: custom_ig_matcher,
+            ignore_matcher: ig_matcher,
+            git_global_matcher: self.0.git_global_matcher.clone(),
+            git_ignore_matcher: gi_matcher,
+            git_exclude_matcher: gi_exclude_matcher,
+            has_git,
+            opts: self.0.opts,
+        };
+        (ig, errs.into_error_option())
+    }
+
+    /// Returns true if at least one type of ignore rule should be matched.
+    fn has_any_ignore_rules(&self) -> bool {
+        let opts = self.0.opts;
+        let has_custom_ignore_files = !self.0.custom_ignore_filenames.is_empty();
+        let has_explicit_ignores = !self.0.explicit_ignores.is_empty();
+
+        opts.ignore
+            || opts.git_global
+            || opts.git_ignore
+            || opts.git_exclude
+            || has_custom_ignore_files
+            || has_explicit_ignores
+    }
+
+    /// Like `matched`, but works with a directory entry instead.
+    pub(crate) fn matched_dir_entry<'a>(&'a self, dent: &DirEntry) -> Match<IgnoreMatch<'a>> {
+        let m = self.matched(dent.path(), dent.is_dir());
+        if m.is_none() && self.0.opts.hidden && is_hidden(dent) {
+            return Match::Ignore(IgnoreMatch::hidden());
+        }
+        m
+    }
+
+    /// Returns a match indicating whether the given file path should be
+    /// ignored or not.
+    ///
+    /// The match contains information about its origin.
+    fn matched<'a, P: AsRef<Path>>(&'a self, path: P, is_dir: bool) -> Match<IgnoreMatch<'a>> {
+        // We need to be careful with our path. If it has a leading ./, then
+        // strip it because it causes nothing but trouble.
+        let mut path = path.as_ref();
+        if let Some(p) = strip_prefix("./", path) {
+            path = p;
+        }
+        // Match against the override patterns. If an override matches
+        // regardless of whether it's whitelist/ignore, then we quit and
+        // return that result immediately. Overrides have the highest
+        // precedence.
+        if !self.0.overrides.is_empty() {
+            let mat = self
+                .0
+                .overrides
+                .matched(path, is_dir)
+                .map(IgnoreMatch::overrides);
+            if !mat.is_none() {
+                return mat;
+            }
+        }
+        let mut whitelisted = Match::None;
+        if self.has_any_ignore_rules() {
+            let mat = self.matched_ignore(path, is_dir);
+            if mat.is_ignore() {
+                return mat;
+            } else if mat.is_whitelist() {
+                whitelisted = mat;
+            }
+        }
+        if !self.0.types.is_empty() {
+            let mat = self.0.types.matched(path, is_dir).map(IgnoreMatch::types);
+            if mat.is_ignore() {
+                return mat;
+            } else if mat.is_whitelist() {
+                whitelisted = mat;
+            }
+        }
+        whitelisted
+    }
+
+    /// Performs matching only on the ignore files for this directory and
+    /// all parent directories.
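+    ///
+    /// The sources are consulted in a fixed precedence order (see the
+    /// `order` array below): explicit ignores added by the caller, custom
+    /// ignore files, `.ignore`, `.gitignore`, `.git/info/exclude`, and
+    /// finally the global gitignore. The first source with an opinion wins.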
+    fn matched_ignore<'a>(&'a self, path: &Path, is_dir: bool) -> Match<IgnoreMatch<'a>> {
+        let (mut m_custom_ignore, mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) = (
+            Match::None,
+            Match::None,
+            Match::None,
+            Match::None,
+            Match::None,
+        );
+        let any_git = !self.0.opts.require_git || self.parents().any(|ig| ig.0.has_git);
+        let mut saw_git = false;
+        for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) {
+            if m_custom_ignore.is_none() {
+                m_custom_ignore =
+                    ig.0.custom_ignore_matcher
+                        .matched(path, is_dir)
+                        .map(IgnoreMatch::gitignore);
+            }
+            if m_ignore.is_none() {
+                m_ignore =
+                    ig.0.ignore_matcher
+                        .matched(path, is_dir)
+                        .map(IgnoreMatch::gitignore);
+            }
+            if any_git && !saw_git && m_gi.is_none() {
+                m_gi =
+                    ig.0.git_ignore_matcher
+                        .matched(path, is_dir)
+                        .map(IgnoreMatch::gitignore);
+            }
+            if any_git && !saw_git && m_gi_exclude.is_none() {
+                m_gi_exclude =
+                    ig.0.git_exclude_matcher
+                        .matched(path, is_dir)
+                        .map(IgnoreMatch::gitignore);
+            }
+            saw_git = saw_git || ig.0.has_git;
+        }
+        if self.0.opts.parents {
+            // CHANGED: We removed a code path that rewrote the `path` to be relative to
+            // `self.absolute_base()` because it assumed that every path is inside the base,
+            // which is not the case for us as we use `WalkBuilder#add` to add roots outside of
+            // the base.
+            for ig in self.parents().skip_while(|ig| !ig.0.is_absolute_parent) {
+                if m_custom_ignore.is_none() {
+                    m_custom_ignore =
+                        ig.0.custom_ignore_matcher
+                            .matched(&path, is_dir)
+                            .map(IgnoreMatch::gitignore);
+                }
+                if m_ignore.is_none() {
+                    m_ignore =
+                        ig.0.ignore_matcher
+                            .matched(&path, is_dir)
+                            .map(IgnoreMatch::gitignore);
+                }
+                if any_git && !saw_git && m_gi.is_none() {
+                    m_gi =
+                        ig.0.git_ignore_matcher
+                            .matched(&path, is_dir)
+                            .map(IgnoreMatch::gitignore);
+                }
+                if any_git && !saw_git && m_gi_exclude.is_none() {
+                    m_gi_exclude =
+                        ig.0.git_exclude_matcher
+                            .matched(&path, is_dir)
+                            .map(IgnoreMatch::gitignore);
+                }
+                saw_git = saw_git || ig.0.has_git;
+            }
+        }
+        for gi in self.0.explicit_ignores.iter().rev() {
+            // CHANGED: We need to make sure that the explicit gitignore rules apply to the path
+            //
+            //   path      = the current file/folder we are traversing
+            //   gi.path() = the path of the custom gitignore file
+            //
+            // E.g.: If we have a custom rule for `/src/utils` with `**/*`, and we are looking at
+            //       just `/src`, then the `**/*` rules do not apply to this folder, so we can
+            //       ignore the current custom gitignore file.
+            //
+            if !path.starts_with(gi.path()) {
+                continue;
+            }
+            if !m_explicit.is_none() {
+                break;
+            }
+            m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore);
+        }
+        let m_global = if any_git {
+            self.0
+                .git_global_matcher
+                .matched(&path, is_dir)
+                .map(IgnoreMatch::gitignore)
+        } else {
+            Match::None
+        };
+
+        // CHANGED: We added logic to configure an order in which the ignore files are respected
+        // and allowed a whitelist in a later file to overrule a block in an earlier file.
+        let order = [
+            // Manually added ignores
+            &m_explicit,
+            // .custom-ignore
+            &m_custom_ignore,
+            // .ignore
+            &m_ignore,
+            // .gitignore
+            &m_gi,
+            // .git/info/exclude
+            &m_gi_exclude,
+            // Global gitignore
+            &m_global,
+        ];
+
+        for check in order.into_iter() {
+            if check.is_none() {
+                continue;
+            }
+
+            return check.clone();
+        }
+
+        m_explicit
+    }
+
+    /// Returns an iterator over parent ignore matchers, including this one.
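+    ///
+    /// For example, `matched_ignore` above uses
+    /// `self.parents().any(|ig| ig.0.has_git)` to decide whether any
+    /// enclosing directory is a git repository.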
+    pub(crate) fn parents(&self) -> Parents<'_> {
+        Parents(Some(self))
+    }
+}
+
+/// An iterator over all parents of an ignore matcher, including itself.
+///
+/// The lifetime `'a` refers to the lifetime of the initial `Ignore` matcher.
+pub(crate) struct Parents<'a>(Option<&'a Ignore>);
+
+impl<'a> Iterator for Parents<'a> {
+    type Item = &'a Ignore;
+
+    fn next(&mut self) -> Option<&'a Ignore> {
+        match self.0.take() {
+            None => None,
+            Some(ig) => {
+                self.0 = ig.0.parent.as_ref();
+                Some(ig)
+            }
+        }
+    }
+}
+
+/// A builder for creating an Ignore matcher.
+#[derive(Clone, Debug)]
+pub(crate) struct IgnoreBuilder {
+    /// The root directory path for this ignore matcher.
+    dir: PathBuf,
+    /// An override matcher (default is empty).
+    overrides: Arc<Override>,
+    /// A type matcher (default is empty).
+    types: Arc<Types>,
+    /// Explicit global ignore matchers.
+    explicit_ignores: Vec<Gitignore>,
+    /// Ignore files in addition to .ignore.
+    custom_ignore_filenames: Vec<OsString>,
+    /// Ignore config.
+    opts: IgnoreOptions,
+}
+
+impl IgnoreBuilder {
+    /// Create a new builder for an `Ignore` matcher.
+    ///
+    /// All relative file paths are resolved with respect to the current
+    /// working directory.
+    pub(crate) fn new() -> IgnoreBuilder {
+        IgnoreBuilder {
+            dir: Path::new("").to_path_buf(),
+            overrides: Arc::new(Override::empty()),
+            types: Arc::new(Types::empty()),
+            explicit_ignores: vec![],
+            custom_ignore_filenames: vec![],
+            opts: IgnoreOptions {
+                hidden: true,
+                ignore: true,
+                parents: true,
+                git_global: true,
+                git_ignore: true,
+                git_exclude: true,
+                ignore_case_insensitive: false,
+                require_git: true,
+            },
+        }
+    }
+
+    /// Builds a new `Ignore` matcher.
+    ///
+    /// The matcher returned won't match anything until ignore rules from
+    /// directories are added to it.
+    pub(crate) fn build(&self) -> Ignore {
+        let git_global_matcher = if !self.opts.git_global {
+            Gitignore::empty()
+        } else {
+            let mut builder = GitignoreBuilder::new("");
+            builder
+                .case_insensitive(self.opts.ignore_case_insensitive)
+                .unwrap();
+            let (gi, err) = builder.build_global();
+            if let Some(err) = err {
+                log::debug!("{}", err);
+            }
+            gi
+        };
+
+        Ignore(Arc::new(IgnoreInner {
+            compiled: Arc::new(RwLock::new(HashMap::new())),
+            dir: self.dir.clone(),
+            overrides: self.overrides.clone(),
+            types: self.types.clone(),
+            parent: None,
+            is_absolute_parent: true,
+            absolute_base: None,
+            explicit_ignores: Arc::new(self.explicit_ignores.clone()),
+            custom_ignore_filenames: Arc::new(self.custom_ignore_filenames.clone()),
+            custom_ignore_matcher: Gitignore::empty(),
+            ignore_matcher: Gitignore::empty(),
+            git_global_matcher: Arc::new(git_global_matcher),
+            git_ignore_matcher: Gitignore::empty(),
+            git_exclude_matcher: Gitignore::empty(),
+            has_git: false,
+            opts: self.opts,
+        }))
+    }
+
+    /// Add an override matcher.
+    ///
+    /// By default, no override matcher is used.
+    ///
+    /// This overrides any previous setting.
+    pub(crate) fn overrides(&mut self, overrides: Override) -> &mut IgnoreBuilder {
+        self.overrides = Arc::new(overrides);
+        self
+    }
+
+    /// Add a file type matcher.
+    ///
+    /// By default, no file type matcher is used.
+    ///
+    /// This overrides any previous setting.
+    pub(crate) fn types(&mut self, types: Types) -> &mut IgnoreBuilder {
+        self.types = Arc::new(types);
+        self
+    }
+
+    /// Adds a new global ignore matcher from the `Gitignore` matcher given.
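+    ///
+    /// A sketch of typical usage, mirroring the `explicit_ignore` test
+    /// below: build a matcher with `Gitignore::new(path)` and pass it here;
+    /// its rules then apply with the highest precedence of all ignore
+    /// sources (see `matched_ignore`).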
+    pub(crate) fn add_ignore(&mut self, ig: Gitignore) -> &mut IgnoreBuilder {
+        self.explicit_ignores.push(ig);
+        self
+    }
+
+    /// Add a custom ignore file name.
+    ///
+    /// These ignore files have higher precedence than all other ignore files.
+    ///
+    /// When specifying multiple names, earlier names have lower precedence than
+    /// later names.
+    pub(crate) fn add_custom_ignore_filename<S: AsRef<OsStr>>(
+        &mut self,
+        file_name: S,
+    ) -> &mut IgnoreBuilder {
+        self.custom_ignore_filenames
+            .push(file_name.as_ref().to_os_string());
+        self
+    }
+
+    /// Enables ignoring hidden files.
+    ///
+    /// This is enabled by default.
+    pub(crate) fn hidden(&mut self, yes: bool) -> &mut IgnoreBuilder {
+        self.opts.hidden = yes;
+        self
+    }
+
+    /// Enables reading `.ignore` files.
+    ///
+    /// `.ignore` files have the same semantics as `gitignore` files and are
+    /// supported by search tools such as ripgrep and The Silver Searcher.
+    ///
+    /// This is enabled by default.
+    pub(crate) fn ignore(&mut self, yes: bool) -> &mut IgnoreBuilder {
+        self.opts.ignore = yes;
+        self
+    }
+
+    /// Enables reading ignore files from parent directories.
+    ///
+    /// If this is enabled, then .gitignore files in parent directories of each
+    /// file path given are respected. Otherwise, they are ignored.
+    ///
+    /// This is enabled by default.
+    pub(crate) fn parents(&mut self, yes: bool) -> &mut IgnoreBuilder {
+        self.opts.parents = yes;
+        self
+    }
+
+    /// Add a global gitignore matcher.
+    ///
+    /// Its precedence is lower than both normal `.gitignore` files and
+    /// `.git/info/exclude` files.
+    ///
+    /// This overwrites any previous global gitignore setting.
+    ///
+    /// This is enabled by default.
+    pub(crate) fn git_global(&mut self, yes: bool) -> &mut IgnoreBuilder {
+        self.opts.git_global = yes;
+        self
+    }
+
+    /// Enables reading `.gitignore` files.
+    ///
+    /// `.gitignore` files have match semantics as described in the `gitignore`
+    /// man page.
+    ///
+    /// This is enabled by default.
+    pub(crate) fn git_ignore(&mut self, yes: bool) -> &mut IgnoreBuilder {
+        self.opts.git_ignore = yes;
+        self
+    }
+
+    /// Enables reading `.git/info/exclude` files.
+    ///
+    /// `.git/info/exclude` files have match semantics as described in the
+    /// `gitignore` man page.
+    ///
+    /// This is enabled by default.
+    pub(crate) fn git_exclude(&mut self, yes: bool) -> &mut IgnoreBuilder {
+        self.opts.git_exclude = yes;
+        self
+    }
+
+    /// Whether a git repository is required to apply git-related ignore
+    /// rules (global rules, .gitignore and local exclude rules).
+    ///
+    /// When disabled, git-related ignore rules are applied even when searching
+    /// outside a git repository.
+    pub(crate) fn require_git(&mut self, yes: bool) -> &mut IgnoreBuilder {
+        self.opts.require_git = yes;
+        self
+    }
+
+    /// Process ignore files case insensitively.
+    ///
+    /// This is disabled by default.
+    pub(crate) fn ignore_case_insensitive(&mut self, yes: bool) -> &mut IgnoreBuilder {
+        self.opts.ignore_case_insensitive = yes;
+        self
+    }
+}
+
+/// Creates a new gitignore matcher for the directory given.
+///
+/// The matcher is meant to match files below `dir`.
+/// Ignore globs are extracted from each of the file names relative to
+/// `dir_for_ignorefile` in the order given (earlier names have lower
+/// precedence than later names).
+///
+/// I/O errors are ignored.
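+///
+/// `dir` and `dir_for_ignorefile` differ only in the `.git/info/exclude`
+/// case, where the globs live under the (possibly shared) git common
+/// directory but must match files under the worktree; see
+/// `resolve_git_commondir` below.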
+pub(crate) fn create_gitignore<T: AsRef<OsStr>>(
+    dir: &Path,
+    dir_for_ignorefile: &Path,
+    names: &[T],
+    case_insensitive: bool,
+) -> (Gitignore, Option<Error>) {
+    let mut builder = GitignoreBuilder::new(dir);
+    let mut errs = PartialErrorBuilder::default();
+    builder.case_insensitive(case_insensitive).unwrap();
+    for name in names {
+        let gipath = dir_for_ignorefile.join(name.as_ref());
+        // This check is not necessary, but is added for performance. Namely,
+        // a simple stat call checking for existence can often be just a bit
+        // quicker than actually trying to open a file. Since the number of
+        // directories without ignore files likely greatly exceeds the number
+        // with ignore files, this check generally makes sense.
+        //
+        // However, until demonstrated otherwise, we speculatively do not do
+        // this on Windows since Windows is notorious for having slow file
+        // system operations. Namely, it's not clear whether this analysis
+        // makes sense on Windows.
+        //
+        // For more details: https://github.com/BurntSushi/ripgrep/pull/1381
+        if cfg!(windows) || gipath.exists() {
+            errs.maybe_push_ignore_io(builder.add(gipath));
+        }
+    }
+    let gi = match builder.build() {
+        Ok(gi) => gi,
+        Err(err) => {
+            errs.push(err);
+            GitignoreBuilder::new(dir).build().unwrap()
+        }
+    };
+    (gi, errs.into_error_option())
+}
+
+/// Find the GIT_COMMON_DIR for the given git worktree.
+///
+/// This is the directory that may contain a private ignore file
+/// "info/exclude". Unlike git, this function does *not* read environment
+/// variables GIT_DIR and GIT_COMMON_DIR, because it is not clear how to use
+/// them when multiple repositories are searched.
+///
+/// Some I/O errors are ignored.
+fn resolve_git_commondir(
+    dir: &Path,
+    git_type: Option<FileType>,
+) -> Result<PathBuf, Option<Error>> {
+    let git_dir_path = || dir.join(".git");
+    let git_dir = git_dir_path();
+    if !git_type.map_or(false, |ft| ft.is_file()) {
+        return Ok(git_dir);
+    }
+    let file = match File::open(git_dir) {
+        Ok(file) => io::BufReader::new(file),
+        Err(err) => {
+            return Err(Some(Error::Io(err).with_path(git_dir_path())));
+        }
+    };
+    let dot_git_line = match file.lines().next() {
+        Some(Ok(line)) => line,
+        Some(Err(err)) => {
+            return Err(Some(Error::Io(err).with_path(git_dir_path())));
+        }
+        None => return Err(None),
+    };
+    if !dot_git_line.starts_with("gitdir: ") {
+        return Err(None);
+    }
+    let real_git_dir = PathBuf::from(&dot_git_line["gitdir: ".len()..]);
+    let git_commondir_file = || real_git_dir.join("commondir");
+    let file = match File::open(git_commondir_file()) {
+        Ok(file) => io::BufReader::new(file),
+        Err(_) => return Err(None),
+    };
+    let commondir_line = match file.lines().next() {
+        Some(Ok(line)) => line,
+        Some(Err(err)) => {
+            return Err(Some(Error::Io(err).with_path(git_commondir_file())));
+        }
+        None => return Err(None),
+    };
+    let commondir_abs = if commondir_line.starts_with(".") {
+        real_git_dir.join(commondir_line) // relative commondir
+    } else {
+        PathBuf::from(commondir_line)
+    };
+    Ok(commondir_abs)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{io::Write, path::Path};
+
+    use crate::{dir::IgnoreBuilder, gitignore::Gitignore, tests::TempDir, Error};
+
+    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
+        let mut file = std::fs::File::create(path).unwrap();
+        file.write_all(contents.as_bytes()).unwrap();
+    }
+
+    fn mkdirp<P: AsRef<Path>>(path: P) {
+        std::fs::create_dir_all(path).unwrap();
+    }
+
+    fn partial(err: Error) -> Vec<Error> {
+        match err {
+            Error::Partial(errs) => errs,
+            _ => panic!("expected partial error but got {:?}", err),
+        }
+    }
+
+    fn tmpdir() -> TempDir {
TempDir::new().unwrap() + } + + #[test] + fn explicit_ignore() { + let td = tmpdir(); + wfile(td.path().join("not-an-ignore"), "foo\n!bar"); + + let (gi, err) = Gitignore::new(td.path().join("not-an-ignore")); + assert!(err.is_none()); + let (ig, err) = IgnoreBuilder::new() + .add_ignore(gi) + .build() + .add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched(td.path().join("foo"), false).is_ignore()); + assert!(ig.matched(td.path().join("bar"), false).is_whitelist()); + assert!(ig.matched(td.path().join("baz"), false).is_none()); + assert!(ig.matched("/foo", false).is_none()); + } + + #[test] + fn git_exclude() { + let td = tmpdir(); + mkdirp(td.path().join(".git/info")); + wfile(td.path().join(".git/info/exclude"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn gitignore() { + let td = tmpdir(); + mkdirp(td.path().join(".git")); + wfile(td.path().join(".gitignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn gitignore_no_git() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_none()); + assert!(ig.matched("bar", false).is_none()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn gitignore_allowed_no_git() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new() + .require_git(false) + .build() + .add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn ignore() { + let td = tmpdir(); + wfile(td.path().join(".ignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn custom_ignore() { + let td = tmpdir(); + let custom_ignore = ".customignore"; + wfile(td.path().join(custom_ignore), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new() + .add_custom_ignore_filename(custom_ignore) + .build() + .add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + // Tests that a custom ignore file will override an .ignore. + #[test] + fn custom_ignore_over_ignore() { + let td = tmpdir(); + let custom_ignore = ".customignore"; + wfile(td.path().join(".ignore"), "foo"); + wfile(td.path().join(custom_ignore), "!foo"); + + let (ig, err) = IgnoreBuilder::new() + .add_custom_ignore_filename(custom_ignore) + .build() + .add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_whitelist()); + } + + // Tests that earlier custom ignore files have lower precedence than later. 
+ #[test] + fn custom_ignore_precedence() { + let td = tmpdir(); + let custom_ignore1 = ".customignore1"; + let custom_ignore2 = ".customignore2"; + wfile(td.path().join(custom_ignore1), "foo"); + wfile(td.path().join(custom_ignore2), "!foo"); + + let (ig, err) = IgnoreBuilder::new() + .add_custom_ignore_filename(custom_ignore1) + .add_custom_ignore_filename(custom_ignore2) + .build() + .add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_whitelist()); + } + + // Tests that an .ignore will override a .gitignore. + #[test] + fn ignore_over_gitignore() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join(".ignore"), "!foo"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_whitelist()); + } + + // Tests that exclude has lower precedent than both .ignore and .gitignore. + #[test] + fn exclude_lowest() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "!foo"); + wfile(td.path().join(".ignore"), "!bar"); + mkdirp(td.path().join(".git/info")); + wfile(td.path().join(".git/info/exclude"), "foo\nbar\nbaz"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("baz", false).is_ignore()); + assert!(ig.matched("foo", false).is_whitelist()); + assert!(ig.matched("bar", false).is_whitelist()); + } + + #[test] + fn errored() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "{foo"); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + } + + #[test] + fn errored_both() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "{foo"); + wfile(td.path().join(".ignore"), "{bar"); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert_eq!(2, partial(err.expect("an error")).len()); + } + + #[test] + fn errored_partial() { + let td = tmpdir(); + mkdirp(td.path().join(".git")); + wfile(td.path().join(".gitignore"), "{foo\nbar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + assert!(ig.matched("bar", false).is_ignore()); + } + + #[test] + fn errored_partial_and_ignore() { + let td = tmpdir(); + wfile(td.path().join(".gitignore"), "{foo\nbar"); + wfile(td.path().join(".ignore"), "!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + assert!(ig.matched("bar", false).is_whitelist()); + } + + #[test] + fn not_present_empty() { + let td = tmpdir(); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + } + + #[test] + fn stops_at_git_dir() { + // This tests that .gitignore files beyond a .git barrier aren't + // matched, but .ignore files are. 
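+        //
+        // Layout: the temp dir is one repository and `foo/` contains its own
+        // `.git`, so rules from the outer `.gitignore` must not leak into
+        // `foo`, while `.ignore` (which knows nothing about git) still
+        // applies there.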
+ let td = tmpdir(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("foo/.git")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join(".ignore"), "bar"); + + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_child(td.path()); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child(ig1.path().join("foo")); + assert!(err.is_none()); + + assert!(ig1.matched("foo", false).is_ignore()); + assert!(ig2.matched("foo", false).is_none()); + + assert!(ig1.matched("bar", false).is_ignore()); + assert!(ig2.matched("bar", false).is_ignore()); + } + + #[test] + fn absolute_parent() { + let td = tmpdir(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("foo")); + wfile(td.path().join(".gitignore"), "bar"); + + // First, check that the parent gitignore file isn't detected if the + // parent isn't added. This establishes a baseline. + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_child(td.path().join("foo")); + assert!(err.is_none()); + assert!(ig1.matched("bar", false).is_none()); + + // Second, check that adding a parent directory actually works. + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_parents(td.path().join("foo")); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child(td.path().join("foo")); + assert!(err.is_none()); + assert!(ig2.matched("bar", false).is_ignore()); + } + + #[test] + fn absolute_parent_anchored() { + let td = tmpdir(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("src/llvm")); + wfile(td.path().join(".gitignore"), "/llvm/\nfoo"); + + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_parents(td.path().join("src")); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child("src"); + assert!(err.is_none()); + + // CHANGED: These test cases do not make sense for us as we never call the Ignore with + // relative paths. + assert!(ig1.matched("llvm", true).is_ignore()); + assert!(ig2.matched("llvm", true).is_ignore()); + assert!(ig2.matched("src/llvm", true).is_none()); + assert!(ig2.matched("foo", false).is_ignore()); + assert!(ig2.matched("src/foo", false).is_ignore()); + } + + #[test] + fn git_info_exclude_in_linked_worktree() { + let td = tmpdir(); + let git_dir = td.path().join(".git"); + mkdirp(git_dir.join("info")); + wfile(git_dir.join("info/exclude"), "ignore_me"); + mkdirp(git_dir.join("worktrees/linked-worktree")); + let commondir_path = || git_dir.join("worktrees/linked-worktree/commondir"); + mkdirp(td.path().join("linked-worktree")); + let worktree_git_dir_abs = format!( + "gitdir: {}", + git_dir.join("worktrees/linked-worktree").to_str().unwrap(), + ); + wfile( + td.path().join("linked-worktree/.git"), + &worktree_git_dir_abs, + ); + + // relative commondir + wfile(commondir_path(), "../.."); + let ib = IgnoreBuilder::new().build(); + let (ignore, err) = ib.add_child(td.path().join("linked-worktree")); + assert!(err.is_none()); + assert!(ignore.matched("ignore_me", false).is_ignore()); + + // absolute commondir + wfile(commondir_path(), git_dir.to_str().unwrap()); + let (ignore, err) = ib.add_child(td.path().join("linked-worktree")); + assert!(err.is_none()); + assert!(ignore.matched("ignore_me", false).is_ignore()); + + // missing commondir file + assert!(std::fs::remove_file(commondir_path()).is_ok()); + let (_, err) = ib.add_child(td.path().join("linked-worktree")); + // We squash the error in this case, because it occurs in repositories + // that are not linked worktrees but have submodules. 
+        assert!(err.is_none());
+
+        wfile(td.path().join("linked-worktree/.git"), "garbage");
+        let (_, err) = ib.add_child(td.path().join("linked-worktree"));
+        assert!(err.is_none());
+
+        wfile(td.path().join("linked-worktree/.git"), "gitdir: garbage");
+        let (_, err) = ib.add_child(td.path().join("linked-worktree"));
+        assert!(err.is_none());
+    }
+}
diff --git a/crates/ignore/src/gitignore.rs b/crates/ignore/src/gitignore.rs
new file mode 100644
index 000000000000..30f1ccef2d7c
--- /dev/null
+++ b/crates/ignore/src/gitignore.rs
@@ -0,0 +1,812 @@
+/*!
+The gitignore module provides a way to match globs from a gitignore file
+against file paths.
+
+Note that this module implements the specification as described in the
+`gitignore` man page from scratch. That is, this module does *not* shell out to
+the `git` command line tool.
+*/
+
+use std::{
+    fs::File,
+    io::{BufRead, BufReader, Read},
+    path::{Path, PathBuf},
+    sync::Arc,
+};
+
+use {
+    globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder},
+    regex_automata::util::pool::Pool,
+};
+
+use crate::{
+    pathutil::{is_file_name, strip_prefix},
+    Error, Match, PartialErrorBuilder,
+};
+
+/// Glob represents a single glob in a gitignore file.
+///
+/// This is used to report information about the highest precedent glob that
+/// matched in one or more gitignore files.
+#[derive(Clone, Debug)]
+pub struct Glob {
+    /// The file path that this glob was extracted from.
+    from: Option<PathBuf>,
+    /// The original glob string.
+    original: String,
+    /// The actual glob string used to convert to a regex.
+    actual: String,
+    /// Whether this is a whitelisted glob or not.
+    is_whitelist: bool,
+    /// Whether this glob should only match directories or not.
+    is_only_dir: bool,
+}
+
+impl Glob {
+    /// Returns the file path that defined this glob.
+    pub fn from(&self) -> Option<&Path> {
+        self.from.as_ref().map(|p| &**p)
+    }
+
+    /// The original glob as it was defined in a gitignore file.
+    pub fn original(&self) -> &str {
+        &self.original
+    }
+
+    /// The actual glob that was compiled to respect gitignore
+    /// semantics.
+    pub fn actual(&self) -> &str {
+        &self.actual
+    }
+
+    /// Whether this was a whitelisted glob or not.
+    pub fn is_whitelist(&self) -> bool {
+        self.is_whitelist
+    }
+
+    /// Whether this glob must match a directory or not.
+    pub fn is_only_dir(&self) -> bool {
+        self.is_only_dir
+    }
+
+    /// Returns true if and only if this glob has a `**/` prefix.
+    fn has_doublestar_prefix(&self) -> bool {
+        self.actual.starts_with("**/") || self.actual == "**"
+    }
+}
+
+/// Gitignore is a matcher for the globs in one or more gitignore files
+/// in the same directory.
+#[derive(Clone, Debug)]
+pub struct Gitignore {
+    set: GlobSet,
+    root: PathBuf,
+    globs: Vec<Glob>,
+    num_ignores: u64,
+    num_whitelists: u64,
+    matches: Option<Arc<Pool<Vec<usize>>>>,
+}
+
+impl Gitignore {
+    /// Creates a new gitignore matcher from the gitignore file path given.
+    ///
+    /// If it's desirable to include multiple gitignore files in a single
+    /// matcher, or read gitignore globs from a different source, then
+    /// use `GitignoreBuilder`.
+    ///
+    /// This always returns a valid matcher, even if it's empty. In particular,
+    /// a Gitignore file can be partially valid, e.g., when one glob is invalid
+    /// but the rest aren't.
+    ///
+    /// Note that I/O errors are ignored. For more granular control over
+    /// errors, use `GitignoreBuilder`.
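As an aside, a minimal sketch of how this constructor is typically used (the `./.gitignore` path is hypothetical; any missing file simply yields an empty matcher plus an optional error):

```rust
use ignore::gitignore::Gitignore;

fn main() {
    // Build a matcher from an on-disk gitignore file. I/O and glob errors
    // are reported as an optional second return value instead of failing hard.
    let (gi, err) = Gitignore::new("./.gitignore");
    if let Some(err) = err {
        eprintln!("partial error: {}", err);
    }
    // Paths are matched relative to the directory containing the file.
    println!("ignored? {}", gi.matched("target", true).is_ignore());
}
```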
+    pub fn new<P: AsRef<Path>>(gitignore_path: P) -> (Gitignore, Option<Error>) {
+        let path = gitignore_path.as_ref();
+        let parent = path.parent().unwrap_or(Path::new("/"));
+        let mut builder = GitignoreBuilder::new(parent);
+        let mut errs = PartialErrorBuilder::default();
+        errs.maybe_push_ignore_io(builder.add(path));
+        match builder.build() {
+            Ok(gi) => (gi, errs.into_error_option()),
+            Err(err) => {
+                errs.push(err);
+                (Gitignore::empty(), errs.into_error_option())
+            }
+        }
+    }
+
+    /// Creates a new gitignore matcher from the global ignore file, if one
+    /// exists.
+    ///
+    /// The global config file path is specified by git's `core.excludesFile`
+    /// config option.
+    ///
+    /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
+    /// does not exist or does not specify `core.excludesFile`, then
+    /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
+    /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
+    pub fn global() -> (Gitignore, Option<Error>) {
+        GitignoreBuilder::new("").build_global()
+    }
+
+    /// Creates a new empty gitignore matcher that never matches anything.
+    ///
+    /// Its path is empty.
+    pub fn empty() -> Gitignore {
+        Gitignore {
+            set: GlobSet::empty(),
+            root: PathBuf::from(""),
+            globs: vec![],
+            num_ignores: 0,
+            num_whitelists: 0,
+            matches: None,
+        }
+    }
+
+    /// Returns the directory containing this gitignore matcher.
+    ///
+    /// All matches are done relative to this path.
+    pub fn path(&self) -> &Path {
+        &*self.root
+    }
+
+    /// Returns true if and only if this gitignore has zero globs, and
+    /// therefore never matches any file path.
+    pub fn is_empty(&self) -> bool {
+        self.set.is_empty()
+    }
+
+    /// Returns the total number of globs, which should be equivalent to
+    /// `num_ignores + num_whitelists`.
+    pub fn len(&self) -> usize {
+        self.set.len()
+    }
+
+    /// Returns the total number of ignore globs.
+    pub fn num_ignores(&self) -> u64 {
+        self.num_ignores
+    }
+
+    /// Returns the total number of whitelisted globs.
+    pub fn num_whitelists(&self) -> u64 {
+        self.num_whitelists
+    }
+
+    /// Returns whether the given path (file or directory) matched a pattern in
+    /// this gitignore matcher.
+    ///
+    /// `is_dir` should be true if the path refers to a directory and false
+    /// otherwise.
+    ///
+    /// The given path is matched relative to the path given when building
+    /// the matcher. Specifically, before matching `path`, its prefix (as
+    /// determined by a common suffix of the directory containing this
+    /// gitignore) is stripped. If there is no common suffix/prefix overlap,
+    /// then `path` is assumed to be relative to this matcher.
+    pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match<&Glob> {
+        if self.is_empty() {
+            return Match::None;
+        }
+        self.matched_stripped(self.strip(path.as_ref()), is_dir)
+    }
+
+    /// Returns whether the given path (file or directory, and expected to be
+    /// under the root) or any of its parent directories (up to the root)
+    /// matched a pattern in this gitignore matcher.
+    ///
+    /// NOTE: This method is more expensive than walking the directory hierarchy
+    /// top-to-bottom and matching the entries. But it is easier to use in cases
+    /// when a list of paths is available without a hierarchy.
+    ///
+    /// `is_dir` should be true if the path refers to a directory and false
+    /// otherwise.
+    ///
+    /// The given path is matched relative to the path given when building
+    /// the matcher.
Specifically, before matching `path`, its prefix (as + /// determined by a common suffix of the directory containing this + /// gitignore) is stripped. If there is no common suffix/prefix overlap, + /// then `path` is assumed to be relative to this matcher. + /// + /// # Panics + /// + /// This method panics if the given file path is not under the root path + /// of this matcher. + pub fn matched_path_or_any_parents>( + &self, + path: P, + is_dir: bool, + ) -> Match<&Glob> { + if self.is_empty() { + return Match::None; + } + let mut path = self.strip(path.as_ref()); + assert!(!path.has_root(), "path is expected to be under the root"); + + match self.matched_stripped(path, is_dir) { + Match::None => (), // walk up + a_match => return a_match, + } + while let Some(parent) = path.parent() { + match self.matched_stripped(parent, /* is_dir */ true) { + Match::None => path = parent, // walk up + a_match => return a_match, + } + } + Match::None + } + + /// Like matched, but takes a path that has already been stripped. + fn matched_stripped>(&self, path: P, is_dir: bool) -> Match<&Glob> { + if self.is_empty() { + return Match::None; + } + let path = path.as_ref(); + let mut matches = self.matches.as_ref().unwrap().get(); + let candidate = Candidate::new(path); + self.set.matches_candidate_into(&candidate, &mut *matches); + for &i in matches.iter().rev() { + let glob = &self.globs[i]; + if !glob.is_only_dir() || is_dir { + return if glob.is_whitelist() { + Match::Whitelist(glob) + } else { + Match::Ignore(glob) + }; + } + } + Match::None + } + + /// Strips the given path such that it's suitable for matching with this + /// gitignore matcher. + fn strip<'a, P: 'a + AsRef + ?Sized>(&'a self, path: &'a P) -> &'a Path { + let mut path = path.as_ref(); + // A leading ./ is completely superfluous. We also strip it from + // our gitignore root path, so we need to strip it from our candidate + // path too. + if let Some(p) = strip_prefix("./", path) { + path = p; + } + // Strip any common prefix between the candidate path and the root + // of the gitignore, to make sure we get relative matching right. + // BUT, a file name might not have any directory components to it, + // in which case, we don't want to accidentally strip any part of the + // file name. + // + // As an additional special case, if the root is just `.`, then we + // shouldn't try to strip anything, e.g., when path begins with a `.`. + if self.root != Path::new(".") && !is_file_name(path) { + if let Some(p) = strip_prefix(&self.root, path) { + path = p; + // If we're left with a leading slash, get rid of it. + if let Some(p) = strip_prefix("/", path) { + path = p; + } + } + } + path + } +} + +/// Builds a matcher for a single set of globs from a .gitignore file. +#[derive(Clone, Debug)] +pub struct GitignoreBuilder { + builder: GlobSetBuilder, + root: PathBuf, + globs: Vec, + case_insensitive: bool, +} + +impl GitignoreBuilder { + /// Create a new builder for a gitignore file. + /// + /// The path given should be the path at which the globs for this gitignore + /// file should be matched. Note that paths are always matched relative + /// to the root path given here. Generally, the root path should correspond + /// to the *directory* containing a `.gitignore` file. 
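A short usage sketch of the builder described above (the `/my/project` root is hypothetical). Later globs take precedence, so a `!` whitelist line can undo an earlier ignore line:

```rust
use ignore::gitignore::GitignoreBuilder;

fn main() -> Result<(), ignore::Error> {
    let mut builder = GitignoreBuilder::new("/my/project");
    builder.add_line(None, "*.log")?;     // ignore all logs...
    builder.add_line(None, "!keep.log")?; // ...except this one
    let gi = builder.build()?;
    assert!(gi.matched("debug.log", false).is_ignore());
    assert!(gi.matched("keep.log", false).is_whitelist());
    Ok(())
}
```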
+ pub fn new>(root: P) -> GitignoreBuilder { + let root = root.as_ref(); + GitignoreBuilder { + builder: GlobSetBuilder::new(), + root: strip_prefix("./", root).unwrap_or(root).to_path_buf(), + globs: vec![], + case_insensitive: false, + } + } + + /// Builds a new matcher from the globs added so far. + /// + /// Once a matcher is built, no new globs can be added to it. + pub fn build(&self) -> Result { + let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count(); + let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count(); + let set = self.builder.build().map_err(|err| Error::Glob { + glob: None, + err: err.to_string(), + })?; + Ok(Gitignore { + set, + root: self.root.clone(), + globs: self.globs.clone(), + num_ignores: nignore as u64, + num_whitelists: nwhite as u64, + matches: Some(Arc::new(Pool::new(|| vec![]))), + }) + } + + /// Build a global gitignore matcher using the configuration in this + /// builder. + /// + /// This consumes ownership of the builder unlike `build` because it + /// must mutate the builder to add the global gitignore globs. + /// + /// Note that this ignores the path given to this builder's constructor + /// and instead derives the path automatically from git's global + /// configuration. + pub fn build_global(mut self) -> (Gitignore, Option) { + match gitconfig_excludes_path() { + None => (Gitignore::empty(), None), + Some(path) => { + if !path.is_file() { + (Gitignore::empty(), None) + } else { + let mut errs = PartialErrorBuilder::default(); + errs.maybe_push_ignore_io(self.add(path)); + match self.build() { + Ok(gi) => (gi, errs.into_error_option()), + Err(err) => { + errs.push(err); + (Gitignore::empty(), errs.into_error_option()) + } + } + } + } + } + } + + /// Add each glob from the file path given. + /// + /// The file given should be formatted as a `gitignore` file. + /// + /// Note that partial errors can be returned. For example, if there was + /// a problem adding one glob, an error for that will be returned, but + /// all other valid globs will still be added. + pub fn add>(&mut self, path: P) -> Option { + let path = path.as_ref(); + let file = match File::open(path) { + Err(err) => return Some(Error::Io(err).with_path(path)), + Ok(file) => file, + }; + log::debug!("opened gitignore file: {}", path.display()); + let rdr = BufReader::new(file); + let mut errs = PartialErrorBuilder::default(); + for (i, line) in rdr.lines().enumerate() { + let lineno = (i + 1) as u64; + let line = match line { + Ok(line) => line, + Err(err) => { + errs.push(Error::Io(err).tagged(path, lineno)); + break; + } + }; + if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) { + errs.push(err.tagged(path, lineno)); + } + } + errs.into_error_option() + } + + /// Add each glob line from the string given. + /// + /// If this string came from a particular `gitignore` file, then its path + /// should be provided here. + /// + /// The string given should be formatted as a `gitignore` file. + #[cfg(test)] + fn add_str( + &mut self, + from: Option, + gitignore: &str, + ) -> Result<&mut GitignoreBuilder, Error> { + for line in gitignore.lines() { + self.add_line(from.clone(), line)?; + } + Ok(self) + } + + /// Add a line from a gitignore file to this builder. + /// + /// If this line came from a particular `gitignore` file, then its path + /// should be provided here. + /// + /// If the line could not be parsed as a glob, then an error is returned. 
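To make the parsing rules implemented below concrete, a sketch of how two common line forms behave (the `/repo` root is hypothetical): a leading `/` anchors a glob to the root, and a trailing `/` restricts it to directories.

```rust
use ignore::gitignore::GitignoreBuilder;

fn main() -> Result<(), ignore::Error> {
    let mut b = GitignoreBuilder::new("/repo");
    b.add_line(None, "/target")?;       // anchored: only matches at the root
    b.add_line(None, "node_modules/")?; // trailing slash: directories only
    let gi = b.build()?;
    assert!(gi.matched("target", true).is_ignore());
    assert!(gi.matched("src/target", true).is_none());
    assert!(gi.matched("a/node_modules", true).is_ignore());
    assert!(gi.matched("a/node_modules", false).is_none());
    Ok(())
}
```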
+ pub fn add_line( + &mut self, + from: Option, + mut line: &str, + ) -> Result<&mut GitignoreBuilder, Error> { + #![allow(deprecated)] + + if line.starts_with("#") { + return Ok(self); + } + if !line.ends_with("\\ ") { + line = line.trim_right(); + } + if line.is_empty() { + return Ok(self); + } + let mut glob = Glob { + from, + original: line.to_string(), + actual: String::new(), + is_whitelist: false, + is_only_dir: false, + }; + let mut is_absolute = false; + if line.starts_with("\\!") || line.starts_with("\\#") { + line = &line[1..]; + is_absolute = line.chars().nth(0) == Some('/'); + } else { + if line.starts_with("!") { + glob.is_whitelist = true; + line = &line[1..]; + } + if line.starts_with("/") { + // `man gitignore` says that if a glob starts with a slash, + // then the glob can only match the beginning of a path + // (relative to the location of gitignore). We achieve this by + // simply banning wildcards from matching /. + line = &line[1..]; + is_absolute = true; + } + } + // If it ends with a slash, then this should only match directories, + // but the slash should otherwise not be used while globbing. + if line.as_bytes().last() == Some(&b'/') { + glob.is_only_dir = true; + line = &line[..line.len() - 1]; + // If the slash was escaped, then remove the escape. + // See: https://github.com/BurntSushi/ripgrep/issues/2236 + if line.as_bytes().last() == Some(&b'\\') { + line = &line[..line.len() - 1]; + } + } + glob.actual = line.to_string(); + // If there is a literal slash, then this is a glob that must match the + // entire path name. Otherwise, we should let it match anywhere, so use + // a **/ prefix. + if !is_absolute && !line.chars().any(|c| c == '/') { + // ... but only if we don't already have a **/ prefix. + if !glob.has_doublestar_prefix() { + glob.actual = format!("**/{}", glob.actual); + } + } + // If the glob ends with `/**`, then we should only match everything + // inside a directory, but not the directory itself. Standard globs + // will match the directory. So we add `/*` to force the issue. + if glob.actual.ends_with("/**") { + glob.actual = format!("{}/*", glob.actual); + } + let parsed = GlobBuilder::new(&glob.actual) + .literal_separator(true) + .case_insensitive(self.case_insensitive) + .backslash_escape(true) + .build() + .map_err(|err| Error::Glob { + glob: Some(glob.original.clone()), + err: err.kind().to_string(), + })?; + self.builder.add(parsed); + self.globs.push(glob); + Ok(self) + } + + /// Toggle whether the globs should be matched case insensitively or not. + /// + /// When this option is changed, only globs added after the change will be + /// affected. + /// + /// This is disabled by default. + pub fn case_insensitive(&mut self, yes: bool) -> Result<&mut GitignoreBuilder, Error> { + // TODO: This should not return a `Result`. Fix this in the next semver + // release. + self.case_insensitive = yes; + Ok(self) + } +} + +/// Return the file path of the current environment's global gitignore file. +/// +/// Note that the file path returned may not exist. +pub fn gitconfig_excludes_path() -> Option { + // git supports $HOME/.gitconfig and $XDG_CONFIG_HOME/git/config. Notably, + // both can be active at the same time, where $HOME/.gitconfig takes + // precedent. So if $HOME/.gitconfig defines a `core.excludesFile`, then + // we're done. 
+ match gitconfig_home_contents().and_then(|x| parse_excludes_file(&x)) { + Some(path) => return Some(path), + None => {} + } + match gitconfig_xdg_contents().and_then(|x| parse_excludes_file(&x)) { + Some(path) => return Some(path), + None => {} + } + excludes_file_default() +} + +/// Returns the file contents of git's global config file, if one exists, in +/// the user's home directory. +fn gitconfig_home_contents() -> Option> { + let home = match home_dir() { + None => return None, + Some(home) => home, + }; + let mut file = match File::open(home.join(".gitconfig")) { + Err(_) => return None, + Ok(file) => BufReader::new(file), + }; + let mut contents = vec![]; + file.read_to_end(&mut contents).ok().map(|_| contents) +} + +/// Returns the file contents of git's global config file, if one exists, in +/// the user's XDG_CONFIG_HOME directory. +fn gitconfig_xdg_contents() -> Option> { + let path = std::env::var_os("XDG_CONFIG_HOME") + .and_then(|x| { + if x.is_empty() { + None + } else { + Some(PathBuf::from(x)) + } + }) + .or_else(|| home_dir().map(|p| p.join(".config"))) + .map(|x| x.join("git/config")); + let mut file = match path.and_then(|p| File::open(p).ok()) { + None => return None, + Some(file) => BufReader::new(file), + }; + let mut contents = vec![]; + file.read_to_end(&mut contents).ok().map(|_| contents) +} + +/// Returns the default file path for a global .gitignore file. +/// +/// Specifically, this respects XDG_CONFIG_HOME. +fn excludes_file_default() -> Option { + std::env::var_os("XDG_CONFIG_HOME") + .and_then(|x| { + if x.is_empty() { + None + } else { + Some(PathBuf::from(x)) + } + }) + .or_else(|| home_dir().map(|p| p.join(".config"))) + .map(|x| x.join("git/ignore")) +} + +/// Extract git's `core.excludesfile` config setting from the raw file contents +/// given. +fn parse_excludes_file(data: &[u8]) -> Option { + use std::sync::OnceLock; + + use regex_automata::{meta::Regex, util::syntax}; + + // N.B. This is the lazy approach, and isn't technically correct, but + // probably works in more circumstances. I guess we would ideally have + // a full INI parser. Yuck. + static RE: OnceLock = OnceLock::new(); + let re = RE.get_or_init(|| { + Regex::builder() + .configure(Regex::config().utf8_empty(false)) + .syntax(syntax::Config::new().utf8(false)) + .build(r#"(?im-u)^\s*excludesfile\s*=\s*"?\s*(\S+?)\s*"?\s*$"#) + .unwrap() + }); + // We don't care about amortizing allocs here I think. This should only + // be called ~once per traversal or so? (Although it's not guaranteed...) + let mut caps = re.create_captures(); + re.captures(data, &mut caps); + let span = caps.get_group(1)?; + let candidate = &data[span]; + std::str::from_utf8(candidate) + .ok() + .map(|s| PathBuf::from(expand_tilde(s))) +} + +/// Expands ~ in file paths to the value of $HOME. +fn expand_tilde(path: &str) -> String { + let home = match home_dir() { + None => return path.to_string(), + Some(home) => home.to_string_lossy().into_owned(), + }; + path.replace("~", &home) +} + +/// Returns the location of the user's home directory. +fn home_dir() -> Option { + // We're fine with using std::env::home_dir for now. Its bugs are, IMO, + // pretty minor corner cases. + #![allow(deprecated)] + std::env::home_dir() +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + use super::{Gitignore, GitignoreBuilder}; + + fn gi_from_str>(root: P, s: &str) -> Gitignore { + let mut builder = GitignoreBuilder::new(root); + builder.add_str(None, s).unwrap(); + builder.build().unwrap() + } + + macro_rules! 
ignored { + ($name:ident, $root:expr, $gi:expr, $path:expr) => { + ignored!($name, $root, $gi, $path, false); + }; + ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { + #[test] + fn $name() { + let gi = gi_from_str($root, $gi); + assert!(gi.matched($path, $is_dir).is_ignore()); + } + }; + } + + macro_rules! not_ignored { + ($name:ident, $root:expr, $gi:expr, $path:expr) => { + not_ignored!($name, $root, $gi, $path, false); + }; + ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { + #[test] + fn $name() { + let gi = gi_from_str($root, $gi); + assert!(!gi.matched($path, $is_dir).is_ignore()); + } + }; + } + + const ROOT: &'static str = "/home/foobar/rust/rg"; + + ignored!(ig1, ROOT, "months", "months"); + ignored!(ig2, ROOT, "*.lock", "Cargo.lock"); + ignored!(ig3, ROOT, "*.rs", "src/main.rs"); + ignored!(ig4, ROOT, "src/*.rs", "src/main.rs"); + ignored!(ig5, ROOT, "/*.c", "cat-file.c"); + ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs"); + ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs"); + ignored!(ig8, ROOT, "foo/", "foo", true); + ignored!(ig9, ROOT, "**/foo", "foo"); + ignored!(ig10, ROOT, "**/foo", "src/foo"); + ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar"); + ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz"); + ignored!(ig13, ROOT, "**/foo/bar", "foo/bar"); + ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar"); + ignored!(ig15, ROOT, "abc/**", "abc/x"); + ignored!(ig16, ROOT, "abc/**", "abc/x/y"); + ignored!(ig17, ROOT, "abc/**", "abc/x/y/z"); + ignored!(ig18, ROOT, "a/**/b", "a/b"); + ignored!(ig19, ROOT, "a/**/b", "a/x/b"); + ignored!(ig20, ROOT, "a/**/b", "a/x/y/b"); + ignored!(ig21, ROOT, r"\!xy", "!xy"); + ignored!(ig22, ROOT, r"\#foo", "#foo"); + ignored!(ig23, ROOT, "foo", "./foo"); + ignored!(ig24, ROOT, "target", "grep/target"); + ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock"); + ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz"); + ignored!(ig27, ROOT, "foo/", "xyz/foo", true); + ignored!(ig28, "./src", "/llvm/", "./src/llvm", true); + ignored!(ig29, ROOT, "node_modules/ ", "node_modules", true); + ignored!(ig30, ROOT, "**/", "foo/bar", true); + ignored!(ig31, ROOT, "path1/*", "path1/foo"); + ignored!(ig32, ROOT, ".a/b", ".a/b"); + ignored!(ig33, "./", ".a/b", ".a/b"); + ignored!(ig34, ".", ".a/b", ".a/b"); + ignored!(ig35, "./.", ".a/b", ".a/b"); + ignored!(ig36, "././", ".a/b", ".a/b"); + ignored!(ig37, "././.", ".a/b", ".a/b"); + ignored!(ig38, ROOT, "\\[", "["); + ignored!(ig39, ROOT, "\\?", "?"); + ignored!(ig40, ROOT, "\\*", "*"); + ignored!(ig41, ROOT, "\\a", "a"); + ignored!(ig42, ROOT, "s*.rs", "sfoo.rs"); + ignored!(ig43, ROOT, "**", "foo.rs"); + ignored!(ig44, ROOT, "**/**/*", "a/foo.rs"); + + not_ignored!(ignot1, ROOT, "amonths", "months"); + not_ignored!(ignot2, ROOT, "monthsa", "months"); + not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs"); + not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c"); + not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs"); + not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs"); + not_ignored!(ignot7, ROOT, "foo/", "foo", false); + not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz"); + not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz"); + not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar"); + not_ignored!(ignot11, ROOT, "#foo", "#foo"); + not_ignored!(ignot12, ROOT, "\n\n\n", "foo"); + not_ignored!(ignot13, ROOT, "foo/**", "foo", true); + not_ignored!( + ignot14, + "./third_party/protobuf", + 
"m4/ltoptions.m4", + "./third_party/protobuf/csharp/src/packages/repositories.config" + ); + not_ignored!(ignot15, ROOT, "!/bar", "foo/bar"); + not_ignored!(ignot16, ROOT, "*\n!**/", "foo", true); + not_ignored!(ignot17, ROOT, "src/*.rs", "src/grep/src/main.rs"); + not_ignored!(ignot18, ROOT, "path1/*", "path2/path1/foo"); + not_ignored!(ignot19, ROOT, "s*.rs", "src/foo.rs"); + + fn bytes(s: &str) -> Vec { + s.to_string().into_bytes() + } + + fn path_string>(path: P) -> String { + path.as_ref().to_str().unwrap().to_string() + } + + #[test] + fn parse_excludes_file1() { + let data = bytes("[core]\nexcludesFile = /foo/bar"); + let got = super::parse_excludes_file(&data).unwrap(); + assert_eq!(path_string(got), "/foo/bar"); + } + + #[test] + fn parse_excludes_file2() { + let data = bytes("[core]\nexcludesFile = ~/foo/bar"); + let got = super::parse_excludes_file(&data).unwrap(); + assert_eq!(path_string(got), super::expand_tilde("~/foo/bar")); + } + + #[test] + fn parse_excludes_file3() { + let data = bytes("[core]\nexcludeFile = /foo/bar"); + assert!(super::parse_excludes_file(&data).is_none()); + } + + #[test] + fn parse_excludes_file4() { + let data = bytes("[core]\nexcludesFile = \"~/foo/bar\""); + let got = super::parse_excludes_file(&data); + assert_eq!(path_string(got.unwrap()), super::expand_tilde("~/foo/bar")); + } + + #[test] + fn parse_excludes_file5() { + let data = bytes("[core]\nexcludesFile = \" \"~/foo/bar \" \""); + assert!(super::parse_excludes_file(&data).is_none()); + } + + // See: https://github.com/BurntSushi/ripgrep/issues/106 + #[test] + fn regression_106() { + gi_from_str("/", " "); + } + + #[test] + fn case_insensitive() { + let gi = GitignoreBuilder::new(ROOT) + .case_insensitive(true) + .unwrap() + .add_str(None, "*.html") + .unwrap() + .build() + .unwrap(); + assert!(gi.matched("foo.html", false).is_ignore()); + assert!(gi.matched("foo.HTML", false).is_ignore()); + assert!(!gi.matched("foo.htm", false).is_ignore()); + assert!(!gi.matched("foo.HTM", false).is_ignore()); + } + + ignored!(cs1, ROOT, "*.html", "foo.html"); + not_ignored!(cs2, ROOT, "*.html", "foo.HTML"); + not_ignored!(cs3, ROOT, "*.html", "foo.htm"); + not_ignored!(cs4, ROOT, "*.html", "foo.HTM"); +} diff --git a/crates/ignore/src/lib.rs b/crates/ignore/src/lib.rs new file mode 100644 index 000000000000..cd0af7ad1c47 --- /dev/null +++ b/crates/ignore/src/lib.rs @@ -0,0 +1,564 @@ +/*! +The ignore crate provides a fast recursive directory iterator that respects +various filters such as globs, file types and `.gitignore` files. The precise +matching rules and precedence is explained in the documentation for +`WalkBuilder`. + +Secondarily, this crate exposes gitignore and file type matchers for use cases +that demand more fine-grained control. + +# Example + +This example shows the most basic usage of this crate. This code will +recursively traverse the current directory while automatically filtering out +files and directories according to ignore globs found in files like +`.ignore` and `.gitignore`: + + +```rust,no_run +use ignore::Walk; + +for result in Walk::new("./") { + // Each item yielded by the iterator is either a directory entry or an + // error, so either print the path or the error. + match result { + Ok(entry) => println!("{}", entry.path().display()), + Err(err) => println!("ERROR: {}", err), + } +} +``` + +# Example: advanced + +By default, the recursive directory iterator will ignore hidden files and +directories. 
This can be disabled by building the iterator with `WalkBuilder`: + +```rust,no_run +use ignore::WalkBuilder; + +for result in WalkBuilder::new("./").hidden(false).build() { + println!("{:?}", result); +} +``` + +See the documentation for `WalkBuilder` for many other options. +*/ + +#![deny(missing_docs)] + +use std::path::{Path, PathBuf}; + +pub use crate::walk::{ + DirEntry, ParallelVisitor, ParallelVisitorBuilder, Walk, WalkBuilder, WalkParallel, WalkState, +}; + +mod default_types; +mod dir; +pub mod gitignore; +pub mod overrides; +mod pathutil; +pub mod types; +mod walk; + +/// Represents an error that can occur when parsing a gitignore file. +#[derive(Debug)] +pub enum Error { + /// A collection of "soft" errors. These occur when adding an ignore + /// file partially succeeded. + Partial(Vec), + /// An error associated with a specific line number. + WithLineNumber { + /// The line number. + line: u64, + /// The underlying error. + err: Box, + }, + /// An error associated with a particular file path. + WithPath { + /// The file path. + path: PathBuf, + /// The underlying error. + err: Box, + }, + /// An error associated with a particular directory depth when recursively + /// walking a directory. + WithDepth { + /// The directory depth. + depth: usize, + /// The underlying error. + err: Box, + }, + /// An error that occurs when a file loop is detected when traversing + /// symbolic links. + Loop { + /// The ancestor file path in the loop. + ancestor: PathBuf, + /// The child file path in the loop. + child: PathBuf, + }, + /// An error that occurs when doing I/O, such as reading an ignore file. + Io(std::io::Error), + /// An error that occurs when trying to parse a glob. + Glob { + /// The original glob that caused this error. This glob, when + /// available, always corresponds to the glob provided by an end user. + /// e.g., It is the glob as written in a `.gitignore` file. + /// + /// (This glob may be distinct from the glob that is actually + /// compiled, after accounting for `gitignore` semantics.) + glob: Option, + /// The underlying glob error as a string. + err: String, + }, + /// A type selection for a file type that is not defined. + UnrecognizedFileType(String), + /// A user specified file type definition could not be parsed. + InvalidDefinition, +} + +impl Clone for Error { + fn clone(&self) -> Error { + match *self { + Error::Partial(ref errs) => Error::Partial(errs.clone()), + Error::WithLineNumber { line, ref err } => Error::WithLineNumber { + line, + err: err.clone(), + }, + Error::WithPath { ref path, ref err } => Error::WithPath { + path: path.clone(), + err: err.clone(), + }, + Error::WithDepth { depth, ref err } => Error::WithDepth { + depth, + err: err.clone(), + }, + Error::Loop { + ref ancestor, + ref child, + } => Error::Loop { + ancestor: ancestor.clone(), + child: child.clone(), + }, + Error::Io(ref err) => match err.raw_os_error() { + Some(e) => Error::Io(std::io::Error::from_raw_os_error(e)), + None => Error::Io(std::io::Error::new(err.kind(), err.to_string())), + }, + Error::Glob { ref glob, ref err } => Error::Glob { + glob: glob.clone(), + err: err.clone(), + }, + Error::UnrecognizedFileType(ref err) => Error::UnrecognizedFileType(err.clone()), + Error::InvalidDefinition => Error::InvalidDefinition, + } + } +} + +impl Error { + /// Returns true if this is a partial error. + /// + /// A partial error occurs when only some operations failed while others + /// may have succeeded. 
For example, an ignore file may contain an invalid + /// glob among otherwise valid globs. + pub fn is_partial(&self) -> bool { + match *self { + Error::Partial(_) => true, + Error::WithLineNumber { ref err, .. } => err.is_partial(), + Error::WithPath { ref err, .. } => err.is_partial(), + Error::WithDepth { ref err, .. } => err.is_partial(), + _ => false, + } + } + + /// Returns true if this error is exclusively an I/O error. + pub fn is_io(&self) -> bool { + match *self { + Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(), + Error::WithLineNumber { ref err, .. } => err.is_io(), + Error::WithPath { ref err, .. } => err.is_io(), + Error::WithDepth { ref err, .. } => err.is_io(), + Error::Loop { .. } => false, + Error::Io(_) => true, + Error::Glob { .. } => false, + Error::UnrecognizedFileType(_) => false, + Error::InvalidDefinition => false, + } + } + + /// Inspect the original [`std::io::Error`] if there is one. + /// + /// [`None`] is returned if the [`Error`] doesn't correspond to an + /// [`std::io::Error`]. This might happen, for example, when the error was + /// produced because a cycle was found in the directory tree while + /// following symbolic links. + /// + /// This method returns a borrowed value that is bound to the lifetime of the [`Error`]. To + /// obtain an owned value, the [`into_io_error`] can be used instead. + /// + /// > This is the original [`std::io::Error`] and is _not_ the same as + /// > [`impl From for std::io::Error`][impl] which contains + /// > additional context about the error. + /// + /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None + /// [`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html + /// [`From`]: https://doc.rust-lang.org/stable/std/convert/trait.From.html + /// [`Error`]: struct.Error.html + /// [`into_io_error`]: struct.Error.html#method.into_io_error + /// [impl]: struct.Error.html#impl-From%3CError%3E + pub fn io_error(&self) -> Option<&std::io::Error> { + match *self { + Error::Partial(ref errs) => { + if errs.len() == 1 { + errs[0].io_error() + } else { + None + } + } + Error::WithLineNumber { ref err, .. } => err.io_error(), + Error::WithPath { ref err, .. } => err.io_error(), + Error::WithDepth { ref err, .. } => err.io_error(), + Error::Loop { .. } => None, + Error::Io(ref err) => Some(err), + Error::Glob { .. } => None, + Error::UnrecognizedFileType(_) => None, + Error::InvalidDefinition => None, + } + } + + /// Similar to [`io_error`] except consumes self to convert to the original + /// [`std::io::Error`] if one exists. + /// + /// [`io_error`]: struct.Error.html#method.io_error + /// [`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html + pub fn into_io_error(self) -> Option { + match self { + Error::Partial(mut errs) => { + if errs.len() == 1 { + errs.remove(0).into_io_error() + } else { + None + } + } + Error::WithLineNumber { err, .. } => err.into_io_error(), + Error::WithPath { err, .. } => err.into_io_error(), + Error::WithDepth { err, .. } => err.into_io_error(), + Error::Loop { .. } => None, + Error::Io(err) => Some(err), + Error::Glob { .. } => None, + Error::UnrecognizedFileType(_) => None, + Error::InvalidDefinition => None, + } + } + + /// Returns a depth associated with recursively walking a directory (if + /// this error was generated from a recursive directory iterator). + pub fn depth(&self) -> Option { + match *self { + Error::WithPath { ref err, .. } => err.depth(), + Error::WithDepth { depth, .. 
} => Some(depth), + _ => None, + } + } + + /// Turn an error into a tagged error with the given file path. + fn with_path>(self, path: P) -> Error { + Error::WithPath { + path: path.as_ref().to_path_buf(), + err: Box::new(self), + } + } + + /// Turn an error into a tagged error with the given depth. + fn with_depth(self, depth: usize) -> Error { + Error::WithDepth { + depth, + err: Box::new(self), + } + } + + /// Turn an error into a tagged error with the given file path and line + /// number. If path is empty, then it is omitted from the error. + fn tagged>(self, path: P, lineno: u64) -> Error { + let errline = Error::WithLineNumber { + line: lineno, + err: Box::new(self), + }; + if path.as_ref().as_os_str().is_empty() { + return errline; + } + errline.with_path(path) + } + + /// Build an error from a walkdir error. + fn from_walkdir(err: walkdir::Error) -> Error { + let depth = err.depth(); + if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) { + return Error::WithDepth { + depth, + err: Box::new(Error::Loop { + ancestor: anc.to_path_buf(), + child: child.to_path_buf(), + }), + }; + } + let path = err.path().map(|p| p.to_path_buf()); + let mut ig_err = Error::Io(std::io::Error::from(err)); + if let Some(path) = path { + ig_err = Error::WithPath { + path, + err: Box::new(ig_err), + }; + } + ig_err + } +} + +impl std::error::Error for Error { + #[allow(deprecated)] + fn description(&self) -> &str { + match *self { + Error::Partial(_) => "partial error", + Error::WithLineNumber { ref err, .. } => err.description(), + Error::WithPath { ref err, .. } => err.description(), + Error::WithDepth { ref err, .. } => err.description(), + Error::Loop { .. } => "file system loop found", + Error::Io(ref err) => err.description(), + Error::Glob { ref err, .. } => err, + Error::UnrecognizedFileType(_) => "unrecognized file type", + Error::InvalidDefinition => "invalid definition", + } + } +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match *self { + Error::Partial(ref errs) => { + let msgs: Vec = errs.iter().map(|err| err.to_string()).collect(); + write!(f, "{}", msgs.join("\n")) + } + Error::WithLineNumber { line, ref err } => { + write!(f, "line {}: {}", line, err) + } + Error::WithPath { ref path, ref err } => { + write!(f, "{}: {}", path.display(), err) + } + Error::WithDepth { ref err, .. 
+            } => err.fmt(f),
+            Error::Loop {
+                ref ancestor,
+                ref child,
+            } => write!(
+                f,
+                "File system loop found: \
+                 {} points to an ancestor {}",
+                child.display(),
+                ancestor.display()
+            ),
+            Error::Io(ref err) => err.fmt(f),
+            Error::Glob {
+                glob: None,
+                ref err,
+            } => write!(f, "{}", err),
+            Error::Glob {
+                glob: Some(ref glob),
+                ref err,
+            } => {
+                write!(f, "error parsing glob '{}': {}", glob, err)
+            }
+            Error::UnrecognizedFileType(ref ty) => {
+                write!(f, "unrecognized file type: {}", ty)
+            }
+            Error::InvalidDefinition => write!(
+                f,
+                "invalid definition (format is type:glob, e.g., \
+                 html:*.html)"
+            ),
+        }
+    }
+}
+
+impl From<std::io::Error> for Error {
+    fn from(err: std::io::Error) -> Error {
+        Error::Io(err)
+    }
+}
+
+#[derive(Debug, Default)]
+struct PartialErrorBuilder(Vec<Error>);
+
+impl PartialErrorBuilder {
+    fn push(&mut self, err: Error) {
+        self.0.push(err);
+    }
+
+    fn push_ignore_io(&mut self, err: Error) {
+        if !err.is_io() {
+            self.push(err);
+        }
+    }
+
+    fn maybe_push(&mut self, err: Option<Error>) {
+        if let Some(err) = err {
+            self.push(err);
+        }
+    }
+
+    fn maybe_push_ignore_io(&mut self, err: Option<Error>) {
+        if let Some(err) = err {
+            self.push_ignore_io(err);
+        }
+    }
+
+    fn into_error_option(mut self) -> Option<Error> {
+        if self.0.is_empty() {
+            None
+        } else if self.0.len() == 1 {
+            Some(self.0.pop().unwrap())
+        } else {
+            Some(Error::Partial(self.0))
+        }
+    }
+}
+
+/// The result of a glob match.
+///
+/// The type parameter `T` typically refers to a type that provides more
+/// information about a particular match. For example, it might identify
+/// the specific gitignore file and the specific glob pattern that caused
+/// the match.
+#[derive(Clone, Debug)]
+pub enum Match<T> {
+    /// The path didn't match any glob.
+    None,
+    /// The highest precedent glob matched indicates the path should be
+    /// ignored.
+    Ignore(T),
+    /// The highest precedent glob matched indicates the path should be
+    /// whitelisted.
+    Whitelist(T),
+}
+
+impl<T> Match<T> {
+    /// Returns true if the match result didn't match any globs.
+    pub fn is_none(&self) -> bool {
+        match *self {
+            Match::None => true,
+            Match::Ignore(_) | Match::Whitelist(_) => false,
+        }
+    }
+
+    /// Returns true if the match result implies the path should be ignored.
+    pub fn is_ignore(&self) -> bool {
+        match *self {
+            Match::Ignore(_) => true,
+            Match::None | Match::Whitelist(_) => false,
+        }
+    }
+
+    /// Returns true if the match result implies the path should be
+    /// whitelisted.
+    pub fn is_whitelist(&self) -> bool {
+        match *self {
+            Match::Whitelist(_) => true,
+            Match::None | Match::Ignore(_) => false,
+        }
+    }
+
+    /// Inverts the match so that `Ignore` becomes `Whitelist` and
+    /// `Whitelist` becomes `Ignore`. A non-match remains the same.
+    pub fn invert(self) -> Match<T> {
+        match self {
+            Match::None => Match::None,
+            Match::Ignore(t) => Match::Whitelist(t),
+            Match::Whitelist(t) => Match::Ignore(t),
+        }
+    }
+
+    /// Return the value inside this match if it exists.
+    pub fn inner(&self) -> Option<&T> {
+        match *self {
+            Match::None => None,
+            Match::Ignore(ref t) => Some(t),
+            Match::Whitelist(ref t) => Some(t),
+        }
+    }
+
+    /// Apply the given function to the value inside this match.
+    ///
+    /// If the match has no value, then return the match unchanged.
+    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U> {
+        match self {
+            Match::None => Match::None,
+            Match::Ignore(t) => Match::Ignore(f(t)),
+            Match::Whitelist(t) => Match::Whitelist(f(t)),
+        }
+    }
+
+    /// Return the match if it is not none. Otherwise, return other.
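A brief sketch of these combinators in isolation (the `&str` payloads are stand-ins for the richer glob metadata the matchers actually return):

```rust
use ignore::Match;

fn main() {
    let m: Match<&str> = Match::Ignore("*.log");
    // `invert` swaps Ignore and Whitelist; a non-match stays a non-match.
    assert!(m.invert().is_whitelist());
    // `map` transforms the payload without changing the match kind.
    let n = Match::Whitelist("!keep.log").map(|g| g.len());
    assert_eq!(n.inner(), Some(&9));
    // `or` (defined just below) keeps `self` unless it is `Match::None`.
    let fallback = Match::None.or(Match::Ignore("*.tmp"));
    assert!(fallback.is_ignore());
}
```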
+ pub fn or(self, other: Self) -> Self { + if self.is_none() { + other + } else { + self + } + } +} + +#[cfg(test)] +mod tests { + use std::{ + env, fs, + path::{Path, PathBuf}, + }; + + /// A convenient result type alias. + pub(crate) type Result = std::result::Result>; + + macro_rules! err { + ($($tt:tt)*) => { + Box::::from(format!($($tt)*)) + } + } + + /// A simple wrapper for creating a temporary directory that is + /// automatically deleted when it's dropped. + /// + /// We use this in lieu of tempfile because tempfile brings in too many + /// dependencies. + #[derive(Debug)] + pub struct TempDir(PathBuf); + + impl Drop for TempDir { + fn drop(&mut self) { + fs::remove_dir_all(&self.0).unwrap(); + } + } + + impl TempDir { + /// Create a new empty temporary directory under the system's configured + /// temporary directory. + pub fn new() -> Result { + use std::sync::atomic::{AtomicUsize, Ordering}; + + static TRIES: usize = 100; + static COUNTER: AtomicUsize = AtomicUsize::new(0); + + let tmpdir = env::temp_dir(); + for _ in 0..TRIES { + let count = COUNTER.fetch_add(1, Ordering::SeqCst); + let path = tmpdir.join("rust-ignore").join(count.to_string()); + if path.is_dir() { + continue; + } + fs::create_dir_all(&path) + .map_err(|e| err!("failed to create {}: {}", path.display(), e))?; + return Ok(TempDir(path)); + } + Err(err!("failed to create temp dir after {} tries", TRIES)) + } + + /// Return the underlying path to this temporary directory. + pub fn path(&self) -> &Path { + &self.0 + } + } +} diff --git a/crates/ignore/src/overrides.rs b/crates/ignore/src/overrides.rs new file mode 100644 index 000000000000..693c7dd0a79b --- /dev/null +++ b/crates/ignore/src/overrides.rs @@ -0,0 +1,265 @@ +/*! +The overrides module provides a way to specify a set of override globs. +This provides functionality similar to `--include` or `--exclude` in command +line tools. +*/ + +use std::path::Path; + +use crate::{ + gitignore::{self, Gitignore, GitignoreBuilder}, + Error, Match, +}; + +/// Glob represents a single glob in an override matcher. +/// +/// This is used to report information about the highest precedent glob +/// that matched. +/// +/// Note that not all matches necessarily correspond to a specific glob. For +/// example, if there are one or more whitelist globs and a file path doesn't +/// match any glob in the set, then the file path is considered to be ignored. +/// +/// The lifetime `'a` refers to the lifetime of the matcher that produced +/// this glob. +#[derive(Clone, Debug)] +#[allow(dead_code)] +pub struct Glob<'a>(GlobInner<'a>); + +#[derive(Clone, Debug)] +#[allow(dead_code)] +enum GlobInner<'a> { + /// No glob matched, but the file path should still be ignored. + UnmatchedIgnore, + /// A glob matched. + Matched(&'a gitignore::Glob), +} + +impl<'a> Glob<'a> { + fn unmatched() -> Glob<'a> { + Glob(GlobInner::UnmatchedIgnore) + } +} + +/// Manages a set of overrides provided explicitly by the end user. +#[derive(Clone, Debug)] +pub struct Override(Gitignore); + +impl Override { + /// Returns an empty matcher that never matches any file path. + pub fn empty() -> Override { + Override(Gitignore::empty()) + } + + /// Returns the directory of this override set. + /// + /// All matches are done relative to this path. + pub fn path(&self) -> &Path { + self.0.path() + } + + /// Returns true if and only if this matcher is empty. + /// + /// When a matcher is empty, it will never match any file path. 
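A usage sketch of the override matcher described above (the `/repo` root is hypothetical). Note the inverted `!` semantics, and that once any whitelist glob exists, unmatched files are treated as ignored:

```rust
use ignore::overrides::OverrideBuilder;

fn main() -> Result<(), ignore::Error> {
    let mut b = OverrideBuilder::new("/repo");
    b.add("*.rs")?;       // whitelist Rust files
    b.add("!legacy.rs")?; // but explicitly ignore this one
    let ov = b.build()?;
    assert!(ov.matched("main.rs", false).is_whitelist());
    assert!(ov.matched("legacy.rs", false).is_ignore());
    // With at least one whitelist glob, unmatched files are ignored.
    assert!(ov.matched("notes.txt", false).is_ignore());
    Ok(())
}
```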
+ pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns the total number of ignore globs. + pub fn num_ignores(&self) -> u64 { + self.0.num_whitelists() + } + + /// Returns the total number of whitelisted globs. + pub fn num_whitelists(&self) -> u64 { + self.0.num_ignores() + } + + /// Returns whether the given file path matched a pattern in this override + /// matcher. + /// + /// `is_dir` should be true if the path refers to a directory and false + /// otherwise. + /// + /// If there are no overrides, then this always returns `Match::None`. + /// + /// If there is at least one whitelist override and `is_dir` is false, then + /// this never returns `Match::None`, since non-matches are interpreted as + /// ignored. + /// + /// The given path is matched to the globs relative to the path given + /// when building the override matcher. Specifically, before matching + /// `path`, its prefix (as determined by a common suffix of the directory + /// given) is stripped. If there is no common suffix/prefix overlap, then + /// `path` is assumed to reside in the same directory as the root path for + /// this set of overrides. + pub fn matched<'a, P: AsRef>(&'a self, path: P, is_dir: bool) -> Match> { + if self.is_empty() { + return Match::None; + } + let mat = self.0.matched(path, is_dir).invert(); + if mat.is_none() && self.num_whitelists() > 0 && !is_dir { + return Match::Ignore(Glob::unmatched()); + } + mat.map(move |giglob| Glob(GlobInner::Matched(giglob))) + } +} + +/// Builds a matcher for a set of glob overrides. +#[derive(Clone, Debug)] +pub struct OverrideBuilder { + builder: GitignoreBuilder, +} + +impl OverrideBuilder { + /// Create a new override builder. + /// + /// Matching is done relative to the directory path provided. + pub fn new>(path: P) -> OverrideBuilder { + OverrideBuilder { + builder: GitignoreBuilder::new(path), + } + } + + /// Builds a new override matcher from the globs added so far. + /// + /// Once a matcher is built, no new globs can be added to it. + pub fn build(&self) -> Result { + Ok(Override(self.builder.build()?)) + } + + /// Add a glob to the set of overrides. + /// + /// Globs provided here have precisely the same semantics as a single + /// line in a `gitignore` file, where the meaning of `!` is inverted: + /// namely, `!` at the beginning of a glob will ignore a file. Without `!`, + /// all matches of the glob provided are treated as whitelist matches. + pub fn add(&mut self, glob: &str) -> Result<&mut OverrideBuilder, Error> { + self.builder.add_line(None, glob)?; + Ok(self) + } + + /// Toggle whether the globs should be matched case insensitively or not. + /// + /// When this option is changed, only globs added after the change will be affected. + /// + /// This is disabled by default. + pub fn case_insensitive(&mut self, yes: bool) -> Result<&mut OverrideBuilder, Error> { + // TODO: This should not return a `Result`. Fix this in the next semver + // release. 
+ self.builder.case_insensitive(yes)?; + Ok(self) + } +} + +#[cfg(test)] +mod tests { + use super::{Override, OverrideBuilder}; + + const ROOT: &'static str = "/home/andrew/foo"; + + fn ov(globs: &[&str]) -> Override { + let mut builder = OverrideBuilder::new(ROOT); + for glob in globs { + builder.add(glob).unwrap(); + } + builder.build().unwrap() + } + + #[test] + fn empty() { + let ov = ov(&[]); + assert!(ov.matched("a.foo", false).is_none()); + assert!(ov.matched("a", false).is_none()); + assert!(ov.matched("", false).is_none()); + } + + #[test] + fn simple() { + let ov = ov(&["*.foo", "!*.bar"]); + assert!(ov.matched("a.foo", false).is_whitelist()); + assert!(ov.matched("a.foo", true).is_whitelist()); + assert!(ov.matched("a.rs", false).is_ignore()); + assert!(ov.matched("a.rs", true).is_none()); + assert!(ov.matched("a.bar", false).is_ignore()); + assert!(ov.matched("a.bar", true).is_ignore()); + } + + #[test] + fn only_ignores() { + let ov = ov(&["!*.bar"]); + assert!(ov.matched("a.rs", false).is_none()); + assert!(ov.matched("a.rs", true).is_none()); + assert!(ov.matched("a.bar", false).is_ignore()); + assert!(ov.matched("a.bar", true).is_ignore()); + } + + #[test] + fn precedence() { + let ov = ov(&["*.foo", "!*.bar.foo"]); + assert!(ov.matched("a.foo", false).is_whitelist()); + assert!(ov.matched("a.baz", false).is_ignore()); + assert!(ov.matched("a.bar.foo", false).is_ignore()); + } + + #[test] + fn gitignore() { + let ov = ov(&["/foo", "bar/*.rs", "baz/**"]); + assert!(ov.matched("bar/lib.rs", false).is_whitelist()); + assert!(ov.matched("bar/wat/lib.rs", false).is_ignore()); + assert!(ov.matched("wat/bar/lib.rs", false).is_ignore()); + assert!(ov.matched("foo", false).is_whitelist()); + assert!(ov.matched("wat/foo", false).is_ignore()); + assert!(ov.matched("baz", false).is_ignore()); + assert!(ov.matched("baz/a", false).is_whitelist()); + assert!(ov.matched("baz/a/b", false).is_whitelist()); + } + + #[test] + fn allow_directories() { + // This tests that directories are NOT ignored when they are unmatched. 
+ let ov = ov(&["*.rs"]); + assert!(ov.matched("foo.rs", false).is_whitelist()); + assert!(ov.matched("foo.c", false).is_ignore()); + assert!(ov.matched("foo", false).is_ignore()); + assert!(ov.matched("foo", true).is_none()); + assert!(ov.matched("src/foo.rs", false).is_whitelist()); + assert!(ov.matched("src/foo.c", false).is_ignore()); + assert!(ov.matched("src/foo", false).is_ignore()); + assert!(ov.matched("src/foo", true).is_none()); + } + + #[test] + fn absolute_path() { + let ov = ov(&["!/bar"]); + assert!(ov.matched("./foo/bar", false).is_none()); + } + + #[test] + fn case_insensitive() { + let ov = OverrideBuilder::new(ROOT) + .case_insensitive(true) + .unwrap() + .add("*.html") + .unwrap() + .build() + .unwrap(); + assert!(ov.matched("foo.html", false).is_whitelist()); + assert!(ov.matched("foo.HTML", false).is_whitelist()); + assert!(ov.matched("foo.htm", false).is_ignore()); + assert!(ov.matched("foo.HTM", false).is_ignore()); + } + + #[test] + fn default_case_sensitive() { + let ov = OverrideBuilder::new(ROOT) + .add("*.html") + .unwrap() + .build() + .unwrap(); + assert!(ov.matched("foo.html", false).is_whitelist()); + assert!(ov.matched("foo.HTML", false).is_ignore()); + assert!(ov.matched("foo.htm", false).is_ignore()); + assert!(ov.matched("foo.HTM", false).is_ignore()); + } +} diff --git a/crates/ignore/src/pathutil.rs b/crates/ignore/src/pathutil.rs new file mode 100644 index 000000000000..0ceb5a356c32 --- /dev/null +++ b/crates/ignore/src/pathutil.rs @@ -0,0 +1,141 @@ +use std::{ffi::OsStr, path::Path}; + +use crate::walk::DirEntry; + +/// Returns true if and only if this entry is considered to be hidden. +/// +/// This only returns true if the base name of the path starts with a `.`. +/// +/// On Unix, this implements a more optimized check. +#[cfg(unix)] +pub(crate) fn is_hidden(dent: &DirEntry) -> bool { + use std::os::unix::ffi::OsStrExt; + + if let Some(name) = file_name(dent.path()) { + name.as_bytes().get(0) == Some(&b'.') + } else { + false + } +} + +/// Returns true if and only if this entry is considered to be hidden. +/// +/// On Windows, this returns true if one of the following is true: +/// +/// * The base name of the path starts with a `.`. +/// * The file attributes have the `HIDDEN` property set. +#[cfg(windows)] +pub(crate) fn is_hidden(dent: &DirEntry) -> bool { + use std::os::windows::fs::MetadataExt; + use winapi_util::file; + + // This looks like we're doing an extra stat call, but on Windows, the + // directory traverser reuses the metadata retrieved from each directory + // entry and stores it on the DirEntry itself. So this is "free." + if let Ok(md) = dent.metadata() { + if file::is_hidden(md.file_attributes() as u64) { + return true; + } + } + if let Some(name) = file_name(dent.path()) { + name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) + } else { + false + } +} + +/// Returns true if and only if this entry is considered to be hidden. +/// +/// This only returns true if the base name of the path starts with a `.`. +#[cfg(not(any(unix, windows)))] +pub(crate) fn is_hidden(dent: &DirEntry) -> bool { + if let Some(name) = file_name(dent.path()) { + name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) + } else { + false + } +} + +/// Strip `prefix` from the `path` and return the remainder. +/// +/// If `path` doesn't have a prefix `prefix`, then return `None`. 
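Since these helpers are crate-private, here is a standalone sketch of the same byte-level idea (Unix-only, and not the crate's public API): comparing raw `OsStr` bytes avoids allocating or walking path components.

```rust
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
use std::path::Path;

// Hypothetical free function mirroring the crate-private helper.
fn strip_prefix<'a>(prefix: &str, path: &'a Path) -> Option<&'a Path> {
    let prefix = prefix.as_bytes();
    let bytes = path.as_os_str().as_bytes();
    if bytes.len() >= prefix.len() && &bytes[..prefix.len()] == prefix {
        Some(Path::new(OsStr::from_bytes(&bytes[prefix.len()..])))
    } else {
        None
    }
}

fn main() {
    assert_eq!(
        strip_prefix("./", Path::new("./src/main.rs")),
        Some(Path::new("src/main.rs"))
    );
}
```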
+#[cfg(unix)] +pub(crate) fn strip_prefix<'a, P: AsRef + ?Sized>( + prefix: &'a P, + path: &'a Path, +) -> Option<&'a Path> { + use std::os::unix::ffi::OsStrExt; + + let prefix = prefix.as_ref().as_os_str().as_bytes(); + let path = path.as_os_str().as_bytes(); + if prefix.len() > path.len() || prefix != &path[0..prefix.len()] { + None + } else { + Some(&Path::new(OsStr::from_bytes(&path[prefix.len()..]))) + } +} + +/// Strip `prefix` from the `path` and return the remainder. +/// +/// If `path` doesn't have a prefix `prefix`, then return `None`. +#[cfg(not(unix))] +pub(crate) fn strip_prefix<'a, P: AsRef + ?Sized>( + prefix: &'a P, + path: &'a Path, +) -> Option<&'a Path> { + path.strip_prefix(prefix).ok() +} + +/// Returns true if this file path is just a file name. i.e., Its parent is +/// the empty string. +#[cfg(unix)] +pub(crate) fn is_file_name>(path: P) -> bool { + use std::os::unix::ffi::OsStrExt; + + use memchr::memchr; + + let path = path.as_ref().as_os_str().as_bytes(); + memchr(b'/', path).is_none() +} + +/// Returns true if this file path is just a file name. i.e., Its parent is +/// the empty string. +#[cfg(not(unix))] +pub(crate) fn is_file_name>(path: P) -> bool { + path.as_ref() + .parent() + .map(|p| p.as_os_str().is_empty()) + .unwrap_or(false) +} + +/// The final component of the path, if it is a normal file. +/// +/// If the path terminates in ., .., or consists solely of a root of prefix, +/// file_name will return None. +#[cfg(unix)] +pub(crate) fn file_name<'a, P: AsRef + ?Sized>(path: &'a P) -> Option<&'a OsStr> { + use memchr::memrchr; + use std::os::unix::ffi::OsStrExt; + + let path = path.as_ref().as_os_str().as_bytes(); + if path.is_empty() { + return None; + } else if path.len() == 1 && path[0] == b'.' { + return None; + } else if path.last() == Some(&b'.') { + return None; + } else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] { + return None; + } + let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0); + Some(OsStr::from_bytes(&path[last_slash..])) +} + +/// The final component of the path, if it is a normal file. +/// +/// If the path terminates in ., .., or consists solely of a root of prefix, +/// file_name will return None. +#[cfg(not(unix))] +pub(crate) fn file_name<'a, P: AsRef + ?Sized>(path: &'a P) -> Option<&'a OsStr> { + path.as_ref().file_name() +} diff --git a/crates/ignore/src/types.rs b/crates/ignore/src/types.rs new file mode 100644 index 000000000000..814ee4a0a5f7 --- /dev/null +++ b/crates/ignore/src/types.rs @@ -0,0 +1,601 @@ +/*! +The types module provides a way of associating globs on file names to file +types. + +This can be used to match specific types of files. For example, among +the default file types provided, the Rust file type is defined to be `*.rs` +with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with +name `c`. + +Note that the set of default types may change over time. + +# Example + +This shows how to create and use a simple file type matcher using the default +file types defined in this crate. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.select("rust"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("foo.rs", false).is_whitelist()); +assert!(matcher.matched("foo.c", false).is_ignore()); +``` + +# Example: negation + +This is like the previous example, but shows how negating a file type works. 
+That is, this will let us match file paths that *don't* correspond to a +particular file type. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.negate("c"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("foo.rs", false).is_none()); +assert!(matcher.matched("foo.c", false).is_ignore()); +``` + +# Example: custom file type definitions + +This shows how to extend this library default file type definitions with +your own. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.add("foo", "*.foo"); +// Another way of adding a file type definition. +// This is useful when accepting input from an end user. +builder.add_def("bar:*.bar"); +// Note: we only select `foo`, not `bar`. +builder.select("foo"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("x.foo", false).is_whitelist()); +// This is ignored because we only selected the `foo` file type. +assert!(matcher.matched("x.bar", false).is_ignore()); +``` + +We can also add file type definitions based on other definitions. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.add("foo", "*.foo"); +builder.add_def("bar:include:foo,cpp"); +builder.select("bar"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("x.foo", false).is_whitelist()); +assert!(matcher.matched("y.cpp", false).is_whitelist()); +``` +*/ + +use std::{collections::HashMap, path::Path, sync::Arc}; + +use { + globset::{GlobBuilder, GlobSet, GlobSetBuilder}, + regex_automata::util::pool::Pool, +}; + +use crate::{default_types::DEFAULT_TYPES, pathutil::file_name, Error, Match}; + +/// Glob represents a single glob in a set of file type definitions. +/// +/// There may be more than one glob for a particular file type. +/// +/// This is used to report information about the highest precedent glob +/// that matched. +/// +/// Note that not all matches necessarily correspond to a specific glob. +/// For example, if there are one or more selections and a file path doesn't +/// match any of those selections, then the file path is considered to be +/// ignored. +/// +/// The lifetime `'a` refers to the lifetime of the underlying file type +/// definition, which corresponds to the lifetime of the file type matcher. +#[derive(Clone, Debug)] +pub struct Glob<'a>(GlobInner<'a>); + +#[derive(Clone, Debug)] +enum GlobInner<'a> { + /// No glob matched, but the file path should still be ignored. + UnmatchedIgnore, + /// A glob matched. + Matched { + /// The file type definition which provided the glob. + def: &'a FileTypeDef, + }, +} + +impl<'a> Glob<'a> { + fn unmatched() -> Glob<'a> { + Glob(GlobInner::UnmatchedIgnore) + } + + /// Return the file type definition that matched, if one exists. A file type + /// definition always exists when a specific definition matches a file + /// path. + pub fn file_type_def(&self) -> Option<&FileTypeDef> { + match self { + Glob(GlobInner::UnmatchedIgnore) => None, + Glob(GlobInner::Matched { def, .. }) => Some(def), + } + } +} + +/// A single file type definition. +/// +/// File type definitions can be retrieved in aggregate from a file type +/// matcher. File type definitions are also reported when its responsible +/// for a match. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct FileTypeDef { + name: String, + globs: Vec, +} + +impl FileTypeDef { + /// Return the name of this file type. 
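These accessors make it possible to enumerate whatever definitions are in effect. A small sketch (output depends on the compiled-in defaults):

```rust
use ignore::types::TypesBuilder;

fn main() {
    let mut builder = TypesBuilder::new();
    builder.add_defaults();
    let types = builder.build().unwrap();
    // Definitions are sorted by name; each has one or more globs.
    for def in types.definitions() {
        println!("{}: {}", def.name(), def.globs().join(", "));
    }
}
```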
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Return the globs used to recognize this file type.
+    pub fn globs(&self) -> &[String] {
+        &self.globs
+    }
+}
+
+/// Types is a file type matcher.
+#[derive(Clone, Debug)]
+pub struct Types {
+    /// All of the file type definitions, sorted lexicographically by name.
+    defs: Vec<FileTypeDef>,
+    /// All of the selections made by the user.
+    selections: Vec<Selection<FileTypeDef>>,
+    /// Whether there is at least one Selection::Select in our selections.
+    /// When this is true, a Match::None is converted to Match::Ignore.
+    has_selected: bool,
+    /// A mapping from glob index in the set to two indices. The first is an
+    /// index into `selections` and the second is an index into the
+    /// corresponding file type definition's list of globs.
+    glob_to_selection: Vec<(usize, usize)>,
+    /// The set of all glob selections, used for actual matching.
+    set: GlobSet,
+    /// Temporary storage for globs that match.
+    matches: Arc<Pool<Vec<usize>>>,
+}
+
+/// Indicates the type of a selection for a particular file type.
+#[derive(Clone, Debug)]
+enum Selection<T> {
+    Select(String, T),
+    Negate(String, T),
+}
+
+impl<T> Selection<T> {
+    fn is_negated(&self) -> bool {
+        match *self {
+            Selection::Select(..) => false,
+            Selection::Negate(..) => true,
+        }
+    }
+
+    fn name(&self) -> &str {
+        match *self {
+            Selection::Select(ref name, _) => name,
+            Selection::Negate(ref name, _) => name,
+        }
+    }
+
+    fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Selection<U> {
+        match self {
+            Selection::Select(name, inner) => Selection::Select(name, f(inner)),
+            Selection::Negate(name, inner) => Selection::Negate(name, f(inner)),
+        }
+    }
+
+    fn inner(&self) -> &T {
+        match *self {
+            Selection::Select(_, ref inner) => inner,
+            Selection::Negate(_, ref inner) => inner,
+        }
+    }
+}
+
+impl Types {
+    /// Creates a new file type matcher that never matches any path and
+    /// contains no file type definitions.
+    pub fn empty() -> Types {
+        Types {
+            defs: vec![],
+            selections: vec![],
+            has_selected: false,
+            glob_to_selection: vec![],
+            set: GlobSetBuilder::new().build().unwrap(),
+            matches: Arc::new(Pool::new(|| vec![])),
+        }
+    }
+
+    /// Returns true if and only if this matcher has zero selections.
+    pub fn is_empty(&self) -> bool {
+        self.selections.is_empty()
+    }
+
+    /// Returns the number of selections used in this matcher.
+    pub fn len(&self) -> usize {
+        self.selections.len()
+    }
+
+    /// Return the set of current file type definitions.
+    ///
+    /// Definitions and globs are sorted.
+    pub fn definitions(&self) -> &[FileTypeDef] {
+        &self.defs
+    }
+
+    /// Returns a match for the given path against this file type matcher.
+    ///
+    /// The path is considered whitelisted if it matches a selected file type.
+    /// The path is considered ignored if it matches a negated file type.
+    /// If at least one file type is selected and `path` doesn't match, then
+    /// the path is also considered ignored.
+    pub fn matched<'a, P: AsRef<Path>>(&'a self, path: P, is_dir: bool) -> Match<Glob<'a>> {
+        // File types don't apply to directories, and we can't do anything
+        // if our glob set is empty.
+        if is_dir || self.set.is_empty() {
+            return Match::None;
+        }
+        // We only want to match against the file name, so extract it.
+        // If one doesn't exist, then we can't match it.
+        let name = match file_name(path.as_ref()) {
+            Some(name) => name,
+            None if self.has_selected => {
+                return Match::Ignore(Glob::unmatched());
+            }
+            None => {
+                return Match::None;
+            }
+        };
+        let mut matches = self.matches.get();
+        self.set.matches_into(name, &mut *matches);
+        // The highest precedent match is the last one.
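+        // (Globs are added to the set in selection order in `build`, so when
+        // two selections match the same file name, the later selection wins.)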
+        if let Some(&i) = matches.last() {
+            let (isel, _) = self.glob_to_selection[i];
+            let sel = &self.selections[isel];
+            let glob = Glob(GlobInner::Matched { def: sel.inner() });
+            return if sel.is_negated() {
+                Match::Ignore(glob)
+            } else {
+                Match::Whitelist(glob)
+            };
+        }
+        if self.has_selected {
+            Match::Ignore(Glob::unmatched())
+        } else {
+            Match::None
+        }
+    }
+}
+
+/// TypesBuilder builds a type matcher from a set of file type definitions and
+/// a set of file type selections.
+pub struct TypesBuilder {
+    types: HashMap<String, FileTypeDef>,
+    selections: Vec<Selection<()>>,
+}
+
+impl TypesBuilder {
+    /// Create a new builder for a file type matcher.
+    ///
+    /// The builder contains *no* type definitions to start with. A set
+    /// of default type definitions can be added with `add_defaults`, and
+    /// individual types can be selected or negated with `select` and
+    /// `negate`.
+    pub fn new() -> TypesBuilder {
+        TypesBuilder {
+            types: HashMap::new(),
+            selections: vec![],
+        }
+    }
+
+    /// Build the current set of file type definitions *and* selections into
+    /// a file type matcher.
+    pub fn build(&self) -> Result<Types, Error> {
+        let defs = self.definitions();
+        let has_selected = self.selections.iter().any(|s| !s.is_negated());
+
+        let mut selections = vec![];
+        let mut glob_to_selection = vec![];
+        let mut build_set = GlobSetBuilder::new();
+        for (isel, selection) in self.selections.iter().enumerate() {
+            let def = match self.types.get(selection.name()) {
+                Some(def) => def.clone(),
+                None => {
+                    let name = selection.name().to_string();
+                    return Err(Error::UnrecognizedFileType(name));
+                }
+            };
+            for (iglob, glob) in def.globs.iter().enumerate() {
+                build_set.add(
+                    GlobBuilder::new(glob)
+                        .literal_separator(true)
+                        .build()
+                        .map_err(|err| Error::Glob {
+                            glob: Some(glob.to_string()),
+                            err: err.kind().to_string(),
+                        })?,
+                );
+                glob_to_selection.push((isel, iglob));
+            }
+            selections.push(selection.clone().map(move |_| def));
+        }
+        let set = build_set.build().map_err(|err| Error::Glob {
+            glob: None,
+            err: err.to_string(),
+        })?;
+        Ok(Types {
+            defs,
+            selections,
+            has_selected,
+            glob_to_selection,
+            set,
+            matches: Arc::new(Pool::new(|| vec![])),
+        })
+    }
+
+    /// Return the set of current file type definitions.
+    ///
+    /// Definitions and globs are sorted.
+    pub fn definitions(&self) -> Vec<FileTypeDef> {
+        let mut defs = vec![];
+        for def in self.types.values() {
+            let mut def = def.clone();
+            def.globs.sort();
+            defs.push(def);
+        }
+        defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
+        defs
+    }
+
+    /// Select the file type given by `name`.
+    ///
+    /// If `name` is `all`, then all file types currently defined are selected.
+    pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
+        if name == "all" {
+            for name in self.types.keys() {
+                self.selections
+                    .push(Selection::Select(name.to_string(), ()));
+            }
+        } else {
+            self.selections
+                .push(Selection::Select(name.to_string(), ()));
+        }
+        self
+    }
+
+    /// Ignore the file type given by `name`.
+    ///
+    /// If `name` is `all`, then all file types currently defined are negated.
+    pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
+        if name == "all" {
+            for name in self.types.keys() {
+                self.selections
+                    .push(Selection::Negate(name.to_string(), ()));
+            }
+        } else {
+            self.selections
+                .push(Selection::Negate(name.to_string(), ()));
+        }
+        self
+    }
+
+    /// Clear any file type definitions for the type name given.
+    pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
+        self.types.remove(name);
+        self
+    }
+
+    /// Add a new file type definition.
`name` can be arbitrary and `glob`
+    /// should be a glob recognizing file paths belonging to the `name` type.
+    ///
+    /// If `name` is `all` or otherwise contains any character that is not a
+    /// Unicode letter or number, then an error is returned.
+    pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
+        if name == "all" || !name.chars().all(|c| c.is_alphanumeric()) {
+            return Err(Error::InvalidDefinition);
+        }
+        let (key, glob) = (name.to_string(), glob.to_string());
+        self.types
+            .entry(key)
+            .or_insert_with(|| FileTypeDef {
+                name: name.to_string(),
+                globs: vec![],
+            })
+            .globs
+            .push(glob);
+        Ok(())
+    }
+
+    /// Add a new file type definition specified in string form. There are two
+    /// valid formats:
+    ///
+    /// 1. `{name}:{glob}`. This defines a 'root' definition that associates
+    ///    the given name with the given glob.
+    /// 2. `{name}:include:{comma-separated list of already defined names}`.
+    ///    This defines an 'include' definition that associates the given name
+    ///    with the definitions of the given existing types.
+    ///
+    /// Names may not include any characters that are not Unicode letters or
+    /// numbers.
+    pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
+        let parts: Vec<&str> = def.split(':').collect();
+        match parts.len() {
+            2 => {
+                let name = parts[0];
+                let glob = parts[1];
+                if name.is_empty() || glob.is_empty() {
+                    return Err(Error::InvalidDefinition);
+                }
+                self.add(name, glob)
+            }
+            3 => {
+                let name = parts[0];
+                let types_string = parts[2];
+                if name.is_empty() || parts[1] != "include" || types_string.is_empty() {
+                    return Err(Error::InvalidDefinition);
+                }
+                let types = types_string.split(',');
+                // Check ahead of time to ensure that all types specified are
+                // present and fail fast if not.
+                if types.clone().any(|t| !self.types.contains_key(t)) {
+                    return Err(Error::InvalidDefinition);
+                }
+                for type_name in types {
+                    let globs = self.types.get(type_name).unwrap().globs.clone();
+                    for glob in globs {
+                        self.add(name, &glob)?;
+                    }
+                }
+                Ok(())
+            }
+            _ => Err(Error::InvalidDefinition),
+        }
+    }
+
+    /// Add a set of default file type definitions.
+    pub fn add_defaults(&mut self) -> &mut TypesBuilder {
+        static MSG: &'static str = "adding a default type should never fail";
+        for &(names, exts) in DEFAULT_TYPES {
+            for name in names {
+                for ext in exts {
+                    self.add(name, ext).expect(MSG);
+                }
+            }
+        }
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::TypesBuilder;
+
+    macro_rules!
matched { + ($name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr) => { + matched!($name, $types, $sel, $selnot, $path, true); + }; + (not, $name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr) => { + matched!($name, $types, $sel, $selnot, $path, false); + }; + ($name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr, $matched:expr) => { + #[test] + fn $name() { + let mut btypes = TypesBuilder::new(); + for tydef in $types { + btypes.add_def(tydef).unwrap(); + } + for sel in $sel { + btypes.select(sel); + } + for selnot in $selnot { + btypes.negate(selnot); + } + let types = btypes.build().unwrap(); + let mat = types.matched($path, false); + assert_eq!($matched, !mat.is_ignore()); + } + }; + } + + fn types() -> Vec<&'static str> { + vec![ + "html:*.html", + "html:*.htm", + "rust:*.rs", + "js:*.js", + "py:*.py", + "python:*.py", + "foo:*.{rs,foo}", + "combo:include:html,rust", + ] + } + + matched!(match1, types(), vec!["rust"], vec![], "lib.rs"); + matched!(match2, types(), vec!["html"], vec![], "index.html"); + matched!(match3, types(), vec!["html"], vec![], "index.htm"); + matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs"); + matched!(match5, types(), vec![], vec![], "index.html"); + matched!(match6, types(), vec![], vec!["rust"], "index.html"); + matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo"); + matched!(match8, types(), vec!["combo"], vec![], "index.html"); + matched!(match9, types(), vec!["combo"], vec![], "lib.rs"); + matched!(match10, types(), vec!["py"], vec![], "main.py"); + matched!(match11, types(), vec!["python"], vec![], "main.py"); + + matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html"); + matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs"); + matched!( + not, + matchnot3, + types(), + vec!["foo"], + vec!["rust"], + "main.rs" + ); + matched!( + not, + matchnot4, + types(), + vec!["rust"], + vec!["foo"], + "main.rs" + ); + matched!( + not, + matchnot5, + types(), + vec!["rust"], + vec!["foo"], + "main.foo" + ); + matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js"); + matched!(not, matchnot7, types(), vec!["py"], vec![], "index.html"); + matched!(not, matchnot8, types(), vec!["python"], vec![], "doc.md"); + + #[test] + fn test_invalid_defs() { + let mut btypes = TypesBuilder::new(); + for tydef in types() { + btypes.add_def(tydef).unwrap(); + } + // Preserve the original definitions for later comparison. + let original_defs = btypes.definitions(); + let bad_defs = vec![ + // Reference to type that does not exist + "combo:include:html,qwerty", + // Bad format + "combo:foobar:html,rust", + "", + ]; + for def in bad_defs { + assert!(btypes.add_def(def).is_err()); + // Ensure that nothing changed, even if some of the includes were valid. 
+ assert_eq!(btypes.definitions(), original_defs); + } + } +} diff --git a/crates/ignore/src/walk.rs b/crates/ignore/src/walk.rs new file mode 100644 index 000000000000..9c1f7413d918 --- /dev/null +++ b/crates/ignore/src/walk.rs @@ -0,0 +1,2302 @@ +use std::{ + cmp::Ordering, + ffi::OsStr, + fs::{self, FileType, Metadata}, + io, + path::{Path, PathBuf}, + sync::atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering}, + sync::Arc, +}; + +use { + crossbeam_deque::{Stealer, Worker as Deque}, + same_file::Handle, + walkdir::WalkDir, +}; + +use crate::{ + dir::{Ignore, IgnoreBuilder}, + gitignore::{Gitignore, GitignoreBuilder}, + overrides::Override, + types::Types, + Error, PartialErrorBuilder, +}; + +/// A directory entry with a possible error attached. +/// +/// The error typically refers to a problem parsing ignore files in a +/// particular directory. +#[derive(Clone, Debug)] +pub struct DirEntry { + dent: DirEntryInner, + err: Option, +} + +impl DirEntry { + /// The full path that this entry represents. + pub fn path(&self) -> &Path { + self.dent.path() + } + + /// The full path that this entry represents. + /// Analogous to [`DirEntry::path`], but moves ownership of the path. + pub fn into_path(self) -> PathBuf { + self.dent.into_path() + } + + /// Whether this entry corresponds to a symbolic link or not. + pub fn path_is_symlink(&self) -> bool { + self.dent.path_is_symlink() + } + + /// Returns true if and only if this entry corresponds to stdin. + /// + /// i.e., The entry has depth 0 and its file name is `-`. + pub fn is_stdin(&self) -> bool { + self.dent.is_stdin() + } + + /// Return the metadata for the file that this entry points to. + pub fn metadata(&self) -> Result { + self.dent.metadata() + } + + /// Return the file type for the file that this entry points to. + /// + /// This entry doesn't have a file type if it corresponds to stdin. + pub fn file_type(&self) -> Option { + self.dent.file_type() + } + + /// Return the file name of this entry. + /// + /// If this entry has no file name (e.g., `/`), then the full path is + /// returned. + pub fn file_name(&self) -> &OsStr { + self.dent.file_name() + } + + /// Returns the depth at which this entry was created relative to the root. + pub fn depth(&self) -> usize { + self.dent.depth() + } + + /// Returns the underlying inode number if one exists. + /// + /// If this entry doesn't have an inode number, then `None` is returned. + #[cfg(unix)] + pub fn ino(&self) -> Option { + self.dent.ino() + } + + /// Returns an error, if one exists, associated with processing this entry. + /// + /// An example of an error is one that occurred while parsing an ignore + /// file. Errors related to traversing a directory tree itself are reported + /// as part of yielding the directory entry, and not with this method. + pub fn error(&self) -> Option<&Error> { + self.err.as_ref() + } + + /// Returns true if and only if this entry points to a directory. + pub(crate) fn is_dir(&self) -> bool { + self.dent.is_dir() + } + + fn new_stdin() -> DirEntry { + DirEntry { + dent: DirEntryInner::Stdin, + err: None, + } + } + + fn new_walkdir(dent: walkdir::DirEntry, err: Option) -> DirEntry { + DirEntry { + dent: DirEntryInner::Walkdir(dent), + err, + } + } + + fn new_raw(dent: DirEntryRaw, err: Option) -> DirEntry { + DirEntry { + dent: DirEntryInner::Raw(dent), + err, + } + } +} + +/// DirEntryInner is the implementation of DirEntry. +/// +/// It specifically represents three distinct sources of directory entries: +/// +/// 1. From the walkdir crate. 
+/// 2. Special entries that represent things like stdin.
+/// 3. From a path.
+///
+/// Specifically, (3) has to essentially re-create the DirEntry implementation
+/// from WalkDir.
+#[derive(Clone, Debug)]
+enum DirEntryInner {
+    Stdin,
+    Walkdir(walkdir::DirEntry),
+    Raw(DirEntryRaw),
+}
+
+impl DirEntryInner {
+    fn path(&self) -> &Path {
+        use self::DirEntryInner::*;
+        match *self {
+            Stdin => Path::new("<stdin>"),
+            Walkdir(ref x) => x.path(),
+            Raw(ref x) => x.path(),
+        }
+    }
+
+    fn into_path(self) -> PathBuf {
+        use self::DirEntryInner::*;
+        match self {
+            Stdin => PathBuf::from("<stdin>"),
+            Walkdir(x) => x.into_path(),
+            Raw(x) => x.into_path(),
+        }
+    }
+
+    fn path_is_symlink(&self) -> bool {
+        use self::DirEntryInner::*;
+        match *self {
+            Stdin => false,
+            Walkdir(ref x) => x.path_is_symlink(),
+            Raw(ref x) => x.path_is_symlink(),
+        }
+    }
+
+    fn is_stdin(&self) -> bool {
+        match *self {
+            DirEntryInner::Stdin => true,
+            _ => false,
+        }
+    }
+
+    fn metadata(&self) -> Result<Metadata, Error> {
+        use self::DirEntryInner::*;
+        match *self {
+            Stdin => {
+                let err = Error::Io(io::Error::new(
+                    io::ErrorKind::Other,
+                    "<stdin> has no metadata",
+                ));
+                Err(err.with_path("<stdin>"))
+            }
+            Walkdir(ref x) => x
+                .metadata()
+                .map_err(|err| Error::Io(io::Error::from(err)).with_path(x.path())),
+            Raw(ref x) => x.metadata(),
+        }
+    }
+
+    fn file_type(&self) -> Option<FileType> {
+        use self::DirEntryInner::*;
+        match *self {
+            Stdin => None,
+            Walkdir(ref x) => Some(x.file_type()),
+            Raw(ref x) => Some(x.file_type()),
+        }
+    }
+
+    fn file_name(&self) -> &OsStr {
+        use self::DirEntryInner::*;
+        match *self {
+            Stdin => OsStr::new("<stdin>"),
+            Walkdir(ref x) => x.file_name(),
+            Raw(ref x) => x.file_name(),
+        }
+    }
+
+    fn depth(&self) -> usize {
+        use self::DirEntryInner::*;
+        match *self {
+            Stdin => 0,
+            Walkdir(ref x) => x.depth(),
+            Raw(ref x) => x.depth(),
+        }
+    }
+
+    #[cfg(unix)]
+    fn ino(&self) -> Option<u64> {
+        use self::DirEntryInner::*;
+        use walkdir::DirEntryExt;
+        match *self {
+            Stdin => None,
+            Walkdir(ref x) => Some(x.ino()),
+            Raw(ref x) => Some(x.ino()),
+        }
+    }
+
+    /// Returns true if and only if this entry points to a directory.
+    fn is_dir(&self) -> bool {
+        self.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
+    }
+}
+
+/// DirEntryRaw is essentially copied from the walkdir crate so that we can
+/// build `DirEntry`s from whole cloth in the parallel iterator.
+#[derive(Clone)]
+struct DirEntryRaw {
+    /// The path as reported by the `fs::ReadDir` iterator (even if it's a
+    /// symbolic link).
+    path: PathBuf,
+    /// The file type. Necessary for recursive iteration, so store it.
+    ty: FileType,
+    /// Is set when this entry was created from a symbolic link and the user
+    /// expects the iterator to follow symbolic links.
+    follow_link: bool,
+    /// The depth at which this entry was generated relative to the root.
+    depth: usize,
+    /// The underlying inode number (Unix only).
+    #[cfg(unix)]
+    ino: u64,
+    /// The underlying metadata (Windows only). We store this on Windows
+    /// because this comes for free while reading a directory.
+    #[cfg(windows)]
+    metadata: fs::Metadata,
+}
+
+impl std::fmt::Debug for DirEntryRaw {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Leaving out FileType because it doesn't have a debug impl
+        // in Rust 1.9. We could add it if we really wanted to by manually
+        // querying each possible file type. Meh.
---AG + f.debug_struct("DirEntryRaw") + .field("path", &self.path) + .field("follow_link", &self.follow_link) + .field("depth", &self.depth) + .finish() + } +} + +impl DirEntryRaw { + fn path(&self) -> &Path { + &self.path + } + + fn into_path(self) -> PathBuf { + self.path + } + + fn path_is_symlink(&self) -> bool { + self.ty.is_symlink() || self.follow_link + } + + fn metadata(&self) -> Result { + self.metadata_internal() + } + + #[cfg(windows)] + fn metadata_internal(&self) -> Result { + if self.follow_link { + fs::metadata(&self.path) + } else { + Ok(self.metadata.clone()) + } + .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path)) + } + + #[cfg(not(windows))] + fn metadata_internal(&self) -> Result { + if self.follow_link { + fs::metadata(&self.path) + } else { + fs::symlink_metadata(&self.path) + } + .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path)) + } + + fn file_type(&self) -> FileType { + self.ty + } + + fn file_name(&self) -> &OsStr { + self.path + .file_name() + .unwrap_or_else(|| self.path.as_os_str()) + } + + fn depth(&self) -> usize { + self.depth + } + + #[cfg(unix)] + fn ino(&self) -> u64 { + self.ino + } + + fn from_entry(depth: usize, ent: &fs::DirEntry) -> Result { + let ty = ent.file_type().map_err(|err| { + let err = Error::Io(io::Error::from(err)).with_path(ent.path()); + Error::WithDepth { + depth, + err: Box::new(err), + } + })?; + DirEntryRaw::from_entry_os(depth, ent, ty) + } + + #[cfg(windows)] + fn from_entry_os( + depth: usize, + ent: &fs::DirEntry, + ty: fs::FileType, + ) -> Result { + let md = ent.metadata().map_err(|err| { + let err = Error::Io(io::Error::from(err)).with_path(ent.path()); + Error::WithDepth { + depth, + err: Box::new(err), + } + })?; + Ok(DirEntryRaw { + path: ent.path(), + ty, + follow_link: false, + depth, + metadata: md, + }) + } + + #[cfg(unix)] + fn from_entry_os( + depth: usize, + ent: &fs::DirEntry, + ty: fs::FileType, + ) -> Result { + use std::os::unix::fs::DirEntryExt; + + Ok(DirEntryRaw { + path: ent.path(), + ty, + follow_link: false, + depth, + ino: ent.ino(), + }) + } + + // Placeholder implementation to allow compiling on non-standard platforms + // (e.g. wasm32). + #[cfg(not(any(windows, unix)))] + fn from_entry_os( + depth: usize, + ent: &fs::DirEntry, + ty: fs::FileType, + ) -> Result { + Err(Error::Io(io::Error::new( + io::ErrorKind::Other, + "unsupported platform", + ))) + } + + #[cfg(windows)] + fn from_path(depth: usize, pb: PathBuf, link: bool) -> Result { + let md = fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?; + Ok(DirEntryRaw { + path: pb, + ty: md.file_type(), + follow_link: link, + depth, + metadata: md, + }) + } + + #[cfg(unix)] + fn from_path(depth: usize, pb: PathBuf, link: bool) -> Result { + use std::os::unix::fs::MetadataExt; + + let md = fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?; + Ok(DirEntryRaw { + path: pb, + ty: md.file_type(), + follow_link: link, + depth, + ino: md.ino(), + }) + } + + // Placeholder implementation to allow compiling on non-standard platforms + // (e.g. wasm32). + #[cfg(not(any(windows, unix)))] + fn from_path(depth: usize, pb: PathBuf, link: bool) -> Result { + Err(Error::Io(io::Error::new( + io::ErrorKind::Other, + "unsupported platform", + ))) + } +} + +/// WalkBuilder builds a recursive directory iterator. +/// +/// The builder supports a large number of configurable options. 
This includes +/// specific glob overrides, file type matching, toggling whether hidden +/// files are ignored or not, and of course, support for respecting gitignore +/// files. +/// +/// By default, all ignore files found are respected. This includes `.ignore`, +/// `.gitignore`, `.git/info/exclude` and even your global gitignore +/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`. +/// +/// Some standard recursive directory options are also supported, such as +/// limiting the recursive depth or whether to follow symbolic links (disabled +/// by default). +/// +/// # Ignore rules +/// +/// There are many rules that influence whether a particular file or directory +/// is skipped by this iterator. Those rules are documented here. Note that +/// the rules assume a default configuration. +/// +/// * First, glob overrides are checked. If a path matches a glob override, +/// then matching stops. The path is then only skipped if the glob that matched +/// the path is an ignore glob. (An override glob is a whitelist glob unless it +/// starts with a `!`, in which case it is an ignore glob.) +/// * Second, ignore files are checked. Ignore files currently only come from +/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured +/// global gitignore file), plain `.ignore` files, which have the same format +/// as gitignore files, or explicitly added ignore files. The precedence order +/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and +/// finally explicitly added ignore files. Note that precedence between +/// different types of ignore files is not impacted by the directory hierarchy; +/// any `.ignore` file overrides all `.gitignore` files. Within each precedence +/// level, more nested ignore files have a higher precedence than less nested +/// ignore files. +/// * Third, if the previous step yields an ignore match, then all matching +/// is stopped and the path is skipped. If it yields a whitelist match, then +/// matching continues. A whitelist match can be overridden by a later matcher. +/// * Fourth, unless the path is a directory, the file type matcher is run on +/// the path. As above, if it yields an ignore match, then all matching is +/// stopped and the path is skipped. If it yields a whitelist match, then +/// matching continues. +/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the +/// path is skipped. +/// * Sixth, unless the path is a directory, the size of the file is compared +/// against the max filesize limit. If it exceeds the limit, it is skipped. +/// * Seventh, if the path has made it this far then it is yielded in the +/// iterator. 
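+///
+/// # Example
+///
+/// A minimal sketch that walks a tree with the default filters and prints
+/// every path that survives the rules above:
+///
+/// ```no_run
+/// use ignore::WalkBuilder;
+///
+/// for result in WalkBuilder::new("./").build() {
+///     // Each item is a `Result<DirEntry, Error>`.
+///     match result {
+///         Ok(entry) => println!("{}", entry.path().display()),
+///         Err(err) => eprintln!("ERROR: {}", err),
+///     }
+/// }
+/// ```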
+#[derive(Clone)]
+pub struct WalkBuilder {
+    paths: Vec<PathBuf>,
+    ig_builder: IgnoreBuilder,
+    max_depth: Option<usize>,
+    max_filesize: Option<u64>,
+    follow_links: bool,
+    same_file_system: bool,
+    sorter: Option<Sorter>,
+    threads: usize,
+    skip: Option<Arc<Handle>>,
+    filter: Option<Filter>,
+}
+
+#[derive(Clone)]
+enum Sorter {
+    ByName(Arc<dyn Fn(&OsStr, &OsStr) -> Ordering + Send + Sync + 'static>),
+    ByPath(Arc<dyn Fn(&Path, &Path) -> Ordering + Send + Sync + 'static>),
+}
+
+#[derive(Clone)]
+struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);
+
+impl std::fmt::Debug for WalkBuilder {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("WalkBuilder")
+            .field("paths", &self.paths)
+            .field("ig_builder", &self.ig_builder)
+            .field("max_depth", &self.max_depth)
+            .field("max_filesize", &self.max_filesize)
+            .field("follow_links", &self.follow_links)
+            .field("threads", &self.threads)
+            .field("skip", &self.skip)
+            .finish()
+    }
+}
+
+impl WalkBuilder {
+    /// Create a new builder for a recursive directory iterator for the
+    /// directory given.
+    ///
+    /// Note that if you want to traverse multiple different directories, it
+    /// is better to call `add` on this builder than to create multiple
+    /// `Walk` values.
+    pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
+        WalkBuilder {
+            paths: vec![path.as_ref().to_path_buf()],
+            ig_builder: IgnoreBuilder::new(),
+            max_depth: None,
+            max_filesize: None,
+            follow_links: false,
+            same_file_system: false,
+            sorter: None,
+            threads: 0,
+            skip: None,
+            filter: None,
+        }
+    }
+
+    /// Build a new `Walk` iterator.
+    pub fn build(&self) -> Walk {
+        let follow_links = self.follow_links;
+        let max_depth = self.max_depth;
+        let sorter = self.sorter.clone();
+        let its = self
+            .paths
+            .iter()
+            .map(move |p| {
+                if p == Path::new("-") {
+                    (p.to_path_buf(), None)
+                } else {
+                    let mut wd = WalkDir::new(p);
+                    wd = wd.follow_links(follow_links || p.is_file());
+                    wd = wd.same_file_system(self.same_file_system);
+                    if let Some(max_depth) = max_depth {
+                        wd = wd.max_depth(max_depth);
+                    }
+                    if let Some(ref sorter) = sorter {
+                        match sorter.clone() {
+                            Sorter::ByName(cmp) => {
+                                wd = wd.sort_by(move |a, b| cmp(a.file_name(), b.file_name()));
+                            }
+                            Sorter::ByPath(cmp) => {
+                                wd = wd.sort_by(move |a, b| cmp(a.path(), b.path()));
+                            }
+                        }
+                    }
+                    (p.to_path_buf(), Some(WalkEventIter::from(wd)))
+                }
+            })
+            .collect::<Vec<_>>()
+            .into_iter();
+        let ig_root = self.ig_builder.build();
+        Walk {
+            its,
+            it: None,
+            ig_root: ig_root.clone(),
+            ig: ig_root.clone(),
+            max_filesize: self.max_filesize,
+            skip: self.skip.clone(),
+            filter: self.filter.clone(),
+        }
+    }
+
+    /// Build a new `WalkParallel` iterator.
+    ///
+    /// Note that this *doesn't* return something that implements `Iterator`.
+    /// Instead, the returned value must be run with a closure. e.g.,
+    /// `builder.build_parallel().run(|| |path| { println!("{path:?}"); WalkState::Continue })`.
+    pub fn build_parallel(&self) -> WalkParallel {
+        WalkParallel {
+            paths: self.paths.clone().into_iter(),
+            ig_root: self.ig_builder.build(),
+            max_depth: self.max_depth,
+            max_filesize: self.max_filesize,
+            follow_links: self.follow_links,
+            same_file_system: self.same_file_system,
+            threads: self.threads,
+            skip: self.skip.clone(),
+            filter: self.filter.clone(),
+        }
+    }
+
+    /// Add a file path to the iterator.
+    ///
+    /// Each additional file path added is traversed recursively. This should
+    /// be preferred over building multiple `Walk` iterators since this
+    /// enables reusing resources across iteration.
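+    ///
+    /// A short sketch (the paths here are illustrative):
+    ///
+    /// ```no_run
+    /// use ignore::WalkBuilder;
+    ///
+    /// let mut builder = WalkBuilder::new("src");
+    /// builder.add("tests");
+    /// // Both roots are traversed by the single iterator built below.
+    /// for result in builder.build() {
+    ///     println!("{:?}", result.map(|entry| entry.into_path()));
+    /// }
+    /// ```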
+    pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
+        self.paths.push(path.as_ref().to_path_buf());
+        self
+    }
+
+    /// The maximum depth to recurse.
+    ///
+    /// The default, `None`, imposes no depth restriction.
+    pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
+        self.max_depth = depth;
+        self
+    }
+
+    /// Whether to follow symbolic links or not.
+    pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.follow_links = yes;
+        self
+    }
+
+    /// Whether to ignore files above the specified limit.
+    pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
+        self.max_filesize = filesize;
+        self
+    }
+
+    /// The number of threads to use for traversal.
+    ///
+    /// Note that this only has an effect when using `build_parallel`.
+    ///
+    /// The default setting is `0`, which chooses the number of threads
+    /// automatically using heuristics.
+    pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
+        self.threads = n;
+        self
+    }
+
+    /// Add a global ignore file to the matcher.
+    ///
+    /// This has lower precedence than all other sources of ignore rules.
+    ///
+    /// If there was a problem adding the ignore file, then an error is
+    /// returned. Note that the error may indicate *partial* failure. For
+    /// example, if an ignore file contains an invalid glob, all other globs
+    /// are still applied.
+    pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
+        let mut builder = GitignoreBuilder::new("");
+        let mut errs = PartialErrorBuilder::default();
+        errs.maybe_push(builder.add(path));
+        match builder.build() {
+            Ok(gi) => {
+                self.ig_builder.add_ignore(gi);
+            }
+            Err(err) => {
+                errs.push(err);
+            }
+        }
+        errs.into_error_option()
+    }
+
+    /// CHANGED: Add a Gitignore to the builder.
+    pub fn add_gitignore(&mut self, gi: Gitignore) {
+        self.ig_builder.add_ignore(gi);
+    }
+
+    /// Add a custom ignore file name.
+    ///
+    /// These ignore files have higher precedence than all other ignore files.
+    ///
+    /// When specifying multiple names, earlier names have lower precedence
+    /// than later names.
+    pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
+        &mut self,
+        file_name: S,
+    ) -> &mut WalkBuilder {
+        self.ig_builder.add_custom_ignore_filename(file_name);
+        self
+    }
+
+    /// Add an override matcher.
+    ///
+    /// By default, no override matcher is used.
+    ///
+    /// This overrides any previous setting.
+    pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
+        self.ig_builder.overrides(overrides);
+        self
+    }
+
+    /// Add a file type matcher.
+    ///
+    /// By default, no file type matcher is used.
+    ///
+    /// This overrides any previous setting.
+    pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
+        self.ig_builder.types(types);
+        self
+    }
+
+    /// Enables all the standard ignore filters.
+    ///
+    /// This toggles, as a group, all the filters that are enabled by default:
+    ///
+    /// - [hidden()](#method.hidden)
+    /// - [parents()](#method.parents)
+    /// - [ignore()](#method.ignore)
+    /// - [git_ignore()](#method.git_ignore)
+    /// - [git_global()](#method.git_global)
+    /// - [git_exclude()](#method.git_exclude)
+    ///
+    /// They may still be toggled individually after calling this function.
+    ///
+    /// This is (by definition) enabled by default.
+    pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.hidden(yes)
+            .parents(yes)
+            .ignore(yes)
+            .git_ignore(yes)
+            .git_global(yes)
+            .git_exclude(yes)
+    }
+
+    /// Enables ignoring hidden files.
+    ///
+    /// This is enabled by default.
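+    ///
+    /// For example, a walk that also yields hidden (dot) files, as a brief
+    /// sketch:
+    ///
+    /// ```no_run
+    /// use ignore::WalkBuilder;
+    ///
+    /// let mut builder = WalkBuilder::new(".");
+    /// // Disable only the hidden-file filter; other defaults still apply.
+    /// builder.hidden(false);
+    /// let _walk = builder.build();
+    /// ```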
+    pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.hidden(yes);
+        self
+    }
+
+    /// Enables reading ignore files from parent directories.
+    ///
+    /// If this is enabled, then .gitignore files in parent directories of each
+    /// file path given are respected. Otherwise, they are ignored.
+    ///
+    /// This is enabled by default.
+    pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.parents(yes);
+        self
+    }
+
+    /// Enables reading `.ignore` files.
+    ///
+    /// `.ignore` files have the same semantics as `gitignore` files and are
+    /// supported by search tools such as ripgrep and The Silver Searcher.
+    ///
+    /// This is enabled by default.
+    pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.ignore(yes);
+        self
+    }
+
+    /// Enables reading a global gitignore file, whose path is specified in
+    /// git's `core.excludesFile` config option.
+    ///
+    /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
+    /// does not exist or does not specify `core.excludesFile`, then
+    /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
+    /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
+    ///
+    /// This is enabled by default.
+    pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.git_global(yes);
+        self
+    }
+
+    /// Enables reading `.gitignore` files.
+    ///
+    /// `.gitignore` files have match semantics as described in the `gitignore`
+    /// man page.
+    ///
+    /// This is enabled by default.
+    pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.git_ignore(yes);
+        self
+    }
+
+    /// Enables reading `.git/info/exclude` files.
+    ///
+    /// `.git/info/exclude` files have match semantics as described in the
+    /// `gitignore` man page.
+    ///
+    /// This is enabled by default.
+    pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.git_exclude(yes);
+        self
+    }
+
+    /// Whether a git repository is required to apply git-related ignore
+    /// rules (global rules, .gitignore and local exclude rules).
+    ///
+    /// When disabled, git-related ignore rules are applied even when searching
+    /// outside a git repository.
+    pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.require_git(yes);
+        self
+    }
+
+    /// Process ignore files case insensitively.
+    ///
+    /// This is disabled by default.
+    pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.ignore_case_insensitive(yes);
+        self
+    }
+
+    /// Set a function for sorting directory entries by their path.
+    ///
+    /// If a compare function is set, the resulting iterator will return all
+    /// paths in sorted order. The compare function will be called to compare
+    /// entries from the same directory.
+    ///
+    /// This is like `sort_by_file_name`, except the comparator accepts
+    /// a `&Path` instead of the base file name, which permits it to sort by
+    /// more criteria.
+    ///
+    /// This method will override any previous sorter set by this method or
+    /// by `sort_by_file_name`.
+    ///
+    /// Note that this is not used in the parallel iterator.
+    pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
+    where
+        F: Fn(&Path, &Path) -> Ordering + Send + Sync + 'static,
+    {
+        self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
+        self
+    }
+
+    /// Set a function for sorting directory entries by file name.
+    ///
+    /// If a compare function is set, the resulting iterator will return all
+    /// paths in sorted order.
The compare function will be called to compare
+    /// names from entries from the same directory using only the name of the
+    /// entry.
+    ///
+    /// This method will override any previous sorter set by this method or
+    /// by `sort_by_file_path`.
+    ///
+    /// Note that this is not used in the parallel iterator.
+    pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
+    where
+        F: Fn(&OsStr, &OsStr) -> Ordering + Send + Sync + 'static,
+    {
+        self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
+        self
+    }
+
+    /// Do not cross file system boundaries.
+    ///
+    /// When this option is enabled, directory traversal will not descend into
+    /// directories that are on a different file system from the root path.
+    ///
+    /// Currently, this option is only supported on Unix and Windows. If this
+    /// option is used on an unsupported platform, then directory traversal
+    /// will immediately return an error and will not yield any entries.
+    pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.same_file_system = yes;
+        self
+    }
+
+    /// Do not yield directory entries that are believed to correspond to
+    /// stdout.
+    ///
+    /// This is useful when a command is invoked via shell redirection to a
+    /// file that is also being read. For example, `grep -r foo ./ > results`
+    /// might end up trying to search `results` even though it is also writing
+    /// to it, which could cause an unbounded feedback loop. Setting this
+    /// option prevents this from happening by skipping over the `results`
+    /// file.
+    ///
+    /// This is disabled by default.
+    pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
+        if yes {
+            self.skip = stdout_handle().map(Arc::new);
+        } else {
+            self.skip = None;
+        }
+        self
+    }
+
+    /// Yields only entries which satisfy the given predicate and skips
+    /// descending into directories that do not satisfy the given predicate.
+    ///
+    /// The predicate is applied to all entries. If the predicate is
+    /// true, iteration carries on as normal. If the predicate is false, the
+    /// entry is ignored and if it is a directory, it is not descended into.
+    ///
+    /// Note that the errors for reading entries that may not satisfy the
+    /// predicate will still be yielded.
+    pub fn filter_entry<P>
(&mut self, filter: P) -> &mut WalkBuilder
+    where
+        P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
+    {
+        self.filter = Some(Filter(Arc::new(filter)));
+        self
+    }
+}
+
+/// Walk is a recursive directory iterator over file paths in one or more
+/// directories.
+///
+/// Only file and directory paths matching the rules are returned. By default,
+/// ignore files like `.gitignore` are respected. The precise matching rules
+/// and precedence is explained in the documentation for `WalkBuilder`.
+pub struct Walk {
+    its: std::vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
+    it: Option<WalkEventIter>,
+    ig_root: Ignore,
+    ig: Ignore,
+    max_filesize: Option<u64>,
+    skip: Option<Arc<Handle>>,
+    filter: Option<Filter>,
+}
+
+impl Walk {
+    /// Creates a new recursive directory iterator for the file path given.
+    ///
+    /// Note that this uses default settings, which include respecting
+    /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
+    /// instead.
+    pub fn new<P: AsRef<Path>>(path: P) -> Walk {
+        WalkBuilder::new(path).build()
+    }
+
+    fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
+        if ent.depth() == 0 {
+            return Ok(false);
+        }
+        // We ensure that trivial skipping is done before any other potentially
+        // expensive operations (stat, filesystem other) are done. This seems
+        // like an obvious optimization but becomes critical when filesystem
+        // operations even as simple as stat can result in significant
+        // overheads; an example of this was a bespoke filesystem layer in
+        // Windows that hosted files remotely and would download them on-demand
+        // when particular filesystem operations occurred. Users of this system
+        // who ensured correct file-type filters were being used could still
+        // get unnecessary file access resulting in large downloads.
+        if should_skip_entry(&self.ig, ent) {
+            return Ok(true);
+        }
+        if let Some(ref stdout) = self.skip {
+            if path_equals(ent, stdout)? {
+                return Ok(true);
+            }
+        }
+        if self.max_filesize.is_some() && !ent.is_dir() {
+            return Ok(skip_filesize(
+                self.max_filesize.unwrap(),
+                ent.path(),
+                &ent.metadata().ok(),
+            ));
+        }
+        if let Some(Filter(filter)) = &self.filter {
+            if !filter(ent) {
+                return Ok(true);
+            }
+        }
+        Ok(false)
+    }
+}
+
+impl Iterator for Walk {
+    type Item = Result<DirEntry, Error>;
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<Result<DirEntry, Error>> {
+        loop {
+            let ev = match self.it.as_mut().and_then(|it| it.next()) {
+                Some(ev) => ev,
+                None => {
+                    match self.its.next() {
+                        None => return None,
+                        Some((_, None)) => {
+                            return Some(Ok(DirEntry::new_stdin()));
+                        }
+                        Some((path, Some(it))) => {
+                            self.it = Some(it);
+                            if path.is_dir() {
+                                let (ig, err) = self.ig_root.add_parents(path);
+                                self.ig = ig;
+                                if let Some(err) = err {
+                                    return Some(Err(err));
+                                }
+                            } else {
+                                self.ig = self.ig_root.clone();
+                            }
+                        }
+                    }
+                    continue;
+                }
+            };
+            match ev {
+                Err(err) => {
+                    return Some(Err(Error::from_walkdir(err)));
+                }
+                Ok(WalkEvent::Exit) => {
+                    self.ig = self.ig.parent().unwrap();
+                }
+                Ok(WalkEvent::Dir(ent)) => {
+                    let mut ent = DirEntry::new_walkdir(ent, None);
+                    let should_skip = match self.skip_entry(&ent) {
+                        Err(err) => return Some(Err(err)),
+                        Ok(should_skip) => should_skip,
+                    };
+                    if should_skip {
+                        self.it.as_mut().unwrap().it.skip_current_dir();
+                        // Still need to push this on the stack because
+                        // we'll get a WalkEvent::Exit event for this dir.
+                        // We don't care if it errors though.
+ let (igtmp, _) = self.ig.add_child(ent.path()); + self.ig = igtmp; + continue; + } + let (igtmp, err) = self.ig.add_child(ent.path()); + self.ig = igtmp; + ent.err = err; + return Some(Ok(ent)); + } + Ok(WalkEvent::File(ent)) => { + let ent = DirEntry::new_walkdir(ent, None); + let should_skip = match self.skip_entry(&ent) { + Err(err) => return Some(Err(err)), + Ok(should_skip) => should_skip, + }; + if should_skip { + continue; + } + return Some(Ok(ent)); + } + } + } + } +} + +impl std::iter::FusedIterator for Walk {} + +/// WalkEventIter transforms a WalkDir iterator into an iterator that more +/// accurately describes the directory tree. Namely, it emits events that are +/// one of three types: directory, file or "exit." An "exit" event means that +/// the entire contents of a directory have been enumerated. +struct WalkEventIter { + depth: usize, + it: walkdir::IntoIter, + next: Option>, +} + +#[derive(Debug)] +enum WalkEvent { + Dir(walkdir::DirEntry), + File(walkdir::DirEntry), + Exit, +} + +impl From for WalkEventIter { + fn from(it: WalkDir) -> WalkEventIter { + WalkEventIter { + depth: 0, + it: it.into_iter(), + next: None, + } + } +} + +impl Iterator for WalkEventIter { + type Item = walkdir::Result; + + #[inline(always)] + fn next(&mut self) -> Option> { + let dent = self.next.take().or_else(|| self.it.next()); + let depth = match dent { + None => 0, + Some(Ok(ref dent)) => dent.depth(), + Some(Err(ref err)) => err.depth(), + }; + if depth < self.depth { + self.depth -= 1; + self.next = dent; + return Some(Ok(WalkEvent::Exit)); + } + self.depth = depth; + match dent { + None => None, + Some(Err(err)) => Some(Err(err)), + Some(Ok(dent)) => { + if walkdir_is_dir(&dent) { + self.depth += 1; + Some(Ok(WalkEvent::Dir(dent))) + } else { + Some(Ok(WalkEvent::File(dent))) + } + } + } + } +} + +/// WalkState is used in the parallel recursive directory iterator to indicate +/// whether walking should continue as normal, skip descending into a +/// particular directory or quit the walk entirely. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum WalkState { + /// Continue walking as normal. + Continue, + /// If the directory entry given is a directory, don't descend into it. + /// In all other cases, this has no effect. + Skip, + /// Quit the entire iterator as soon as possible. + /// + /// Note that this is an inherently asynchronous action. It is possible + /// for more entries to be yielded even after instructing the iterator + /// to quit. + Quit, +} + +impl WalkState { + fn is_continue(&self) -> bool { + *self == WalkState::Continue + } + + fn is_quit(&self) -> bool { + *self == WalkState::Quit + } +} + +/// A builder for constructing a visitor when using [`WalkParallel::visit`]. +/// The builder will be called for each thread started by `WalkParallel`. The +/// visitor returned from each builder is then called for every directory +/// entry. +pub trait ParallelVisitorBuilder<'s> { + /// Create per-thread `ParallelVisitor`s for `WalkParallel`. + fn build(&mut self) -> Box; +} + +impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s> for &'a mut P { + fn build(&mut self) -> Box { + (**self).build() + } +} + +/// Receives files and directories for the current thread. +/// +/// Setup for the traversal can be implemented as part of +/// [`ParallelVisitorBuilder::build`]. Teardown when traversal finishes can be +/// implemented by implementing the `Drop` trait on your traversal type. 
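+///
+/// A sketch of a minimal visitor that counts the entries it sees (the
+/// `Counter` type is illustrative, not part of this crate):
+///
+/// ```no_run
+/// use ignore::{DirEntry, Error, ParallelVisitor, WalkState};
+///
+/// struct Counter(u64);
+///
+/// impl ParallelVisitor for Counter {
+///     fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
+///         if entry.is_ok() {
+///             self.0 += 1;
+///         }
+///         WalkState::Continue
+///     }
+/// }
+/// ```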
+pub trait ParallelVisitor: Send { + /// Receives files and directories for the current thread. This is called + /// once for every directory entry visited by traversal. + fn visit(&mut self, entry: Result) -> WalkState; +} + +struct FnBuilder { + builder: F, +} + +impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s> for FnBuilder { + fn build(&mut self) -> Box { + let visitor = (self.builder)(); + Box::new(FnVisitorImp { visitor }) + } +} + +type FnVisitor<'s> = Box) -> WalkState + Send + 's>; + +struct FnVisitorImp<'s> { + visitor: FnVisitor<'s>, +} + +impl<'s> ParallelVisitor for FnVisitorImp<'s> { + fn visit(&mut self, entry: Result) -> WalkState { + (self.visitor)(entry) + } +} + +/// WalkParallel is a parallel recursive directory iterator over files paths +/// in one or more directories. +/// +/// Only file and directory paths matching the rules are returned. By default, +/// ignore files like `.gitignore` are respected. The precise matching rules +/// and precedence is explained in the documentation for `WalkBuilder`. +/// +/// Unlike `Walk`, this uses multiple threads for traversing a directory. +pub struct WalkParallel { + paths: std::vec::IntoIter, + ig_root: Ignore, + max_filesize: Option, + max_depth: Option, + follow_links: bool, + same_file_system: bool, + threads: usize, + skip: Option>, + filter: Option, +} + +impl WalkParallel { + /// Execute the parallel recursive directory iterator. `mkf` is called + /// for each thread used for iteration. The function produced by `mkf` + /// is then in turn called for each visited file path. + pub fn run<'s, F>(self, mkf: F) + where + F: FnMut() -> FnVisitor<'s>, + { + self.visit(&mut FnBuilder { builder: mkf }) + } + + /// Execute the parallel recursive directory iterator using a custom + /// visitor. + /// + /// The builder given is used to construct a visitor for every thread + /// used by this traversal. The visitor returned from each builder is then + /// called for every directory entry seen by that thread. + /// + /// Typically, creating a custom visitor is useful if you need to perform + /// some kind of cleanup once traversal is finished. This can be achieved + /// by implementing `Drop` for your builder (or for your visitor, if you + /// want to execute cleanup for every thread that is launched). + /// + /// For example, each visitor might build up a data structure of results + /// corresponding to the directory entries seen for each thread. Since each + /// visitor runs on only one thread, this build-up can be done without + /// synchronization. Then, once traversal is complete, all of the results + /// can be merged together into a single data structure. + pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) { + let threads = self.threads(); + let mut stack = vec![]; + { + let mut visitor = builder.build(); + let mut paths = Vec::new().into_iter(); + std::mem::swap(&mut paths, &mut self.paths); + // Send the initial set of root paths to the pool of workers. Note + // that we only send directories. For files, we send to them the + // callback directly. 
+ for path in paths { + let (dent, root_device) = if path == Path::new("-") { + (DirEntry::new_stdin(), None) + } else { + let root_device = if !self.same_file_system { + None + } else { + match device_num(&path) { + Ok(root_device) => Some(root_device), + Err(err) => { + let err = Error::Io(err).with_path(path); + if visitor.visit(Err(err)).is_quit() { + return; + } + continue; + } + } + }; + match DirEntryRaw::from_path(0, path, false) { + Ok(dent) => (DirEntry::new_raw(dent, None), root_device), + Err(err) => { + if visitor.visit(Err(err)).is_quit() { + return; + } + continue; + } + } + }; + stack.push(Message::Work(Work { + dent, + ignore: self.ig_root.clone(), + root_device, + })); + } + // ... but there's no need to start workers if we don't need them. + if stack.is_empty() { + return; + } + } + // Create the workers and then wait for them to finish. + let quit_now = Arc::new(AtomicBool::new(false)); + let active_workers = Arc::new(AtomicUsize::new(threads)); + let stacks = Stack::new_for_each_thread(threads, stack); + std::thread::scope(|s| { + let handles: Vec<_> = stacks + .into_iter() + .map(|stack| Worker { + visitor: builder.build(), + stack, + quit_now: quit_now.clone(), + active_workers: active_workers.clone(), + max_depth: self.max_depth, + max_filesize: self.max_filesize, + follow_links: self.follow_links, + skip: self.skip.clone(), + filter: self.filter.clone(), + }) + .map(|worker| s.spawn(|| worker.run())) + .collect(); + for handle in handles { + handle.join().unwrap(); + } + }); + } + + fn threads(&self) -> usize { + if self.threads == 0 { + 2 + } else { + self.threads + } + } +} + +/// Message is the set of instructions that a worker knows how to process. +enum Message { + /// A work item corresponds to a directory that should be descended into. + /// Work items for entries that should be skipped or ignored should not + /// be produced. + Work(Work), + /// This instruction indicates that the worker should quit. + Quit, +} + +/// A unit of work for each worker to process. +/// +/// Each unit of work corresponds to a directory that should be descended +/// into. +struct Work { + /// The directory entry. + dent: DirEntry, + /// Any ignore matchers that have been built for this directory's parents. + ignore: Ignore, + /// The root device number. When present, only files with the same device + /// number should be considered. + root_device: Option, +} + +impl Work { + /// Returns true if and only if this work item is a directory. + fn is_dir(&self) -> bool { + self.dent.is_dir() + } + + /// Returns true if and only if this work item is a symlink. + fn is_symlink(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_symlink()) + } + + /// Adds ignore rules for parent directories. + /// + /// Note that this only applies to entries at depth 0. On all other + /// entries, this is a no-op. + fn add_parents(&mut self) -> Option { + if self.dent.depth() > 0 { + return None; + } + // At depth 0, the path of this entry is a root path, so we can + // use it directly to add parent ignore rules. + let (ig, err) = self.ignore.add_parents(self.dent.path()); + self.ignore = ig; + err + } + + /// Reads the directory contents of this work item and adds ignore + /// rules for this directory. + /// + /// If there was a problem with reading the directory contents, then + /// an error is returned. If there was a problem reading the ignore + /// rules for this directory, then the error is attached to this + /// work item's directory entry. 
+ fn read_dir(&mut self) -> Result { + let readdir = match fs::read_dir(self.dent.path()) { + Ok(readdir) => readdir, + Err(err) => { + let err = Error::from(err) + .with_path(self.dent.path()) + .with_depth(self.dent.depth()); + return Err(err); + } + }; + let (ig, err) = self.ignore.add_child(self.dent.path()); + self.ignore = ig; + self.dent.err = err; + Ok(readdir) + } +} + +/// A work-stealing stack. +#[derive(Debug)] +struct Stack { + /// This thread's index. + index: usize, + /// The thread-local stack. + deque: Deque, + /// The work stealers. + stealers: Arc<[Stealer]>, +} + +impl Stack { + /// Create a work-stealing stack for each thread. The given messages + /// correspond to the initial paths to start the search at. They will + /// be distributed automatically to each stack in a round-robin fashion. + fn new_for_each_thread(threads: usize, init: Vec) -> Vec { + // Using new_lifo() ensures each worker operates depth-first, not + // breadth-first. We do depth-first because a breadth first traversal + // on wide directories with a lot of gitignores is disastrous (for + // example, searching a directory tree containing all of crates.io). + let deques: Vec> = std::iter::repeat_with(Deque::new_lifo) + .take(threads) + .collect(); + let stealers = + Arc::<[Stealer]>::from(deques.iter().map(Deque::stealer).collect::>()); + let stacks: Vec = deques + .into_iter() + .enumerate() + .map(|(index, deque)| Stack { + index, + deque, + stealers: stealers.clone(), + }) + .collect(); + // Distribute the initial messages. + init.into_iter() + .zip(stacks.iter().cycle()) + .for_each(|(m, s)| s.push(m)); + stacks + } + + /// Push a message. + fn push(&self, msg: Message) { + self.deque.push(msg); + } + + /// Pop a message. + fn pop(&self) -> Option { + self.deque.pop().or_else(|| self.steal()) + } + + /// Steal a message from another queue. + fn steal(&self) -> Option { + // For fairness, try to steal from index + 1, index + 2, ... len - 1, + // then wrap around to 0, 1, ... index - 1. + let (left, right) = self.stealers.split_at(self.index); + // Don't steal from ourselves + let right = &right[1..]; + + right + .iter() + .chain(left.iter()) + .map(|s| s.steal_batch_and_pop(&self.deque)) + .find_map(|s| s.success()) + } +} + +/// A worker is responsible for descending into directories, updating the +/// ignore matchers, producing new work and invoking the caller's callback. +/// +/// Note that a worker is *both* a producer and a consumer. +struct Worker<'s> { + /// The caller's callback. + visitor: Box, + /// A work-stealing stack of work to do. + /// + /// We use a stack instead of a channel because a stack lets us visit + /// directories in depth first order. This can substantially reduce peak + /// memory usage by keeping both the number of file paths and gitignore + /// matchers in memory lower. + stack: Stack, + /// Whether all workers should terminate at the next opportunity. Note + /// that we need this because we don't want other `Work` to be done after + /// we quit. We wouldn't need this if have a priority channel. + quit_now: Arc, + /// The number of currently active workers. + active_workers: Arc, + /// The maximum depth of directories to descend. A value of `0` means no + /// descension at all. + max_depth: Option, + /// The maximum size a searched file can be (in bytes). If a file exceeds + /// this size it will be skipped. + max_filesize: Option, + /// Whether to follow symbolic links or not. When this is enabled, loop + /// detection is performed. 
+ follow_links: bool, + /// A file handle to skip, currently is either `None` or stdout, if it's + /// a file and it has been requested to skip files identical to stdout. + skip: Option>, + /// A predicate applied to dir entries. If true, the entry and all + /// children will be skipped. + filter: Option, +} + +impl<'s> Worker<'s> { + /// Runs this worker until there is no more work left to do. + /// + /// The worker will call the caller's callback for all entries that aren't + /// skipped by the ignore matcher. + fn run(mut self) { + while let Some(work) = self.get_work() { + if let WalkState::Quit = self.run_one(work) { + self.quit_now(); + } + } + } + + fn run_one(&mut self, mut work: Work) -> WalkState { + // If the work is not a directory, then we can just execute the + // caller's callback immediately and move on. + if work.is_symlink() || !work.is_dir() { + return self.visitor.visit(Ok(work.dent)); + } + if let Some(err) = work.add_parents() { + let state = self.visitor.visit(Err(err)); + if state.is_quit() { + return state; + } + } + + let descend = if let Some(root_device) = work.root_device { + match is_same_file_system(root_device, work.dent.path()) { + Ok(true) => true, + Ok(false) => false, + Err(err) => { + let state = self.visitor.visit(Err(err)); + if state.is_quit() { + return state; + } + false + } + } + } else { + true + }; + + // Try to read the directory first before we transfer ownership + // to the provided closure. Do not unwrap it immediately, though, + // as we may receive an `Err` value e.g. in the case when we do not + // have sufficient read permissions to list the directory. + // In that case we still want to provide the closure with a valid + // entry before passing the error value. + let readdir = work.read_dir(); + let depth = work.dent.depth(); + let state = self.visitor.visit(Ok(work.dent)); + if !state.is_continue() { + return state; + } + if !descend { + return WalkState::Skip; + } + + let readdir = match readdir { + Ok(readdir) => readdir, + Err(err) => { + return self.visitor.visit(Err(err)); + } + }; + + if self.max_depth.map_or(false, |max| depth >= max) { + return WalkState::Skip; + } + for result in readdir { + let state = self.generate_work(&work.ignore, depth + 1, work.root_device, result); + if state.is_quit() { + return state; + } + } + WalkState::Continue + } + + /// Decides whether to submit the given directory entry as a file to + /// search. + /// + /// If the entry is a path that should be ignored, then this is a no-op. + /// Otherwise, the entry is pushed on to the queue. (The actual execution + /// of the callback happens in `run_one`.) + /// + /// If an error occurs while reading the entry, then it is sent to the + /// caller's callback. + /// + /// `ig` is the `Ignore` matcher for the parent directory. `depth` should + /// be the depth of this entry. `result` should be the item yielded by + /// a directory iterator. 
+
+    /// Decides whether to submit the given directory entry as a file to
+    /// search.
+    ///
+    /// If the entry is a path that should be ignored, then this is a no-op.
+    /// Otherwise, the entry is pushed on to the queue. (The actual execution
+    /// of the callback happens in `run_one`.)
+    ///
+    /// If an error occurs while reading the entry, then it is sent to the
+    /// caller's callback.
+    ///
+    /// `ig` is the `Ignore` matcher for the parent directory. `depth` should
+    /// be the depth of this entry. `result` should be the item yielded by
+    /// a directory iterator.
+    fn generate_work(
+        &mut self,
+        ig: &Ignore,
+        depth: usize,
+        root_device: Option<u64>,
+        result: Result<fs::DirEntry, io::Error>,
+    ) -> WalkState {
+        let fs_dent = match result {
+            Ok(fs_dent) => fs_dent,
+            Err(err) => {
+                return self.visitor.visit(Err(Error::from(err).with_depth(depth)));
+            }
+        };
+        let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) {
+            Ok(dent) => DirEntry::new_raw(dent, None),
+            Err(err) => {
+                return self.visitor.visit(Err(err));
+            }
+        };
+        let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink());
+        if self.follow_links && is_symlink {
+            let path = dent.path().to_path_buf();
+            dent = match DirEntryRaw::from_path(depth, path, true) {
+                Ok(dent) => DirEntry::new_raw(dent, None),
+                Err(err) => {
+                    return self.visitor.visit(Err(err));
+                }
+            };
+            if dent.is_dir() {
+                if let Err(err) = check_symlink_loop(ig, dent.path(), depth) {
+                    return self.visitor.visit(Err(err));
+                }
+            }
+        }
+        // N.B. See analogous call in the single-threaded implementation about
+        // why it's important for this to come before the checks below.
+        if should_skip_entry(ig, &dent) {
+            return WalkState::Continue;
+        }
+        if let Some(ref stdout) = self.skip {
+            let is_stdout = match path_equals(&dent, stdout) {
+                Ok(is_stdout) => is_stdout,
+                Err(err) => return self.visitor.visit(Err(err)),
+            };
+            if is_stdout {
+                return WalkState::Continue;
+            }
+        }
+        let should_skip_filesize = if self.max_filesize.is_some() && !dent.is_dir() {
+            skip_filesize(
+                self.max_filesize.unwrap(),
+                dent.path(),
+                &dent.metadata().ok(),
+            )
+        } else {
+            false
+        };
+        let should_skip_filtered = if let Some(Filter(predicate)) = &self.filter {
+            !predicate(&dent)
+        } else {
+            false
+        };
+        if !should_skip_filesize && !should_skip_filtered {
+            self.send(Work {
+                dent,
+                ignore: ig.clone(),
+                root_device,
+            });
+        }
+        WalkState::Continue
+    }
+
+    /// Returns the next directory to descend into.
+    ///
+    /// If all work has been exhausted, then this returns None. The worker
+    /// should then subsequently quit.
+    fn get_work(&mut self) -> Option<Work> {
+        let mut value = self.recv();
+        loop {
+            // Simulate a priority channel: if the quit_now flag is set, we
+            // can receive only quit messages.
+            if self.is_quit_now() {
+                value = Some(Message::Quit)
+            }
+            match value {
+                Some(Message::Work(work)) => {
+                    return Some(work);
+                }
+                Some(Message::Quit) => {
+                    // Repeat quit message to wake up sleeping threads, if
+                    // any. The domino effect will ensure that every thread
+                    // will quit.
+                    self.send_quit();
+                    return None;
+                }
+                None => {
+                    if self.deactivate_worker() == 0 {
+                        // If deactivate_worker() returns 0, every worker thread
+                        // is currently within the critical section between the
+                        // acquire in deactivate_worker() and the release in
+                        // activate_worker() below. For this to happen, every
+                        // worker's local deque must be simultaneously empty,
+                        // meaning there is no more work left at all.
+                        self.send_quit();
+                        return None;
+                    }
+                    // Wait for next `Work` or `Quit` message.
+                    loop {
+                        if let Some(v) = self.recv() {
+                            self.activate_worker();
+                            value = Some(v);
+                            break;
+                        }
+                        // Our stack isn't blocking. Instead of burning the
+                        // CPU waiting, we let the thread sleep for a bit. In
+                        // general, this tends to only occur once the search is
+                        // approaching termination.
+                        let dur = std::time::Duration::from_millis(1);
+                        std::thread::sleep(dur);
+                    }
+                }
+            }
+        }
+    }
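+
+    // A minimal sketch of the `active_workers` handshake that `get_work`
+    // above relies on (illustrative only, not part of this crate):
+    //
+    //     use std::sync::atomic::{AtomicUsize, Ordering};
+    //
+    //     let active = AtomicUsize::new(4); // e.g. four workers
+    //     // A worker whose deque ran dry goes idle:
+    //     let remaining = active.fetch_sub(1, Ordering::Acquire) - 1;
+    //     if remaining == 0 {
+    //         // Every deque was empty at one instant; no thread can
+    //         // produce new work, so the traversal is finished.
+    //     }
+    //     // A worker that received a message while idle re-activates:
+    //     active.fetch_add(1, Ordering::Release);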
+
+    /// Indicates that all workers should quit immediately.
+    fn quit_now(&self) {
+        self.quit_now.store(true, AtomicOrdering::SeqCst);
+    }
+
+    /// Returns true if this worker should quit immediately.
+    fn is_quit_now(&self) -> bool {
+        self.quit_now.load(AtomicOrdering::SeqCst)
+    }
+
+    /// Send work.
+    fn send(&self, work: Work) {
+        self.stack.push(Message::Work(work));
+    }
+
+    /// Send a quit message.
+    fn send_quit(&self) {
+        self.stack.push(Message::Quit);
+    }
+
+    /// Receive work.
+    fn recv(&self) -> Option<Message> {
+        self.stack.pop()
+    }
+
+    /// Deactivates a worker and returns the number of currently active workers.
+    fn deactivate_worker(&self) -> usize {
+        self.active_workers.fetch_sub(1, AtomicOrdering::Acquire) - 1
+    }
+
+    /// Reactivates a worker.
+    fn activate_worker(&self) {
+        self.active_workers.fetch_add(1, AtomicOrdering::Release);
+    }
+}
+
+fn check_symlink_loop(
+    ig_parent: &Ignore,
+    child_path: &Path,
+    child_depth: usize,
+) -> Result<(), Error> {
+    let hchild = Handle::from_path(child_path).map_err(|err| {
+        Error::from(err)
+            .with_path(child_path)
+            .with_depth(child_depth)
+    })?;
+    for ig in ig_parent
+        .parents()
+        .take_while(|ig| !ig.is_absolute_parent())
+    {
+        let h = Handle::from_path(ig.path()).map_err(|err| {
+            Error::from(err)
+                .with_path(child_path)
+                .with_depth(child_depth)
+        })?;
+        if hchild == h {
+            return Err(Error::Loop {
+                ancestor: ig.path().to_path_buf(),
+                child: child_path.to_path_buf(),
+            }
+            .with_depth(child_depth));
+        }
+    }
+    Ok(())
+}
+
+// Before calling this function, make sure it is really necessary, as the
+// arguments imply a file stat.
+fn skip_filesize(max_filesize: u64, path: &Path, ent: &Option<Metadata>) -> bool {
+    let filesize = match *ent {
+        Some(ref md) => Some(md.len()),
+        None => None,
+    };
+
+    if let Some(fs) = filesize {
+        if fs > max_filesize {
+            log::debug!("ignoring {}: {} bytes", path.display(), fs);
+            true
+        } else {
+            false
+        }
+    } else {
+        false
+    }
+}
+
+fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
+    let m = ig.matched_dir_entry(dent);
+    if m.is_ignore() {
+        log::debug!("ignoring {}: {:?}", dent.path().display(), m);
+        true
+    } else if m.is_whitelist() {
+        log::debug!("whitelisting {}: {:?}", dent.path().display(), m);
+        false
+    } else {
+        false
+    }
+}
+
+/// Returns a handle to stdout for filtering search.
+///
+/// A handle is returned if and only if stdout is being redirected to a file.
+/// The handle returned corresponds to that file.
+///
+/// This can be used to ensure that we do not attempt to search a file that we
+/// may also be writing to.
+fn stdout_handle() -> Option<Handle> {
+    let h = match Handle::stdout() {
+        Err(_) => return None,
+        Ok(h) => h,
+    };
+    let md = match h.as_file().metadata() {
+        Err(_) => return None,
+        Ok(md) => md,
+    };
+    if !md.is_file() {
+        return None;
+    }
+    Some(h)
+}
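+
+// A minimal sketch of how `stdout_handle` composes with `path_equals`
+// (defined below); illustrative only, `WalkBuilder::skip_stdout` is the
+// public way to enable this behavior:
+//
+//     let skip = stdout_handle().map(Arc::new);
+//     // ... later, for each candidate entry:
+//     if let Some(ref handle) = skip {
+//         if path_equals(&dent, handle)? {
+//             // Skip the entry: otherwise we could end up searching
+//             // the very file we are writing results to.
+//         }
+//     }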
+
+/// Returns true if and only if the given directory entry is believed to be
+/// equivalent to the given handle. If there was a problem querying the path
+/// for information to determine equality, then that error is returned.
+fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
+    #[cfg(unix)]
+    fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
+        dent.ino() != Some(handle.ino())
+    }
+
+    #[cfg(not(unix))]
+    fn never_equal(_: &DirEntry, _: &Handle) -> bool {
+        false
+    }
+
+    // If we know for sure that these two things aren't equal, then avoid
+    // the costly extra stat call to determine equality.
+    if dent.is_stdin() || never_equal(dent, handle) {
+        return Ok(false);
+    }
+    Handle::from_path(dent.path())
+        .map(|h| &h == handle)
+        .map_err(|err| Error::Io(err).with_path(dent.path()))
+}
+
+/// Returns true if the given walkdir entry corresponds to a directory.
+///
+/// This is normally just `dent.file_type().is_dir()`, but when we aren't
+/// following symlinks, the root directory entry may be a symlink to a
+/// directory that we *do* follow---by virtue of it being specified by the user
+/// explicitly. In that case, we need to follow the symlink and query whether
+/// it's a directory or not. But we only do this for root entries to avoid an
+/// additional stat check in most cases.
+fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool {
+    if dent.file_type().is_dir() {
+        return true;
+    }
+    if !dent.file_type().is_symlink() || dent.depth() > 0 {
+        return false;
+    }
+    dent.path()
+        .metadata()
+        .ok()
+        .map_or(false, |md| md.file_type().is_dir())
+}
+
+/// Returns true if and only if the given path is on the same device as the
+/// given root device.
+fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
+    let dent_device = device_num(path).map_err(|err| Error::Io(err).with_path(path))?;
+    Ok(root_device == dent_device)
+}
+
+#[cfg(unix)]
+fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
+    use std::os::unix::fs::MetadataExt;
+
+    path.as_ref().metadata().map(|md| md.dev())
+}
+
+#[cfg(windows)]
+fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
+    use winapi_util::{file, Handle};
+
+    let h = Handle::from_path_any(path)?;
+    file::information(h).map(|info| info.volume_serial_number())
+}
+
+#[cfg(not(any(unix, windows)))]
+fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
+    Err(io::Error::new(
+        io::ErrorKind::Other,
+        "walkdir: same_file_system option not supported on this platform",
+    ))
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ffi::OsStr;
+    use std::fs::{self, File};
+    use std::io::Write;
+    use std::path::Path;
+    use std::sync::{Arc, Mutex};
+
+    use super::{DirEntry, WalkBuilder, WalkState};
+    use crate::tests::TempDir;
+
+    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
+        let mut file = File::create(path).unwrap();
+        file.write_all(contents.as_bytes()).unwrap();
+    }
+
+    fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
+        let file = File::create(path).unwrap();
+        file.set_len(size).unwrap();
+    }
+
+    #[cfg(unix)]
+    fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
+        use std::os::unix::fs::symlink;
+        symlink(src, dst).unwrap();
+    }
+
+    fn mkdirp<P: AsRef<Path>>(path: P) {
+        fs::create_dir_all(path).unwrap();
+    }
+
+    fn normal_path(unix: &str) -> String {
+        if cfg!(windows) {
+            unix.replace("\\", "/")
+        } else {
+            unix.to_string()
+        }
+    }
+
+    fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
+        let mut paths = vec![];
+        for result in builder.build() {
+            let dent = match result {
+                Err(_) => continue,
+                Ok(dent) => dent,
+            };
+            let path = dent.path().strip_prefix(prefix).unwrap();
+            if path.as_os_str().is_empty() {
+                continue;
+            }
+            paths.push(normal_path(path.to_str().unwrap()));
+        }
+        paths.sort();
+        paths
+    }
+
+    fn walk_collect_parallel(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
+        let mut paths = vec![];
+        for dent in walk_collect_entries_parallel(builder) {
+            let path = dent.path().strip_prefix(prefix).unwrap();
+            if path.as_os_str().is_empty() {
+                continue;
+            }
+            paths.push(normal_path(path.to_str().unwrap()));
+        }
+        paths.sort();
+        paths
+    }
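+
+    // A commented sketch of the per-thread visitor contract behind
+    // `build_parallel().run`, used by the helper below (illustrative
+    // only): the outer closure runs once per worker thread and returns
+    // that thread's own `FnMut` callback.
+    //
+    //     builder.build_parallel().run(|| {
+    //         Box::new(|result| {
+    //             // Called for every directory entry this worker yields.
+    //             WalkState::Continue
+    //         })
+    //     });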
+
+    fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> {
+        let dents = Arc::new(Mutex::new(vec![]));
+        builder.build_parallel().run(|| {
+            let dents = dents.clone();
+            Box::new(move |result| {
+                if let Ok(dent) = result {
+                    dents.lock().unwrap().push(dent);
+                }
+                WalkState::Continue
+            })
+        });
+
+        let dents = dents.lock().unwrap();
+        dents.to_vec()
+    }
+
+    fn mkpaths(paths: &[&str]) -> Vec<String> {
+        let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect();
+        paths.sort();
+        paths
+    }
+
+    fn tmpdir() -> TempDir {
+        TempDir::new().unwrap()
+    }
+
+    fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
+        let got = walk_collect(prefix, builder);
+        assert_eq!(got, mkpaths(expected), "single threaded");
+        let got = walk_collect_parallel(prefix, builder);
+        assert_eq!(got, mkpaths(expected), "parallel");
+    }
+
+    #[test]
+    fn no_ignores() {
+        let td = tmpdir();
+        mkdirp(td.path().join("a/b/c"));
+        mkdirp(td.path().join("x/y"));
+        wfile(td.path().join("a/b/foo"), "");
+        wfile(td.path().join("x/y/foo"), "");
+
+        assert_paths(
+            td.path(),
+            &WalkBuilder::new(td.path()),
+            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
+        );
+    }
+
+    #[test]
+    fn custom_ignore() {
+        let td = tmpdir();
+        let custom_ignore = ".customignore";
+        mkdirp(td.path().join("a"));
+        wfile(td.path().join(custom_ignore), "foo");
+        wfile(td.path().join("foo"), "");
+        wfile(td.path().join("a/foo"), "");
+        wfile(td.path().join("bar"), "");
+        wfile(td.path().join("a/bar"), "");
+
+        let mut builder = WalkBuilder::new(td.path());
+        builder.add_custom_ignore_filename(&custom_ignore);
+        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
+    }
+
+    #[test]
+    fn custom_ignore_exclusive_use() {
+        let td = tmpdir();
+        let custom_ignore = ".customignore";
+        mkdirp(td.path().join("a"));
+        wfile(td.path().join(custom_ignore), "foo");
+        wfile(td.path().join("foo"), "");
+        wfile(td.path().join("a/foo"), "");
+        wfile(td.path().join("bar"), "");
+        wfile(td.path().join("a/bar"), "");
+
+        let mut builder = WalkBuilder::new(td.path());
+        builder.ignore(false);
+        builder.git_ignore(false);
+        builder.git_global(false);
+        builder.git_exclude(false);
+        builder.add_custom_ignore_filename(&custom_ignore);
+        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
+    }
+
+    #[test]
+    fn gitignore() {
+        let td = tmpdir();
+        mkdirp(td.path().join(".git"));
+        mkdirp(td.path().join("a"));
+        wfile(td.path().join(".gitignore"), "foo");
+        wfile(td.path().join("foo"), "");
+        wfile(td.path().join("a/foo"), "");
+        wfile(td.path().join("bar"), "");
+        wfile(td.path().join("a/bar"), "");
+
+        assert_paths(
+            td.path(),
+            &WalkBuilder::new(td.path()),
+            &["bar", "a", "a/bar"],
+        );
+    }
+
+    #[test]
+    fn explicit_ignore() {
+        let td = tmpdir();
+        let igpath = td.path().join(".not-an-ignore");
+        mkdirp(td.path().join("a"));
+        wfile(&igpath, "foo");
+        wfile(td.path().join("foo"), "");
+        wfile(td.path().join("a/foo"), "");
+        wfile(td.path().join("bar"), "");
+        wfile(td.path().join("a/bar"), "");
+
+        let mut builder = WalkBuilder::new(td.path());
+        assert!(builder.add_ignore(&igpath).is_none());
+        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
+    }
+
+    #[test]
+    fn explicit_ignore_exclusive_use() {
+        let td = tmpdir();
+        let igpath = td.path().join(".not-an-ignore");
+        mkdirp(td.path().join("a"));
+        wfile(&igpath, "foo");
+        wfile(td.path().join("foo"), "");
+        wfile(td.path().join("a/foo"), "");
+        wfile(td.path().join("bar"), "");
+        wfile(td.path().join("a/bar"), "");
+
+        let mut builder = WalkBuilder::new(td.path());
+        builder.standard_filters(false);
+        assert!(builder.add_ignore(&igpath).is_none());
+        assert_paths(
+            td.path(),
+            &builder,
+            &[".not-an-ignore", "bar", "a", "a/bar"],
+        );
+    }
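+
+    // A commented sketch of what `standard_filters(false)` in the test
+    // above unbundles; per the `WalkBuilder` docs it toggles these six
+    // options as a group:
+    //
+    //     builder
+    //         .hidden(false)      // do not skip hidden entries
+    //         .parents(false)     // do not read ignore files in parent dirs
+    //         .ignore(false)      // do not read .ignore files
+    //         .git_ignore(false)  // do not read .gitignore files
+    //         .git_global(false)  // do not read the global gitignore file
+    //         .git_exclude(false); // do not read .git/info/exclude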
+ + #[test] + fn gitignore_parent() { + let td = tmpdir(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("a")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("a/bar"), ""); + + let root = td.path().join("a"); + assert_paths(&root, &WalkBuilder::new(&root), &["bar"]); + } + + #[test] + fn max_depth() { + let td = tmpdir(); + mkdirp(td.path().join("a/b/c")); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("a/b/foo"), ""); + wfile(td.path().join("a/b/c/foo"), ""); + + let mut builder = WalkBuilder::new(td.path()); + assert_paths( + td.path(), + &builder, + &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"], + ); + assert_paths(td.path(), builder.max_depth(Some(0)), &[]); + assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]); + assert_paths( + td.path(), + builder.max_depth(Some(2)), + &["a", "a/b", "foo", "a/foo"], + ); + } + + #[test] + fn max_filesize() { + let td = tmpdir(); + mkdirp(td.path().join("a/b")); + wfile_size(td.path().join("foo"), 0); + wfile_size(td.path().join("bar"), 400); + wfile_size(td.path().join("baz"), 600); + wfile_size(td.path().join("a/foo"), 600); + wfile_size(td.path().join("a/bar"), 500); + wfile_size(td.path().join("a/baz"), 200); + + let mut builder = WalkBuilder::new(td.path()); + assert_paths( + td.path(), + &builder, + &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"], + ); + assert_paths( + td.path(), + builder.max_filesize(Some(0)), + &["a", "a/b", "foo"], + ); + assert_paths( + td.path(), + builder.max_filesize(Some(500)), + &["a", "a/b", "foo", "bar", "a/bar", "a/baz"], + ); + assert_paths( + td.path(), + builder.max_filesize(Some(50000)), + &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"], + ); + } + + #[cfg(unix)] // because symlinks on windows are weird + #[test] + fn symlinks() { + let td = tmpdir(); + mkdirp(td.path().join("a/b")); + symlink(td.path().join("a/b"), td.path().join("z")); + wfile(td.path().join("a/b/foo"), ""); + + let mut builder = WalkBuilder::new(td.path()); + assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]); + assert_paths( + td.path(), + &builder.follow_links(true), + &["a", "a/b", "a/b/foo", "z", "z/foo"], + ); + } + + #[cfg(unix)] // because symlinks on windows are weird + #[test] + fn first_path_not_symlink() { + let td = tmpdir(); + mkdirp(td.path().join("foo")); + + let dents = WalkBuilder::new(td.path().join("foo")) + .build() + .into_iter() + .collect::, _>>() + .unwrap(); + assert_eq!(1, dents.len()); + assert!(!dents[0].path_is_symlink()); + + let dents = walk_collect_entries_parallel(&WalkBuilder::new(td.path().join("foo"))); + assert_eq!(1, dents.len()); + assert!(!dents[0].path_is_symlink()); + } + + #[cfg(unix)] // because symlinks on windows are weird + #[test] + fn symlink_loop() { + let td = tmpdir(); + mkdirp(td.path().join("a/b")); + symlink(td.path().join("a"), td.path().join("a/b/c")); + + let mut builder = WalkBuilder::new(td.path()); + assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]); + assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]); + } + + // It's a little tricky to test the 'same_file_system' option since + // we need an environment with more than one file system. We adopt a + // heuristic where /sys is typically a distinct volume on Linux and roll + // with that. 
+ #[test] + #[cfg(target_os = "linux")] + fn same_file_system() { + use super::device_num; + + // If for some reason /sys doesn't exist or isn't a directory, just + // skip this test. + if !Path::new("/sys").is_dir() { + return; + } + + // If our test directory actually isn't a different volume from /sys, + // then this test is meaningless and we shouldn't run it. + let td = tmpdir(); + if device_num(td.path()).unwrap() == device_num("/sys").unwrap() { + return; + } + + mkdirp(td.path().join("same_file")); + symlink("/sys", td.path().join("same_file").join("alink")); + + // Create a symlink to sys and enable following symlinks. If the + // same_file_system option doesn't work, then this probably will hit a + // permission error. Otherwise, it should just skip over the symlink + // completely. + let mut builder = WalkBuilder::new(td.path()); + builder.follow_links(true).same_file_system(true); + assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]); + } + + #[cfg(target_os = "linux")] + #[test] + fn no_read_permissions() { + let dir_path = Path::new("/root"); + + // There's no /etc/sudoers.d, skip the test. + if !dir_path.is_dir() { + return; + } + // We're the root, so the test won't check what we want it to. + if fs::read_dir(&dir_path).is_ok() { + return; + } + + // Check that we can't descend but get an entry for the parent dir. + let builder = WalkBuilder::new(&dir_path); + assert_paths(dir_path.parent().unwrap(), &builder, &["root"]); + } + + #[test] + fn filter() { + let td = tmpdir(); + mkdirp(td.path().join("a/b/c")); + mkdirp(td.path().join("x/y")); + wfile(td.path().join("a/b/foo"), ""); + wfile(td.path().join("x/y/foo"), ""); + + assert_paths( + td.path(), + &WalkBuilder::new(td.path()), + &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"], + ); + + assert_paths( + td.path(), + &WalkBuilder::new(td.path()).filter_entry(|entry| entry.file_name() != OsStr::new("a")), + &["x", "x/y", "x/y/foo"], + ); + } +} diff --git a/crates/ignore/tests/gitignore_matched_path_or_any_parents_tests.gitignore b/crates/ignore/tests/gitignore_matched_path_or_any_parents_tests.gitignore new file mode 100644 index 000000000000..ac09e12f7aba --- /dev/null +++ b/crates/ignore/tests/gitignore_matched_path_or_any_parents_tests.gitignore @@ -0,0 +1,216 @@ +# Based on https://github.com/behnam/gitignore-test/blob/master/.gitignore + +### file in root + +# MATCH /file_root_1 +file_root_00 + +# NO_MATCH +file_root_01/ + +# NO_MATCH +file_root_02/* + +# NO_MATCH +file_root_03/** + + +# MATCH /file_root_10 +/file_root_10 + +# NO_MATCH +/file_root_11/ + +# NO_MATCH +/file_root_12/* + +# NO_MATCH +/file_root_13/** + + +# NO_MATCH +*/file_root_20 + +# NO_MATCH +*/file_root_21/ + +# NO_MATCH +*/file_root_22/* + +# NO_MATCH +*/file_root_23/** + + +# MATCH /file_root_30 +**/file_root_30 + +# NO_MATCH +**/file_root_31/ + +# NO_MATCH +**/file_root_32/* + +# NO_MATCH +**/file_root_33/** + + +### file in sub-dir + +# MATCH /parent_dir/file_deep_1 +file_deep_00 + +# NO_MATCH +file_deep_01/ + +# NO_MATCH +file_deep_02/* + +# NO_MATCH +file_deep_03/** + + +# NO_MATCH +/file_deep_10 + +# NO_MATCH +/file_deep_11/ + +# NO_MATCH +/file_deep_12/* + +# NO_MATCH +/file_deep_13/** + + +# MATCH /parent_dir/file_deep_20 +*/file_deep_20 + +# NO_MATCH +*/file_deep_21/ + +# NO_MATCH +*/file_deep_22/* + +# NO_MATCH +*/file_deep_23/** + + +# MATCH /parent_dir/file_deep_30 +**/file_deep_30 + +# NO_MATCH +**/file_deep_31/ + +# NO_MATCH +**/file_deep_32/* + +# NO_MATCH +**/file_deep_33/** + + +### dir in 
root + +# MATCH /dir_root_00 +dir_root_00 + +# MATCH /dir_root_01 +dir_root_01/ + +# MATCH /dir_root_02 +dir_root_02/* + +# MATCH /dir_root_03 +dir_root_03/** + + +# MATCH /dir_root_10 +/dir_root_10 + +# MATCH /dir_root_11 +/dir_root_11/ + +# MATCH /dir_root_12 +/dir_root_12/* + +# MATCH /dir_root_13 +/dir_root_13/** + + +# NO_MATCH +*/dir_root_20 + +# NO_MATCH +*/dir_root_21/ + +# NO_MATCH +*/dir_root_22/* + +# NO_MATCH +*/dir_root_23/** + + +# MATCH /dir_root_30 +**/dir_root_30 + +# MATCH /dir_root_31 +**/dir_root_31/ + +# MATCH /dir_root_32 +**/dir_root_32/* + +# MATCH /dir_root_33 +**/dir_root_33/** + + +### dir in sub-dir + +# MATCH /parent_dir/dir_deep_00 +dir_deep_00 + +# MATCH /parent_dir/dir_deep_01 +dir_deep_01/ + +# NO_MATCH +dir_deep_02/* + +# NO_MATCH +dir_deep_03/** + + +# NO_MATCH +/dir_deep_10 + +# NO_MATCH +/dir_deep_11/ + +# NO_MATCH +/dir_deep_12/* + +# NO_MATCH +/dir_deep_13/** + + +# MATCH /parent_dir/dir_deep_20 +*/dir_deep_20 + +# MATCH /parent_dir/dir_deep_21 +*/dir_deep_21/ + +# MATCH /parent_dir/dir_deep_22 +*/dir_deep_22/* + +# MATCH /parent_dir/dir_deep_23 +*/dir_deep_23/** + + +# MATCH /parent_dir/dir_deep_30 +**/dir_deep_30 + +# MATCH /parent_dir/dir_deep_31 +**/dir_deep_31/ + +# MATCH /parent_dir/dir_deep_32 +**/dir_deep_32/* + +# MATCH /parent_dir/dir_deep_33 +**/dir_deep_33/** diff --git a/crates/ignore/tests/gitignore_matched_path_or_any_parents_tests.rs b/crates/ignore/tests/gitignore_matched_path_or_any_parents_tests.rs new file mode 100644 index 000000000000..b7b7c6f95087 --- /dev/null +++ b/crates/ignore/tests/gitignore_matched_path_or_any_parents_tests.rs @@ -0,0 +1,291 @@ +use std::path::Path; + +use ignore::gitignore::{Gitignore, GitignoreBuilder}; + +const IGNORE_FILE: &'static str = "tests/gitignore_matched_path_or_any_parents_tests.gitignore"; + +fn get_gitignore() -> Gitignore { + let mut builder = GitignoreBuilder::new("ROOT"); + let error = builder.add(IGNORE_FILE); + assert!(error.is_none(), "failed to open gitignore file"); + builder.build().unwrap() +} + +#[test] +#[should_panic(expected = "path is expected to be under the root")] +fn test_path_should_be_under_root() { + let gitignore = get_gitignore(); + let path = "/tmp/some_file"; + gitignore.matched_path_or_any_parents(Path::new(path), false); + assert!(false); +} + +#[test] +fn test_files_in_root() { + let gitignore = get_gitignore(); + let m = |path: &str| gitignore.matched_path_or_any_parents(Path::new(path), false); + + // 0x + assert!(m("ROOT/file_root_00").is_ignore()); + assert!(m("ROOT/file_root_01").is_none()); + assert!(m("ROOT/file_root_02").is_none()); + assert!(m("ROOT/file_root_03").is_none()); + + // 1x + assert!(m("ROOT/file_root_10").is_ignore()); + assert!(m("ROOT/file_root_11").is_none()); + assert!(m("ROOT/file_root_12").is_none()); + assert!(m("ROOT/file_root_13").is_none()); + + // 2x + assert!(m("ROOT/file_root_20").is_none()); + assert!(m("ROOT/file_root_21").is_none()); + assert!(m("ROOT/file_root_22").is_none()); + assert!(m("ROOT/file_root_23").is_none()); + + // 3x + assert!(m("ROOT/file_root_30").is_ignore()); + assert!(m("ROOT/file_root_31").is_none()); + assert!(m("ROOT/file_root_32").is_none()); + assert!(m("ROOT/file_root_33").is_none()); +} + +#[test] +fn test_files_in_deep() { + let gitignore = get_gitignore(); + let m = |path: &str| gitignore.matched_path_or_any_parents(Path::new(path), false); + + // 0x + assert!(m("ROOT/parent_dir/file_deep_00").is_ignore()); + assert!(m("ROOT/parent_dir/file_deep_01").is_none()); + 
assert!(m("ROOT/parent_dir/file_deep_02").is_none()); + assert!(m("ROOT/parent_dir/file_deep_03").is_none()); + + // 1x + assert!(m("ROOT/parent_dir/file_deep_10").is_none()); + assert!(m("ROOT/parent_dir/file_deep_11").is_none()); + assert!(m("ROOT/parent_dir/file_deep_12").is_none()); + assert!(m("ROOT/parent_dir/file_deep_13").is_none()); + + // 2x + assert!(m("ROOT/parent_dir/file_deep_20").is_ignore()); + assert!(m("ROOT/parent_dir/file_deep_21").is_none()); + assert!(m("ROOT/parent_dir/file_deep_22").is_none()); + assert!(m("ROOT/parent_dir/file_deep_23").is_none()); + + // 3x + assert!(m("ROOT/parent_dir/file_deep_30").is_ignore()); + assert!(m("ROOT/parent_dir/file_deep_31").is_none()); + assert!(m("ROOT/parent_dir/file_deep_32").is_none()); + assert!(m("ROOT/parent_dir/file_deep_33").is_none()); +} + +#[test] +fn test_dirs_in_root() { + let gitignore = get_gitignore(); + let m = + |path: &str, is_dir: bool| gitignore.matched_path_or_any_parents(Path::new(path), is_dir); + + // 00 + assert!(m("ROOT/dir_root_00", true).is_ignore()); + assert!(m("ROOT/dir_root_00/file", false).is_ignore()); + assert!(m("ROOT/dir_root_00/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_00/child_dir/file", false).is_ignore()); + + // 01 + assert!(m("ROOT/dir_root_01", true).is_ignore()); + assert!(m("ROOT/dir_root_01/file", false).is_ignore()); + assert!(m("ROOT/dir_root_01/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_01/child_dir/file", false).is_ignore()); + + // 02 + assert!(m("ROOT/dir_root_02", true).is_none()); // dir itself doesn't match + assert!(m("ROOT/dir_root_02/file", false).is_ignore()); + assert!(m("ROOT/dir_root_02/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_02/child_dir/file", false).is_ignore()); + + // 03 + assert!(m("ROOT/dir_root_03", true).is_none()); // dir itself doesn't match + assert!(m("ROOT/dir_root_03/file", false).is_ignore()); + assert!(m("ROOT/dir_root_03/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_03/child_dir/file", false).is_ignore()); + + // 10 + assert!(m("ROOT/dir_root_10", true).is_ignore()); + assert!(m("ROOT/dir_root_10/file", false).is_ignore()); + assert!(m("ROOT/dir_root_10/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_10/child_dir/file", false).is_ignore()); + + // 11 + assert!(m("ROOT/dir_root_11", true).is_ignore()); + assert!(m("ROOT/dir_root_11/file", false).is_ignore()); + assert!(m("ROOT/dir_root_11/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_11/child_dir/file", false).is_ignore()); + + // 12 + assert!(m("ROOT/dir_root_12", true).is_none()); // dir itself doesn't match + assert!(m("ROOT/dir_root_12/file", false).is_ignore()); + assert!(m("ROOT/dir_root_12/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_12/child_dir/file", false).is_ignore()); + + // 13 + assert!(m("ROOT/dir_root_13", true).is_none()); + assert!(m("ROOT/dir_root_13/file", false).is_ignore()); + assert!(m("ROOT/dir_root_13/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_13/child_dir/file", false).is_ignore()); + + // 20 + assert!(m("ROOT/dir_root_20", true).is_none()); + assert!(m("ROOT/dir_root_20/file", false).is_none()); + assert!(m("ROOT/dir_root_20/child_dir", true).is_none()); + assert!(m("ROOT/dir_root_20/child_dir/file", false).is_none()); + + // 21 + assert!(m("ROOT/dir_root_21", true).is_none()); + assert!(m("ROOT/dir_root_21/file", false).is_none()); + assert!(m("ROOT/dir_root_21/child_dir", true).is_none()); + assert!(m("ROOT/dir_root_21/child_dir/file", false).is_none()); 
+ + // 22 + assert!(m("ROOT/dir_root_22", true).is_none()); + assert!(m("ROOT/dir_root_22/file", false).is_none()); + assert!(m("ROOT/dir_root_22/child_dir", true).is_none()); + assert!(m("ROOT/dir_root_22/child_dir/file", false).is_none()); + + // 23 + assert!(m("ROOT/dir_root_23", true).is_none()); + assert!(m("ROOT/dir_root_23/file", false).is_none()); + assert!(m("ROOT/dir_root_23/child_dir", true).is_none()); + assert!(m("ROOT/dir_root_23/child_dir/file", false).is_none()); + + // 30 + assert!(m("ROOT/dir_root_30", true).is_ignore()); + assert!(m("ROOT/dir_root_30/file", false).is_ignore()); + assert!(m("ROOT/dir_root_30/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_30/child_dir/file", false).is_ignore()); + + // 31 + assert!(m("ROOT/dir_root_31", true).is_ignore()); + assert!(m("ROOT/dir_root_31/file", false).is_ignore()); + assert!(m("ROOT/dir_root_31/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_31/child_dir/file", false).is_ignore()); + + // 32 + assert!(m("ROOT/dir_root_32", true).is_none()); // dir itself doesn't match + assert!(m("ROOT/dir_root_32/file", false).is_ignore()); + assert!(m("ROOT/dir_root_32/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_32/child_dir/file", false).is_ignore()); + + // 33 + assert!(m("ROOT/dir_root_33", true).is_none()); // dir itself doesn't match + assert!(m("ROOT/dir_root_33/file", false).is_ignore()); + assert!(m("ROOT/dir_root_33/child_dir", true).is_ignore()); + assert!(m("ROOT/dir_root_33/child_dir/file", false).is_ignore()); +} + +#[test] +fn test_dirs_in_deep() { + let gitignore = get_gitignore(); + let m = + |path: &str, is_dir: bool| gitignore.matched_path_or_any_parents(Path::new(path), is_dir); + + // 00 + assert!(m("ROOT/parent_dir/dir_deep_00", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_00/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_00/child_dir", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_00/child_dir/file", false).is_ignore()); + + // 01 + assert!(m("ROOT/parent_dir/dir_deep_01", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_01/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_01/child_dir", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_01/child_dir/file", false).is_ignore()); + + // 02 + assert!(m("ROOT/parent_dir/dir_deep_02", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_02/file", false).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_02/child_dir", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_02/child_dir/file", false).is_none()); + + // 03 + assert!(m("ROOT/parent_dir/dir_deep_03", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_03/file", false).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_03/child_dir", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_03/child_dir/file", false).is_none()); + + // 10 + assert!(m("ROOT/parent_dir/dir_deep_10", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_10/file", false).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_10/child_dir", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_10/child_dir/file", false).is_none()); + + // 11 + assert!(m("ROOT/parent_dir/dir_deep_11", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_11/file", false).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_11/child_dir", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_11/child_dir/file", false).is_none()); + + // 12 + assert!(m("ROOT/parent_dir/dir_deep_12", true).is_none()); + 
assert!(m("ROOT/parent_dir/dir_deep_12/file", false).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_12/child_dir", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_12/child_dir/file", false).is_none()); + + // 13 + assert!(m("ROOT/parent_dir/dir_deep_13", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_13/file", false).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_13/child_dir", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_13/child_dir/file", false).is_none()); + + // 20 + assert!(m("ROOT/parent_dir/dir_deep_20", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_20/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_20/child_dir", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_20/child_dir/file", false).is_ignore()); + + // 21 + assert!(m("ROOT/parent_dir/dir_deep_21", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_21/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_21/child_dir", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_21/child_dir/file", false).is_ignore()); + + // 22 + // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_22", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_22/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_22/child_dir", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_22/child_dir/file", false).is_ignore()); + + // 23 + // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_23", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_23/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_23/child_dir", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_23/child_dir/file", false).is_ignore()); + + // 30 + assert!(m("ROOT/parent_dir/dir_deep_30", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_30/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_30/child_dir", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_30/child_dir/file", false).is_ignore()); + + // 31 + assert!(m("ROOT/parent_dir/dir_deep_31", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_31/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_31/child_dir", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_31/child_dir/file", false).is_ignore()); + + // 32 + // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_32", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_32/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_32/child_dir", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_32/child_dir/file", false).is_ignore()); + + // 33 + // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_33", true).is_none()); + assert!(m("ROOT/parent_dir/dir_deep_33/file", false).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_33/child_dir", true).is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_33/child_dir/file", false).is_ignore()); +} diff --git a/crates/node/src/lib.rs b/crates/node/src/lib.rs index 5811698c3bdd..11ff25b0aca0 100644 --- a/crates/node/src/lib.rs +++ b/crates/node/src/lib.rs @@ -28,20 +28,27 @@ pub struct GlobEntry { pub pattern: String, } -impl From for tailwindcss_oxide::ChangedContent<'_> { +#[derive(Debug, Clone)] +#[napi(object)] +pub struct SourceEntry { + /// Base path of the glob + pub base: String, + + /// Glob pattern + pub pattern: String, + + /// Negated flag + pub negated: bool, +} + +impl From for tailwindcss_oxide::ChangedContent { fn from(changed_content: ChangedContent) -> Self { 
         if let Some(file) = changed_content.file {
-            return tailwindcss_oxide::ChangedContent::File(
-                file.into(),
-                changed_content.extension.into(),
-            );
+            return tailwindcss_oxide::ChangedContent::File(file.into(), changed_content.extension);
         }
 
         if let Some(contents) = changed_content.content {
-            return tailwindcss_oxide::ChangedContent::Content(
-                contents,
-                changed_content.extension.into(),
-            );
+            return tailwindcss_oxide::ChangedContent::Content(contents, changed_content.extension);
         }
 
         unreachable!()
@@ -66,13 +73,23 @@ impl From<tailwindcss_oxide::GlobEntry> for GlobEntry {
     }
 }
 
+impl From<SourceEntry> for tailwindcss_oxide::PublicSourceEntry {
+    fn from(source: SourceEntry) -> Self {
+        Self {
+            base: source.base,
+            pattern: source.pattern,
+            negated: source.negated,
+        }
+    }
+}
+
 // ---
 
 #[derive(Debug, Clone)]
 #[napi(object)]
 pub struct ScannerOptions {
     /// Glob sources
-    pub sources: Option<Vec<GlobEntry>>,
+    pub sources: Option<Vec<SourceEntry>>,
 }
 
 #[derive(Debug, Clone)]
@@ -96,11 +113,10 @@ impl Scanner {
     #[napi(constructor)]
     pub fn new(opts: ScannerOptions) -> Self {
         Self {
-            scanner: tailwindcss_oxide::Scanner::new(
-                opts
-                    .sources
-                    .map(|x| x.into_iter().map(Into::into).collect()),
-            ),
+            scanner: tailwindcss_oxide::Scanner::new(match opts.sources {
+                Some(sources) => sources.into_iter().map(Into::into).collect(),
+                None => vec![],
+            }),
         }
     }
 
@@ -158,4 +174,14 @@ impl Scanner {
             .map(Into::into)
             .collect()
     }
+
+    #[napi(getter)]
+    pub fn normalized_sources(&mut self) -> Vec<GlobEntry> {
+        self
+            .scanner
+            .get_normalized_sources()
+            .into_iter()
+            .map(Into::into)
+            .collect()
+    }
 }
diff --git a/crates/oxide/Cargo.toml b/crates/oxide/Cargo.toml
index f3b67d8d75fb..448fc5c9f227 100644
--- a/crates/oxide/Cargo.toml
+++ b/crates/oxide/Cargo.toml
@@ -13,11 +13,11 @@ crossbeam = "0.8.4"
 tracing = { version = "0.1.40", features = [] }
 tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
 walkdir = "2.5.0"
-ignore = "0.4.23"
 dunce = "1.0.5"
 bexpand = "1.2.0"
 fast-glob = "0.4.3"
 classification-macros = { path = "../classification-macros" }
+ignore = { path = "../ignore" }
 
 regex = "1.11.1"
 fancy-regex = "0.14.0"
diff --git a/crates/oxide/src/extractor/mod.rs b/crates/oxide/src/extractor/mod.rs
index ce6d0aeae31b..737189104df9 100644
--- a/crates/oxide/src/extractor/mod.rs
+++ b/crates/oxide/src/extractor/mod.rs
@@ -202,7 +202,7 @@ mod tests {
     use std::hint::black_box;
 
     fn pre_process_input(input: &str, extension: &str) -> String {
-        let input = crate::pre_process_input(input.as_bytes(), extension);
+        let input = crate::scanner::pre_process_input(input.as_bytes(), extension);
         String::from_utf8(input).unwrap()
     }
 
diff --git a/crates/oxide/src/extractor/pre_processors/ruby.rs b/crates/oxide/src/extractor/pre_processors/ruby.rs
index edc7be49d7d3..121af8e5cd3d 100644
--- a/crates/oxide/src/extractor/pre_processors/ruby.rs
+++ b/crates/oxide/src/extractor/pre_processors/ruby.rs
@@ -3,7 +3,7 @@ use crate::cursor;
 use crate::extractor::bracket_stack;
 use crate::extractor::pre_processors::pre_processor::PreProcessor;
-use crate::pre_process_input;
+use crate::scanner::pre_process_input;
 use bstr::ByteSlice;
 use fancy_regex::Regex;
 use std::sync;
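A hedged sketch of the new source plumbing above (the types and field names come from this diff; the literal values and the reading of `negated` are illustrative assumptions, not taken from the PR):

```rust
// Hypothetical input as it would arrive from the JS side.
let source = SourceEntry {
    base: "/my-project/src".into(),
    pattern: "**/*.html".into(),
    // Assumption: `negated: true` corresponds to a `!`-prefixed source
    // pattern, i.e. it subtracts matches rather than adding them.
    negated: false,
};
// The `From<SourceEntry>` impl above hands it to the core scanner type.
let public: tailwindcss_oxide::PublicSourceEntry = source.into();
```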
diff --git a/crates/oxide/src/extractor/pre_processors/vue.rs b/crates/oxide/src/extractor/pre_processors/vue.rs
index 15440bb865ff..119e2a3d2079 100644
--- a/crates/oxide/src/extractor/pre_processors/vue.rs
+++ b/crates/oxide/src/extractor/pre_processors/vue.rs
@@ -1,5 +1,5 @@
 use crate::extractor::pre_processors::pre_processor::PreProcessor;
-use crate::pre_process_input;
+use crate::scanner::pre_process_input;
 use bstr::ByteSlice;
 use regex::Regex;
 use std::sync;
diff --git a/crates/oxide/src/glob.rs b/crates/oxide/src/glob.rs
index ca73b5ee116b..5b6e0715f2b4 100644
--- a/crates/oxide/src/glob.rs
+++ b/crates/oxide/src/glob.rs
@@ -1,11 +1,17 @@
-use fast_glob::glob_match;
 use fxhash::{FxHashMap, FxHashSet};
-use std::path::{Path, PathBuf};
+use std::path::PathBuf;
 use tracing::event;
 
-use crate::GlobEntry;
+#[derive(Debug, Clone, PartialEq)]
+pub struct GlobEntry {
+    /// Base path of the glob
+    pub base: String,
 
-pub fn hoist_static_glob_parts(entries: &Vec<GlobEntry>) -> Vec<GlobEntry> {
+    /// Glob pattern
+    pub pattern: String,
+}
+
+pub fn hoist_static_glob_parts(entries: &Vec<GlobEntry>, emit_parent_glob: bool) -> Vec<GlobEntry> {
     let mut result = vec![];
 
     for entry in entries {
@@ -40,7 +46,7 @@ pub fn hoist_static_glob_parts(entries: &Vec<GlobEntry>) -> Vec<GlobEntry> {
         // If the base path is a file, then we want to move the file to the pattern, and point the
         // directory to the base. This is necessary for file watchers that can only listen to
         // folders.
-        if pattern.is_empty() && base.is_file() {
+        if emit_parent_glob && pattern.is_empty() && base.is_file() {
             result.push(GlobEntry {
                 // SAFETY: `parent()` will be available because we verify `base` is a file, thus a
                 // parent folder exists.
@@ -83,7 +89,7 @@ pub fn hoist_static_glob_parts(entries: &Vec<GlobEntry>) -> Vec<GlobEntry> {
 /// tailwind --pwd ./project/components --content "**/*.js"
 /// ```
 pub fn optimize_patterns(entries: &Vec<GlobEntry>) -> Vec<GlobEntry> {
-    let entries = hoist_static_glob_parts(entries);
+    let entries = hoist_static_glob_parts(entries, true);
 
     // Track all base paths and their patterns. Later we will turn them back into `GlobEntry`s.
     let mut pattern_map: FxHashMap<String, FxHashSet<String>> = FxHashMap::default();
@@ -132,11 +138,23 @@ pub fn optimize_patterns(entries: &Vec<GlobEntry>) -> Vec<GlobEntry> {
 // using `*`.
 //
 // E.g.:
-// Original input: `../project-b/**/*.{html,js}`
-// Expanded input: `../project-b/**/*.html` & `../project-b/**/*.js`
-// Split on first input: ("../project-b", "**/*.html")
-// Split on second input: ("../project-b", "**/*.js")
-fn split_pattern(pattern: &str) -> (Option<String>, Option<String>) {
+//
+// Input:
+// - `../project-b/**/*.html`
+// - `../project-b/**/*.js`
+//
+// Split results in:
+// - `("../project-b", "**/*.html")`
+// - `("../project-b", "**/*.js")`
+//
+// A static file glob should also be considered as a dynamic part.
+//
+// E.g.:
+//
+// Input: `../project-b/foo/bar.html`
+// Split results in: `("../project-b/foo", "bar.html")`
+//
+pub fn split_pattern(pattern: &str) -> (Option<String>, Option<String>) {
     // No dynamic parts, so we can just return the input as-is.
if !pattern.contains('*') { return (Some(pattern.to_owned()), None); @@ -168,14 +186,6 @@ fn split_pattern(pattern: &str) -> (Option, Option) { (static_part, dynamic_part) } -pub fn path_matches_globs(path: &Path, globs: &[GlobEntry]) -> bool { - let path = path.to_string_lossy(); - - globs - .iter() - .any(|g| glob_match(format!("{}/{}", g.base, g.pattern), path.as_bytes())) -} - #[cfg(test)] mod tests { use super::optimize_patterns; diff --git a/crates/oxide/src/lib.rs b/crates/oxide/src/lib.rs index 9088763c39de..d7f0321af16a 100644 --- a/crates/oxide/src/lib.rs +++ b/crates/oxide/src/lib.rs @@ -1,22 +1,3 @@ -use crate::glob::hoist_static_glob_parts; -use crate::scanner::allowed_paths::resolve_paths; -use crate::scanner::detect_sources::DetectSources; -use bexpand::Expression; -use bstr::ByteSlice; -use extractor::{Extracted, Extractor}; -use fast_glob::glob_match; -use fxhash::{FxHashMap, FxHashSet}; -use glob::optimize_patterns; -use paths::Path; -use rayon::prelude::*; -use scanner::allowed_paths::read_dir; -use std::borrow::Cow; -use std::fs; -use std::path::PathBuf; -use std::sync; -use std::time::SystemTime; -use tracing::event; - pub mod cursor; pub mod extractor; pub mod fast_skip; @@ -25,554 +6,7 @@ pub mod paths; pub mod scanner; pub mod throughput; -static SHOULD_TRACE: sync::LazyLock = sync::LazyLock::new( - || matches!(std::env::var("DEBUG"), Ok(value) if value.eq("*") || (value.contains("tailwindcss:oxide") && !value.contains("-tailwindcss:oxide"))), -); - -fn init_tracing() { - if !*SHOULD_TRACE { - return; - } - - _ = tracing_subscriber::fmt() - .with_max_level(tracing::Level::INFO) - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) - .compact() - .try_init(); -} - -#[derive(Debug, Clone)] -pub enum ChangedContent<'a> { - File(PathBuf, Cow<'a, str>), - Content(String, Cow<'a, str>), -} - -#[derive(Debug, Clone)] -pub struct ScanOptions { - /// Base path to start scanning from - pub base: Option, - /// Glob sources - pub sources: Vec, -} - -#[derive(Debug, Clone)] -pub struct ScanResult { - pub candidates: Vec, - pub files: Vec, - pub globs: Vec, -} - -#[derive(Debug, Clone, PartialEq)] -pub struct GlobEntry { - pub base: String, - pub pattern: String, -} - -#[derive(Debug, Clone, Default)] -pub struct Scanner { - /// Glob sources - sources: Option>, - - /// Scanner is ready to scan. We delay the file system traversal for detecting all files until - /// we actually need them. 
- ready: bool, - - /// All files that we have to scan - files: Vec, - - /// All directories, sub-directories, etc… we saw during source detection - dirs: Vec, - - /// All generated globs - globs: Vec, - - /// Track file modification times - mtimes: FxHashMap, - - /// Track unique set of candidates - candidates: FxHashSet, -} - -impl Scanner { - pub fn new(sources: Option>) -> Self { - Self { - sources, - ..Default::default() - } - } - - pub fn scan(&mut self) -> Vec { - init_tracing(); - - self.prepare(); - self.compute_candidates(); - - let mut candidates: Vec = self.candidates.clone().into_par_iter().collect(); - candidates.par_sort_unstable(); - - candidates - } - - #[tracing::instrument(skip_all)] - pub fn scan_content(&mut self, changed_content: Vec) -> Vec { - self.prepare(); - let candidates = parse_all_blobs(read_all_files(changed_content)); - - let mut new_candidates = vec![]; - for candidate in candidates { - if self.candidates.contains(&candidate) { - continue; - } - self.candidates.insert(candidate.clone()); - new_candidates.push(candidate); - } - - new_candidates - } - - #[tracing::instrument(skip_all)] - pub fn get_candidates_with_positions( - &mut self, - changed_content: ChangedContent, - ) -> Vec<(String, usize)> { - self.prepare(); - - let content = read_changed_content(changed_content).unwrap_or_default(); - let original_content = &content; - - // Workaround for legacy upgrades: - // - // `-[]` won't parse in the new parser (`[…]` must contain _something_), but we do need it - // for people using `group-[]` (which we will later replace with `in-[.group]` instead). - let content = content.replace("-[]", "XYZ"); - let offset = content.as_ptr() as usize; - - let mut extractor = Extractor::new(&content[..]); - - extractor - .extract() - .into_par_iter() - .flat_map(|extracted| match extracted { - Extracted::Candidate(s) => { - let i = s.as_ptr() as usize - offset; - let original = &original_content[i..i + s.len()]; - if original.contains_str("-[]") { - return Some(unsafe { - (String::from_utf8_unchecked(original.to_vec()), i) - }); - } - - // SAFETY: When we parsed the candidates, we already guaranteed that the byte - // slices are valid, therefore we don't have to re-check here when we want to - // convert it back to a string. 
- Some(unsafe { (String::from_utf8_unchecked(s.to_vec()), i) }) - } - - _ => None, - }) - .collect() - } - - #[tracing::instrument(skip_all)] - pub fn get_files(&mut self) -> Vec { - self.prepare(); - - self.files - .par_iter() - .filter_map(|x| Path::from(x.clone()).canonicalize().ok()) - .map(|x| x.to_string()) - .collect() - } - - #[tracing::instrument(skip_all)] - pub fn get_globs(&mut self) -> Vec { - self.prepare(); - - self.globs.clone() - } - - #[tracing::instrument(skip_all)] - fn compute_candidates(&mut self) { - let mut changed_content = vec![]; - - let current_mtimes = self - .files - .par_iter() - .map(|path| { - fs::metadata(path) - .and_then(|m| m.modified()) - .unwrap_or(SystemTime::now()) - }) - .collect::>(); - - for (idx, path) in self.files.iter().enumerate() { - let current_time = current_mtimes[idx]; - let previous_time = self.mtimes.insert(path.clone(), current_time); - - let should_scan_file = match previous_time { - // Time has changed, so we need to re-scan the file - Some(prev) if prev != current_time => true, - - // File was in the cache, no need to re-scan - Some(_) => false, - - // File didn't exist before, so we need to scan it - None => true, - }; - - if should_scan_file { - let extension = path.extension().unwrap_or_default().to_string_lossy(); - changed_content.push(ChangedContent::File(path.to_path_buf(), extension)) - } - } - - if !changed_content.is_empty() { - let candidates = parse_all_blobs(read_all_files(changed_content)); - self.candidates.par_extend(candidates); - } - } - - // Ensures that all files/globs are resolved and the scanner is ready to scan - // content for candidates. - fn prepare(&mut self) { - if self.ready { - self.check_for_new_files(); - return; - } - - self.scan_sources(); - - self.ready = true; - } - - #[tracing::instrument(skip_all)] - fn check_for_new_files(&mut self) { - let current_mtimes = self - .dirs - .par_iter() - .map(|path| { - fs::metadata(path) - .and_then(|m| m.modified()) - .unwrap_or(SystemTime::now()) - }) - .collect::>(); - - let mut modified_dirs: Vec = vec![]; - - // Check all directories to see if they were modified - for (idx, path) in self.dirs.iter().enumerate() { - let current_time = current_mtimes[idx]; - let previous_time = self.mtimes.insert(path.clone(), current_time); - - let should_scan = match previous_time { - // Time has changed, so we need to re-scan the file - Some(prev) if prev != current_time => true, - - // File was in the cache, no need to re-scan - Some(_) => false, - - // File didn't exist before, so we need to scan it - None => true, - }; - - if should_scan { - modified_dirs.push(path.clone()); - } - } - - // Scan all modified directories for their immediate files - let mut known = FxHashSet::from_iter(self.files.iter().chain(self.dirs.iter()).cloned()); - - while !modified_dirs.is_empty() { - let new_entries = modified_dirs - .iter() - .flat_map(|dir| read_dir(dir, Some(1))) - .map(|entry| entry.path().to_owned()) - .filter(|path| !known.contains(path)) - .collect::>(); - - modified_dirs.clear(); - - for path in new_entries { - if path.is_file() { - known.insert(path.clone()); - self.files.push(path); - } else if path.is_dir() { - known.insert(path.clone()); - self.dirs.push(path.clone()); - - // Recursively scan the new directory for files - modified_dirs.push(path); - } - } - } - } - - #[tracing::instrument(skip_all)] - fn scan_sources(&mut self) { - let Some(sources) = &self.sources else { - return; - }; - - if sources.is_empty() { - return; - } - - // Expand glob patterns and create 
new `GlobEntry` instances for each expanded pattern. - let sources = sources - .iter() - .flat_map(|source| { - let expression: Result = source.pattern[..].try_into(); - let Ok(expression) = expression else { - return vec![source.clone()]; - }; - - expression - .into_iter() - .filter_map(Result::ok) - .map(move |pattern| GlobEntry { - base: source.base.clone(), - pattern: pattern.into(), - }) - .collect::>() - }) - .collect::>(); - - // Partition sources into sources that should be promoted to auto source detection and - // sources that should be resolved as globs. - let (auto_sources, glob_sources): (Vec<_>, Vec<_>) = sources.iter().partition(|source| { - // If a glob ends with `/**/*`, then we just want to register the base path as a new - // base. Essentially converting it to use auto source detection. - if source.pattern.ends_with("**/*") { - return true; - } - - // Directories should be promoted to auto source detection - if PathBuf::from(&source.base).join(&source.pattern).is_dir() { - return true; - } - - false - }); - - fn join_paths(a: &str, b: &str) -> PathBuf { - let mut tmp = a.to_owned(); - let b = b.trim_end_matches("**/*").trim_end_matches('/'); - - if b.starts_with('/') { - return PathBuf::from(b); - } - - // On Windows a path like C:/foo.txt is absolute but C:foo.txt is not - // (the 2nd is relative to the CWD) - if b.chars().nth(1) == Some(':') && b.chars().nth(2) == Some('/') { - return PathBuf::from(b); - } - - tmp += "/"; - tmp += b; - - PathBuf::from(&tmp) - } - - for path in auto_sources.iter().filter_map(|source| { - dunce::canonicalize(join_paths(&source.base, &source.pattern)).ok() - }) { - // Insert a glob for the base path, so we can see new files/folders in the directory itself. - self.globs.push(GlobEntry { - base: path.to_string_lossy().into(), - pattern: "*".into(), - }); - - // Detect all files/folders in the directory - let detect_sources = DetectSources::new(path); - - let (files, globs, dirs) = detect_sources.detect(); - self.files.extend(files); - self.globs.extend(globs); - self.dirs.extend(dirs); - } - - // Turn `Vec<&GlobEntry>` in `Vec` - let glob_sources: Vec<_> = glob_sources.into_iter().cloned().collect(); - let hoisted = hoist_static_glob_parts(&glob_sources); - - for source in &hoisted { - // If the pattern is empty, then the base points to a specific file or folder already - // if it doesn't contain any dynamic parts. In that case we can use the base as the - // pattern. - // - // Otherwise we need to combine the base and the pattern, otherwise a pattern that - // looks like `*.html`, will never match a path that looks like - // `/my-project/project-a/index.html`, because it contains `/`. - // - // We can't prepend `**/`, because then `/my-project/project-a/nested/index.html` would - // match as well. - // - // Instead we combine the base and the pattern as a single glob pattern. 
- let mut full_pattern = source.base.clone().replace('\\', "/"); - - if !source.pattern.is_empty() { - full_pattern.push('/'); - full_pattern.push_str(&source.pattern); - } - - let base = PathBuf::from(&source.base); - for entry in resolve_paths(&base) { - let Some(file_type) = entry.file_type() else { - continue; - }; - - if !file_type.is_file() { - continue; - } - - let file_path = entry.into_path(); - - let Some(file_path_str) = file_path.to_str() else { - continue; - }; - - let file_path_str = file_path_str.replace('\\', "/"); - - if glob_match(&full_pattern, &file_path_str) { - self.files.push(file_path); - } - } - } - - self.globs.extend(hoisted); - - // Re-optimize the globs to reduce the number of patterns we have to scan. - self.globs = optimize_patterns(&self.globs); - } -} - -fn read_changed_content(c: ChangedContent) -> Option> { - let (content, extension) = match c { - ChangedContent::File(file, extension) => match std::fs::read(&file) { - Ok(content) => (content, extension), - Err(e) => { - event!(tracing::Level::ERROR, "Failed to read file: {:?}", e); - return None; - } - }, - - ChangedContent::Content(contents, extension) => (contents.into_bytes(), extension), - }; - - Some(pre_process_input(&content, &extension)) -} - -pub fn pre_process_input(content: &[u8], extension: &str) -> Vec { - use crate::extractor::pre_processors::*; - - match extension { - "clj" | "cljs" | "cljc" => Clojure.process(content), - "cshtml" | "razor" => Razor.process(content), - "haml" => Haml.process(content), - "json" => Json.process(content), - "pug" => Pug.process(content), - "rb" | "erb" => Ruby.process(content), - "slim" => Slim.process(content), - "svelte" => Svelte.process(content), - "vue" => Vue.process(content), - _ => content.to_vec(), - } -} - -#[tracing::instrument(skip_all)] -fn read_all_files(changed_content: Vec) -> Vec> { - event!( - tracing::Level::INFO, - "Reading {:?} file(s)", - changed_content.len() - ); - - changed_content - .into_par_iter() - .filter_map(read_changed_content) - .collect() -} - -#[tracing::instrument(skip_all)] -fn parse_all_blobs(blobs: Vec>) -> Vec { - let mut result: Vec<_> = blobs - .par_iter() - .flat_map(|blob| blob.par_split(|x| *x == b'\n')) - .filter_map(|blob| { - if blob.is_empty() { - return None; - } - - let extracted = crate::extractor::Extractor::new(blob).extract(); - if extracted.is_empty() { - return None; - } - - Some(FxHashSet::from_iter(extracted.into_iter().map( - |x| match x { - Extracted::Candidate(bytes) => bytes, - Extracted::CssVariable(bytes) => bytes, - }, - ))) - }) - .reduce(Default::default, |mut a, b| { - a.extend(b); - a - }) - .into_iter() - .map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) }) - .collect(); - - // SAFETY: Unstable sort is faster and in this scenario it's also safe because we are - // guaranteed to have unique candidates. - result.par_sort_unstable(); - - result -} - -#[cfg(test)] -mod tests { - use crate::Scanner; - - #[test] - fn test_positions() { - let mut scanner = Scanner::new(None); - - for (input, expected) in [ - // Before migrations - ( - r#"
<div class="tw:flex! tw:sm:block! tw:bg-linear-to-t flex tw:[color:red] tw:in-[.tw\:group]:flex"></div>
"#, - vec![ - ("class".to_string(), 5), - ("tw:flex!".to_string(), 12), - ("tw:sm:block!".to_string(), 21), - ("tw:bg-linear-to-t".to_string(), 34), - ("flex".to_string(), 52), - ("tw:[color:red]".to_string(), 57), - ("tw:in-[.tw\\:group]:flex".to_string(), 72), - ], - ), - ] { - let candidates = scanner.get_candidates_with_positions(crate::ChangedContent::Content( - input.to_string(), - "html".into(), - )); - assert_eq!(candidates, expected); - } - } -} +pub use glob::GlobEntry; +pub use scanner::sources::PublicSourceEntry; +pub use scanner::ChangedContent; +pub use scanner::Scanner; diff --git a/crates/oxide/src/scanner/allowed_paths.rs b/crates/oxide/src/scanner/allowed_paths.rs deleted file mode 100644 index b906335f8979..000000000000 --- a/crates/oxide/src/scanner/allowed_paths.rs +++ /dev/null @@ -1,128 +0,0 @@ -use ignore::{DirEntry, WalkBuilder}; -use std::{path::Path, sync}; - -static BINARY_EXTENSIONS: sync::LazyLock> = sync::LazyLock::new(|| { - include_str!("fixtures/binary-extensions.txt") - .trim() - .lines() - .collect() -}); - -static IGNORED_EXTENSIONS: sync::LazyLock> = sync::LazyLock::new(|| { - include_str!("fixtures/ignored-extensions.txt") - .trim() - .lines() - .collect() -}); - -static IGNORED_FILES: sync::LazyLock> = sync::LazyLock::new(|| { - include_str!("fixtures/ignored-files.txt") - .trim() - .lines() - .collect() -}); - -static IGNORED_CONTENT_DIRS: sync::LazyLock> = - sync::LazyLock::new(|| vec![".git"]); - -#[tracing::instrument(skip_all)] -pub fn resolve_allowed_paths(root: &Path) -> impl Iterator { - // Read the directory recursively with no depth limit - read_dir(root, None) -} - -#[tracing::instrument(skip_all)] -pub fn resolve_paths(root: &Path) -> impl Iterator { - create_walk_builder(root).build().filter_map(Result::ok) -} - -pub fn read_dir(root: &Path, depth: Option) -> impl Iterator { - create_walk_builder(root) - .max_depth(depth) - .filter_entry(move |entry| match entry.file_type() { - Some(file_type) if file_type.is_dir() => match entry.file_name().to_str() { - Some(dir) => !IGNORED_CONTENT_DIRS.contains(&dir), - None => false, - }, - Some(file_type) if file_type.is_file() || file_type.is_symlink() => { - is_allowed_content_path(entry.path()) - } - _ => false, - }) - .build() - .filter_map(Result::ok) -} - -fn create_walk_builder(root: &Path) -> WalkBuilder { - let mut builder = WalkBuilder::new(root); - - // Scan hidden files / directories - builder.hidden(false); - - // By default, allow .gitignore files to be used regardless of whether or not - // a .git directory is present. This is an optimization for when projects - // are first created and may not be in a git repo yet. - builder.require_git(false); - - // Don't descend into .git directories inside the root folder - // This is necessary when `root` contains the `.git` dir. - builder.filter_entry(|entry| entry.file_name() != ".git"); - - // If we are in a git repo then require it to ensure that only rules within - // the repo are used. For example, we don't want to consider a .gitignore file - // in the user's home folder if we're in a git repo. - // - // The alternative is using a call like `.parents(false)` but that will - // prevent looking at parent directories for .gitignore files from within - // the repo and that's not what we want. 
- // - // For example, in a project with this structure: - // - // home - // .gitignore - // my-project - // .gitignore - // apps - // .gitignore - // web - // {root} - // - // We do want to consider all .gitignore files listed: - // - home/.gitignore - // - my-project/.gitignore - // - my-project/apps/.gitignore - // - // However, if a repo is initialized inside my-project then only the following - // make sense for consideration: - // - my-project/.gitignore - // - my-project/apps/.gitignore - // - // Setting the require_git(true) flag conditionally allows us to do this. - for parent in root.ancestors() { - if parent.join(".git").exists() { - builder.require_git(true); - break; - } - } - - builder -} - -pub fn is_allowed_content_path(path: &Path) -> bool { - // Skip known ignored files - if path - .file_name() - .unwrap() - .to_str() - .map(|s| IGNORED_FILES.contains(&s)) - .unwrap_or(false) - { - return false; - } - - // Skip known ignored extensions - path.extension() - .map(|s| s.to_str().unwrap_or_default()) - .map(|ext| !IGNORED_EXTENSIONS.contains(&ext) && !BINARY_EXTENSIONS.contains(&ext)) - .unwrap_or(false) -} diff --git a/crates/oxide/src/scanner/auto_source_detection.rs b/crates/oxide/src/scanner/auto_source_detection.rs new file mode 100644 index 000000000000..e9b7f64aacbc --- /dev/null +++ b/crates/oxide/src/scanner/auto_source_detection.rs @@ -0,0 +1,65 @@ +use ignore::gitignore::{Gitignore, GitignoreBuilder}; +use std::sync; + +/// All the default rules for auto source detection. +/// +/// This includes: +/// +/// - Ignoring common content directories like `.git` and `node_modules` +/// - Ignoring file extensions we definitely don't want to include like `.css` and `.scss` +/// - Ignoring common binary file extensions like `.png` and `.jpg` +/// - Ignoring common files like `yarn.lock` and `package-lock.json` +/// +pub static RULES: sync::LazyLock = sync::LazyLock::new(|| { + let mut builder = GitignoreBuilder::new(""); + + builder.add_line(None, &IGNORED_CONTENT_DIRS_GLOB).unwrap(); + builder.add_line(None, &IGNORED_EXTENSIONS_GLOB).unwrap(); + builder.add_line(None, &BINARY_EXTENSIONS_GLOB).unwrap(); + builder.add_line(None, &IGNORED_FILES_GLOB).unwrap(); + + builder.build().unwrap() +}); + +pub static IGNORED_CONTENT_DIRS: sync::LazyLock> = sync::LazyLock::new(|| { + include_str!("fixtures/ignored-content-dirs.txt") + .trim() + .lines() + .collect() +}); + +static IGNORED_CONTENT_DIRS_GLOB: sync::LazyLock = + sync::LazyLock::new(|| format!("{{{}}}/", IGNORED_CONTENT_DIRS.join(","))); + +static IGNORED_EXTENSIONS_GLOB: sync::LazyLock = sync::LazyLock::new(|| { + format!( + "*.{{{}}}", + include_str!("fixtures/ignored-extensions.txt") + .trim() + .lines() + .collect::>() + .join(",") + ) +}); + +pub static BINARY_EXTENSIONS_GLOB: sync::LazyLock = sync::LazyLock::new(|| { + format!( + "*.{{{}}}", + include_str!("fixtures/binary-extensions.txt") + .trim() + .lines() + .collect::>() + .join(",") + ) +}); + +static IGNORED_FILES_GLOB: sync::LazyLock = sync::LazyLock::new(|| { + format!( + "{{{}}}", + include_str!("fixtures/ignored-files.txt") + .trim() + .lines() + .collect::>() + .join(",") + ) +}); diff --git a/crates/oxide/src/scanner/detect_sources.rs b/crates/oxide/src/scanner/detect_sources.rs index 6828e8eca7c7..3a32038ddedd 100644 --- a/crates/oxide/src/scanner/detect_sources.rs +++ b/crates/oxide/src/scanner/detect_sources.rs @@ -1,16 +1,11 @@ -use crate::scanner::allowed_paths::{is_allowed_content_path, resolve_allowed_paths}; use crate::GlobEntry; use 
fxhash::FxHashSet; +use globwalk::DirEntry; use std::cmp::Ordering; use std::path::PathBuf; use std::sync; use walkdir::WalkDir; -#[derive(Debug, Clone)] -pub struct DetectSources { - base: PathBuf, -} - static KNOWN_EXTENSIONS: sync::LazyLock> = sync::LazyLock::new(|| { include_str!("fixtures/template-extensions.txt") .trim() @@ -22,200 +17,133 @@ static KNOWN_EXTENSIONS: sync::LazyLock> = sync::LazyLock::new .collect() }); -impl DetectSources { - pub fn new(base: PathBuf) -> Self { - Self { base } - } - - pub fn detect(&self) -> (Vec, Vec, Vec) { - let (files, dirs) = self.resolve_files(); - let globs = self.resolve_globs(&dirs); - - (files, globs, dirs) +// Sorting to make sure that we always see the directories before the files. Also sorting +// alphabetically by default. +fn sort_by_dir_and_name(a: &DirEntry, z: &DirEntry) -> Ordering { + match (a.file_type().is_dir(), z.file_type().is_dir()) { + (true, false) => Ordering::Less, + (false, true) => Ordering::Greater, + _ => a.file_name().cmp(z.file_name()), } +} - fn resolve_files(&self) -> (Vec, Vec) { - let mut files: Vec = vec![]; - let mut dirs: Vec = vec![]; - - for entry in resolve_allowed_paths(&self.base) { - let Some(file_type) = entry.file_type() else { - continue; - }; - - if file_type.is_file() { - files.push(entry.into_path()); - } else if file_type.is_dir() { - dirs.push(entry.into_path()); - } +pub fn resolve_globs( + base: PathBuf, + dirs: &[PathBuf], + extensions: &FxHashSet, +) -> Vec { + let allowed_paths: FxHashSet = FxHashSet::from_iter(dirs.iter().cloned()); + + // A list of known extensions + a list of extensions we found in the project. + let mut found_extensions: FxHashSet = + FxHashSet::from_iter(KNOWN_EXTENSIONS.iter().map(|x| x.to_string())); + found_extensions.extend(extensions.iter().cloned()); + + // A list of directory names where we can't use globs, but we should track each file + // individually instead. This is because these directories are often used for both source and + // destination files. + let forced_static_directories: FxHashSet = + FxHashSet::from_iter(vec![base.join("public")]); + + // All directories where we can safely use deeply nested globs to watch all files. + // In other comments we refer to these as "deep glob directories" or similar. + // + // E.g.: `./src/**/*.{html,js}` + let mut deep_globable_directories: FxHashSet = Default::default(); + + // All directories where we can only use shallow globs to watch all direct files but not + // folders. + // In other comments we refer to these as "shallow glob directories" or similar. + // + // E.g.: `./src/*/*.{html,js}` + let mut shallow_globable_directories: FxHashSet = Default::default(); + + // Collect all valid paths from the root. This will already filter out ignored files, unknown + // extensions and binary files. + let mut it = WalkDir::new(&base) + .sort_by(sort_by_dir_and_name) + .into_iter(); + + // Figure out all the shallow globable directories. + while let Some(Ok(entry)) = it.next() { + let path = entry.path(); + if !path.is_dir() { + continue; } - (files, dirs) - } + if !allowed_paths.contains(path) { + let mut path = path; + while let Some(parent) = path.parent() { + if parent == base { + break; + } - fn resolve_globs(&self, dirs: &Vec) -> Vec { - let allowed_paths = FxHashSet::from_iter(dirs); - - // A list of directory names where we can't use globs, but we should track each file - // individually instead. This is because these directories are often used for both source and - // destination files. 
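// --- Illustrative sketch, not part of the surrounding change ---
// The two glob shapes this function hands out, checked against the
// `fast_glob` matcher the scanner already uses. `*` stops at path
// separators while `**` crosses them, which is exactly the difference
// between a "deep" and a "shallow" globable directory:
fn glob_shapes() {
    use fast_glob::glob_match;

    // Deep: every nested file below `src` matches.
    assert!(glob_match("src/**/*.html", "src/a/b/c/page.html"));

    // Shallow: only files one directory deep match; anything deeper
    // (e.g. an ignored subtree) falls through.
    assert!(glob_match("src/*/*.html", "src/pages/index.html"));
    assert!(!glob_match("src/*/*.html", "src/a/b/page.html"));
}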
- let mut forced_static_directories = vec![self.base.join("public")]; - - // A list of known extensions + a list of extensions we found in the project. - let mut found_extensions = - FxHashSet::from_iter(KNOWN_EXTENSIONS.iter().map(|x| x.to_string())); - - // All root directories. - let mut root_directories = FxHashSet::from_iter(vec![self.base.clone()]); - - // All directories where we can safely use deeply nested globs to watch all files. - // In other comments we refer to these as "deep glob directories" or similar. - // - // E.g.: `./src/**/*.{html,js}` - let mut deep_globable_directories: FxHashSet = FxHashSet::default(); - - // All directories where we can only use shallow globs to watch all direct files but not - // folders. - // In other comments we refer to these as "shallow glob directories" or similar. - // - // E.g.: `./src/*/*.{html,js}` - let mut shallow_globable_directories: FxHashSet = FxHashSet::default(); - - // Collect all valid paths from the root. This will already filter out ignored files, unknown - // extensions and binary files. - let mut it = WalkDir::new(&self.base) - // Sorting to make sure that we always see the directories before the files. Also sorting - // alphabetically by default. - .sort_by( - |a, z| match (a.file_type().is_dir(), z.file_type().is_dir()) { - (true, false) => Ordering::Less, - (false, true) => Ordering::Greater, - _ => a.file_name().cmp(z.file_name()), - }, - ) - .into_iter(); - - // We are only interested in valid entries - while let Some(Ok(entry)) = it.next() { - // Ignore known directories that we don't want to traverse into. - if entry.file_type().is_dir() && entry.file_name() == ".git" { - it.skip_current_dir(); - continue; + shallow_globable_directories.insert(parent.to_path_buf()); + path = parent } - if entry.file_type().is_dir() { - // If we are in a directory where we know that we can't use any globs, then we have to - // track each file individually. - if forced_static_directories.contains(&entry.path().to_path_buf()) { - forced_static_directories.push(entry.path().to_path_buf()); - root_directories.insert(entry.path().to_path_buf()); - continue; - } + it.skip_current_dir(); + } + } - // Although normally very unlikely, if running inside a dockerfile - // the current directory might be "/" with no parent - if let Some(parent) = entry.path().parent() { - // If we are in a directory where the parent is a forced static directory, then this - // will become a forced static directory as well. - if forced_static_directories.contains(&parent.to_path_buf()) { - forced_static_directories.push(entry.path().to_path_buf()); - root_directories.insert(entry.path().to_path_buf()); - continue; - } - } + // Figure out all the deep globable directories. + let mut it = WalkDir::new(&base) + .sort_by(sort_by_dir_and_name) + .into_iter(); - // If we are in a directory, and the directory is git ignored, then we don't have to - // descent into the directory. However, we have to make sure that we mark the _parent_ - // directory as a shallow glob directory because using deep globs from any of the - // parent directories will include this ignored directory which should not be the case. - // - // Another important part is that if one of the ignored directories is a deep glob - // directory, then all of its parents (until the root) should be marked as shallow glob - // directories as well. 
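// --- Illustrative sketch, not part of the surrounding change ---
// The traversal idiom used on both sides of this diff, reduced to a
// standalone function (names here are illustrative): sort directories
// before files so a directory's verdict is known before its contents
// stream by, and prune rejected subtrees with `skip_current_dir`.
use std::cmp::Ordering;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;

fn visible_files(root: &Path) -> Vec<PathBuf> {
    let mut files = Vec::new();
    let mut it = WalkDir::new(root)
        .sort_by(|a, z| match (a.file_type().is_dir(), z.file_type().is_dir()) {
            (true, false) => Ordering::Less,
            (false, true) => Ordering::Greater,
            _ => a.file_name().cmp(z.file_name()),
        })
        .into_iter();

    while let Some(Ok(entry)) = it.next() {
        if entry.file_type().is_dir() && entry.file_name() == ".git" {
            // Skip the whole subtree, not just this entry.
            it.skip_current_dir();
            continue;
        }
        if entry.file_type().is_file() {
            files.push(entry.into_path());
        }
    }
    files
}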
- if !allowed_paths.contains(&entry.path().to_path_buf()) { - let mut parent = entry.path().parent(); - while let Some(parent_path) = parent { - // If the parent is already marked as a valid deep glob directory, then we have - // to mark it as a shallow glob directory instead, because we won't be able to - // use deep globs for this directory anymore. - if deep_globable_directories.contains(parent_path) { - deep_globable_directories.remove(parent_path); - shallow_globable_directories.insert(parent_path.to_path_buf()); - } - - // If we reached the root, then we can stop. - if parent_path == self.base { - break; - } - - // Mark the parent directory as a shallow glob directory and continue with its - // parent. - shallow_globable_directories.insert(parent_path.to_path_buf()); - parent = parent_path.parent(); - } - - it.skip_current_dir(); - continue; - } + while let Some(Ok(entry)) = it.next() { + let path = entry.path(); + if path.is_file() { + continue; + } - // If we are in a directory that is not git ignored, then we can mark this directory as - // a valid deep glob directory. This is only necessary if any of its parents aren't - // marked as deep glob directories already. - let mut found_deep_glob_parent = false; - let mut parent = entry.path().parent(); - while let Some(parent_path) = parent { - // If we reached the root, then we can stop. - if parent_path == self.base { - break; - } - - // If the parent is already marked as a deep glob directory, then we can stop - // because this glob will match the current directory already. - if deep_globable_directories.contains(parent_path) { - found_deep_glob_parent = true; - break; - } - - parent = parent_path.parent(); - } + if path == base { + continue; + } - // If we didn't find a deep glob directory parent, then we can mark this directory as a - // deep glob directory (unless it is the root). - if !found_deep_glob_parent && entry.path() != self.base { - deep_globable_directories.insert(entry.path().to_path_buf()); - } - } + if !allowed_paths.contains(path) { + continue; + } - // Handle allowed content paths - if is_allowed_content_path(entry.path()) - && allowed_paths.contains(&entry.path().to_path_buf()) - { - let path = entry.path(); + // Already marked as a shallow globable directory. + if shallow_globable_directories.contains(path) { + continue; + } - // Collect the extension for future use when building globs. - if let Some(extension) = path.extension().and_then(|x| x.to_str()) { - found_extensions.insert(extension.to_string()); - } - } + if forced_static_directories.contains(path) { + it.skip_current_dir(); + continue; } - let mut extension_list = found_extensions.into_iter().collect::>(); + // Track deep globable directories. + deep_globable_directories.insert(path.to_path_buf()); + it.skip_current_dir(); + } - extension_list.sort(); + let mut extension_list = found_extensions.clone().into_iter().collect::>(); - let extension_list = extension_list.join(","); + extension_list.sort(); - // Build the globs for all globable directories. - let shallow_globs = shallow_globable_directories.iter().map(|path| GlobEntry { - base: path.display().to_string(), - pattern: format!("*/*.{{{}}}", extension_list), - }); + let extension_list = extension_list.join(","); - let deep_globs = deep_globable_directories.iter().map(|path| GlobEntry { - base: path.display().to_string(), - pattern: format!("**/*.{{{}}}", extension_list), - }); + // Build the globs for all globable directories. 
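// --- Illustrative sketch, not part of the surrounding change ---
// How the patterns below are assembled (helper name is illustrative):
// extensions are sorted for deterministic output and spliced into a brace
// set. The doubled braces in `format!` emit the literal `{` and `}`.
fn brace_glob(prefix: &str, mut extensions: Vec<String>) -> String {
    extensions.sort();
    format!("{}.{{{}}}", prefix, extensions.join(","))
}

// brace_glob("**/*", vec!["js".into(), "html".into()]) == "**/*.{html,js}"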
+ let shallow_globs = shallow_globable_directories.iter().map(|path| GlobEntry { + base: path.display().to_string(), + pattern: format!("*/*.{{{}}}", extension_list), + }); - shallow_globs.chain(deep_globs).collect::>() - } + let deep_globs = deep_globable_directories.iter().map(|path| GlobEntry { + base: path.display().to_string(), + pattern: format!("**/*.{{{}}}", extension_list), + }); + + shallow_globs + .chain(deep_globs) + // Insert a glob for the base path, so we can see new files/folders in the directory + // itself + .chain(vec![GlobEntry { + base: base.to_string_lossy().into(), + pattern: "*".into(), + }]) + .collect::>() } diff --git a/crates/oxide/src/scanner/fixtures/ignored-content-dirs.txt b/crates/oxide/src/scanner/fixtures/ignored-content-dirs.txt new file mode 100644 index 000000000000..85dcc16df69a --- /dev/null +++ b/crates/oxide/src/scanner/fixtures/ignored-content-dirs.txt @@ -0,0 +1,2 @@ +.git +node_modules diff --git a/crates/oxide/src/scanner/fixtures/ignored-files.txt b/crates/oxide/src/scanner/fixtures/ignored-files.txt index 45d4ced87afd..d2d231ec7b0d 100644 --- a/crates/oxide/src/scanner/fixtures/ignored-files.txt +++ b/crates/oxide/src/scanner/fixtures/ignored-files.txt @@ -1,3 +1,4 @@ package-lock.json pnpm-lock.yaml bun.lockb +.gitignore diff --git a/crates/oxide/src/scanner/mod.rs b/crates/oxide/src/scanner/mod.rs index 8ddf60fd0ef2..445dfc98372b 100644 --- a/crates/oxide/src/scanner/mod.rs +++ b/crates/oxide/src/scanner/mod.rs @@ -1,2 +1,704 @@ -pub mod allowed_paths; +pub mod auto_source_detection; pub mod detect_sources; +pub mod sources; + +use crate::extractor::{Extracted, Extractor}; +use crate::glob::optimize_patterns; +use crate::scanner::detect_sources::resolve_globs; +use crate::scanner::sources::{ + public_source_entries_to_private_source_entries, PublicSourceEntry, SourceEntry, Sources, +}; +use crate::GlobEntry; +use auto_source_detection::BINARY_EXTENSIONS_GLOB; +use bstr::ByteSlice; +use fast_glob::glob_match; +use fxhash::{FxHashMap, FxHashSet}; +use ignore::{gitignore::GitignoreBuilder, WalkBuilder}; +use rayon::prelude::*; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::Path; +use std::path::PathBuf; +use std::sync::{self, Arc, Mutex}; +use std::time::SystemTime; +use tracing::event; + +// @source "some/folder"; // This is auto source detection +// @source "some/folder/**/*"; // This is auto source detection +// @source "some/folder/*.html"; // This is just a glob, but new files matching this should be included +// @source "node_modules/my-ui-lib"; // Auto source detection but since node_modules is explicit we allow it +// // Maybe could be considered `external(…)` automatically if: +// // 1. It's git ignored but listed explicitly +// // 2. It exists outside of the current working directory (do we know that?) 
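// --- Illustrative sketch, not part of the surrounding change ---
// The `SHOULD_TRACE` condition below as a standalone, testable predicate
// (function name is illustrative): tracing turns on for `DEBUG=*`, or for
// any value mentioning `tailwindcss:oxide` that is not explicitly negated
// with a leading `-`.
fn should_trace(debug: Option<&str>) -> bool {
    match debug {
        Some(value) => {
            value == "*"
                || (value.contains("tailwindcss:oxide")
                    && !value.contains("-tailwindcss:oxide"))
        }
        None => false,
    }
}

// should_trace(Some("*"))                  == true
// should_trace(Some("tailwindcss:oxide"))  == true
// should_trace(Some("-tailwindcss:oxide")) == false
// should_trace(None)                       == false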
+// +// @source "do-include-me.bin"; // `.bin` is typically ignored, but now it's explicit so should be included +// @source "git-ignored.html"; // A git ignored file that is listed explicitly, should be scanned +static SHOULD_TRACE: sync::LazyLock = sync::LazyLock::new( + || matches!(std::env::var("DEBUG"), Ok(value) if value.eq("*") || (value.contains("tailwindcss:oxide") && !value.contains("-tailwindcss:oxide"))), +); + +fn init_tracing() { + if !*SHOULD_TRACE { + return; + } + + _ = tracing_subscriber::fmt() + .with_max_level(tracing::Level::INFO) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) + .compact() + .try_init(); +} + +#[derive(Debug, Clone)] +pub enum ChangedContent { + File(PathBuf, String), + Content(String, String), +} + +#[derive(Debug, Clone)] +pub struct ScanOptions { + /// Base path to start scanning from + pub base: Option, + + /// Glob sources + pub sources: Vec, +} + +#[derive(Debug, Clone)] +pub struct ScanResult { + pub candidates: Vec, + pub files: Vec, + pub globs: Vec, +} + +#[derive(Debug, Clone, Default)] +pub struct Scanner { + /// Content sources + sources: Sources, + + /// The walker to detect all files that we have to scan + walker: Option, + + /// All changed content that we have to parse + changed_content: Vec, + + /// All found extensions + extensions: FxHashSet, + + /// All files that we have to scan + files: Vec, + + /// All directories, sub-directories, etc… we saw during source detection + dirs: Vec, + + /// All generated globs, used for setting up watchers + globs: Vec, + + /// Track unique set of candidates + candidates: FxHashSet, +} + +impl Scanner { + pub fn new(sources: Vec) -> Self { + let sources = Sources::new(public_source_entries_to_private_source_entries(sources)); + + Self { + sources: sources.clone(), + walker: create_walker(sources), + ..Default::default() + } + } + + pub fn scan(&mut self) -> Vec { + init_tracing(); + self.scan_sources(); + + // TODO: performance improvement, bail early if we don't have any changed content + // if self.changed_content.is_empty() { + // return vec![]; + // } + + let _new_candidates = self.extract_candidates(); + + // Make sure we have a sorted list of candidates + let mut candidates = self.candidates.iter().cloned().collect::>(); + candidates.par_sort_unstable(); + + // Return all candidates instead of only the new ones + candidates + } + + #[tracing::instrument(skip_all)] + pub fn scan_content(&mut self, changed_content: Vec) -> Vec { + let (changed_files, changed_contents) = + changed_content + .into_iter() + .partition::, _>(|x| match x { + ChangedContent::File(_, _) => true, + ChangedContent::Content(_, _) => false, + }); + + // Raw content can be parsed directly, no need to verify if the file exists and is allowed + // to be scanned. 
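// --- Illustrative sketch, not part of the surrounding change ---
// The partition step above in miniature (type and function names are
// illustrative): changed content splits into on-disk files, which still
// have to pass the walker's rules, and raw in-memory contents, which can
// be parsed immediately.
use std::path::PathBuf;

#[derive(Debug, Clone)]
enum Changed {
    File(PathBuf, String),
    Content(String, String),
}

fn split_changed(changed: Vec<Changed>) -> (Vec<Changed>, Vec<Changed>) {
    changed
        .into_iter()
        .partition(|c| matches!(c, Changed::File(_, _)))
}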
+ self.changed_content.extend(changed_contents); + + // Fully resolve all files + let changed_files = changed_files + .into_iter() + .filter_map(|changed_content| match changed_content { + ChangedContent::File(file, extension) => { + let Ok(file) = dunce::canonicalize(file) else { + return None; + }; + Some(ChangedContent::File(file, extension)) + } + _ => unreachable!(), + }) + .collect::>(); + + let (known_files, mut new_unknown_files) = changed_files + .into_iter() + .partition::, _>(|changed_file| match changed_file { + ChangedContent::Content(_, _) => unreachable!(), + ChangedContent::File(file, _) => self.files.contains(file), + }); + + // All known files are allowed to be scanned + self.changed_content.extend(known_files); + + // Figure out if the new unknown files are allowed to be scanned + if !new_unknown_files.is_empty() { + if let Some(walk_builder) = &mut self.walker { + for entry in walk_builder.build().filter_map(Result::ok) { + let path = entry.path(); + if !path.is_file() { + continue; + } + + let mut drop_file_indexes = vec![]; + for (idx, changed_file) in new_unknown_files.iter().enumerate().rev() { + let ChangedContent::File(file, _) = changed_file else { + continue; + }; + + // When the file is found on disk it means that all the rules pass. We can + // extract the current file and remove it from the list of passed in files. + if file == path { + self.files.push(path.to_path_buf()); // Track for future use + self.changed_content.push(changed_file.clone()); // Track for parsing + drop_file_indexes.push(idx); + } + } + + // Remove all files that we found on disk + if !drop_file_indexes.is_empty() { + drop_file_indexes.into_iter().for_each(|idx| { + new_unknown_files.remove(idx); + }); + } + + // We can stop walking the file system if all files we are interested in have + // been found. + if new_unknown_files.is_empty() { + break; + } + } + } + } + + self.extract_candidates() + } + + #[tracing::instrument(skip_all)] + fn extract_candidates(&mut self) -> Vec { + let changed_content = self.changed_content.drain(..).collect::>(); + + let candidates = parse_all_blobs(read_all_files(changed_content)); + + // Only compute the new candidates and ignore the ones we already have. This is for + // subsequent calls to prevent serializing the entire set of candidates every time. 
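// --- Illustrative sketch, not part of the surrounding change ---
// Why `dunce::canonicalize` and not `std::fs::canonicalize`: on Windows the
// std version yields `\\?\C:\...` extended-length paths, which would never
// compare equal to the plain paths the scanner stores. `dunce` resolves the
// path the same way but returns the conventional form when that is safe.
use std::path::PathBuf;

fn resolve(path: &str) -> Option<PathBuf> {
    dunce::canonicalize(path).ok()
}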
+ let mut new_candidates = candidates + .into_par_iter() + .filter(|candidate| !self.candidates.contains(candidate)) + .collect::>(); + + new_candidates.par_sort_unstable(); + + // Track new candidates for subsequent calls + self.candidates.par_extend(new_candidates.clone()); + + new_candidates + } + + #[tracing::instrument(skip_all)] + fn scan_sources(&mut self) { + let Some(walker) = &mut self.walker else { + return; + }; + + for entry in walker.build().filter_map(Result::ok) { + let path = entry.into_path(); + let Ok(metadata) = path.metadata() else { + continue; + }; + if metadata.is_dir() { + self.dirs.push(path); + } else if metadata.is_file() { + let extension = path + .extension() + .and_then(|x| x.to_str()) + .unwrap_or_default(); // In case the file has no extension + + self.extensions.insert(extension.to_owned()); + self.changed_content.push(ChangedContent::File( + path.to_path_buf(), + extension.to_owned(), + )); + + self.files.push(path); + } + } + } + + #[tracing::instrument(skip_all)] + pub fn get_files(&mut self) -> Vec { + self.scan_sources(); + + self.files + .par_iter() + .filter_map(|x| x.clone().into_os_string().into_string().ok()) + .collect() + } + + #[tracing::instrument(skip_all)] + pub fn get_globs(&mut self) -> Vec { + self.scan_sources(); + + for source in self.sources.iter() { + match source { + SourceEntry::Auto { base } | SourceEntry::External { base } => { + let globs = resolve_globs((base).to_path_buf(), &self.dirs, &self.extensions); + self.globs.extend(globs); + } + SourceEntry::Pattern { base, pattern } => { + self.globs.push(GlobEntry { + base: base.to_string_lossy().to_string(), + pattern: pattern.to_string(), + }); + } + _ => {} + } + } + + // Re-optimize the globs to reduce the number of patterns we have to scan. + self.globs = optimize_patterns(&self.globs); + + self.globs.clone() + } + + #[tracing::instrument(skip_all)] + pub fn get_normalized_sources(&mut self) -> Vec { + self.sources + .iter() + .filter_map(|source| match source { + SourceEntry::Auto { base } | SourceEntry::External { base } => Some(GlobEntry { + base: base.to_string_lossy().to_string(), + pattern: "**/*".to_string(), + }), + SourceEntry::Pattern { base, pattern } => Some(GlobEntry { + base: base.to_string_lossy().to_string(), + pattern: pattern.to_string(), + }), + _ => None, + }) + .collect() + } + + #[tracing::instrument(skip_all)] + pub fn get_candidates_with_positions( + &mut self, + changed_content: ChangedContent, + ) -> Vec<(String, usize)> { + let content = read_changed_content(changed_content).unwrap_or_default(); + let original_content = &content; + + // Workaround for legacy upgrades: + // + // `-[]` won't parse in the new parser (`[…]` must contain _something_), but we do need it + // for people using `group-[]` (which we will later replace with `in-[.group]` instead). + let content = content.replace("-[]", "XYZ"); + let offset = content.as_ptr() as usize; + + let mut extractor = Extractor::new(&content[..]); + + extractor + .extract() + .into_par_iter() + .flat_map(|extracted| match extracted { + Extracted::Candidate(s) => { + let i = s.as_ptr() as usize - offset; + let original = &original_content[i..i + s.len()]; + if original.contains_str("-[]") { + return Some(unsafe { + (String::from_utf8_unchecked(original.to_vec()), i) + }); + } + + // SAFETY: When we parsed the candidates, we already guaranteed that the byte + // slices are valid, therefore we don't have to re-check here when we want to + // convert it back to a string. 
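// --- Illustrative sketch, not part of the surrounding change ---
// The position bookkeeping above relies on pointer arithmetic: every slice
// the extractor returns borrows from the input buffer, so a candidate's
// byte offset is the distance between the two start pointers. This is only
// sound when `needle` really is a subslice of `haystack`.
fn offset_in(haystack: &[u8], needle: &[u8]) -> usize {
    needle.as_ptr() as usize - haystack.as_ptr() as usize
}

fn offset_demo() {
    let input = b"<div class=\"flex\">";
    let needle = &input[12..16]; // the bytes spelling "flex"
    assert_eq!(offset_in(input, needle), 12);
}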
+ Some(unsafe { (String::from_utf8_unchecked(s.to_vec()), i) }) + } + + _ => None, + }) + .collect() + } +} + +fn read_changed_content(c: ChangedContent) -> Option> { + let (content, extension) = match c { + ChangedContent::File(file, extension) => match std::fs::read(&file) { + Ok(content) => (content, extension), + Err(e) => { + event!(tracing::Level::ERROR, "Failed to read file: {:?}", e); + return None; + } + }, + + ChangedContent::Content(contents, extension) => (contents.into_bytes(), extension), + }; + + Some(pre_process_input(&content, &extension)) +} + +pub fn pre_process_input(content: &[u8], extension: &str) -> Vec { + use crate::extractor::pre_processors::*; + + match extension { + "clj" | "cljs" | "cljc" => Clojure.process(content), + "cshtml" | "razor" => Razor.process(content), + "haml" => Haml.process(content), + "json" => Json.process(content), + "pug" => Pug.process(content), + "rb" | "erb" => Ruby.process(content), + "slim" => Slim.process(content), + "svelte" => Svelte.process(content), + "vue" => Vue.process(content), + _ => content.to_vec(), + } +} + +#[tracing::instrument(skip_all)] +fn read_all_files(changed_content: Vec) -> Vec> { + event!( + tracing::Level::INFO, + "Reading {:?} file(s)", + changed_content.len() + ); + + changed_content + .into_par_iter() + .filter_map(read_changed_content) + .collect() +} + +#[tracing::instrument(skip_all)] +fn parse_all_blobs(blobs: Vec>) -> Vec { + let mut result: Vec<_> = blobs + .par_iter() + .flat_map(|blob| blob.par_split(|x| *x == b'\n')) + .filter_map(|blob| { + if blob.is_empty() { + return None; + } + + let extracted = crate::extractor::Extractor::new(blob).extract(); + if extracted.is_empty() { + return None; + } + + Some(FxHashSet::from_iter(extracted.into_iter().map( + |x| match x { + Extracted::Candidate(bytes) => bytes, + Extracted::CssVariable(bytes) => bytes, + }, + ))) + }) + .reduce(Default::default, |mut a, b| { + a.extend(b); + a + }) + .into_iter() + .map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) }) + .collect(); + + // SAFETY: Unstable sort is faster and in this scenario it's also safe because we are + // guaranteed to have unique candidates. + result.par_sort_unstable(); + + result +} + +/// Create a walker for the given sources to detect all the files that we have to scan. +/// +/// The `mtimes` map is used to keep track of the last modified time of each file. This is used to +/// determine if a file or folder has changed since the last scan and we can skip folders that +/// haven't changed. +fn create_walker(sources: Sources) -> Option { + let mtimes: Arc>> = Default::default(); + let mut other_roots: FxHashSet<&PathBuf> = FxHashSet::default(); + let mut first_root: Option<&PathBuf> = None; + let mut ignores: BTreeMap<&PathBuf, BTreeSet> = Default::default(); + + for source in sources.iter() { + match source { + SourceEntry::Auto { base } => { + if first_root.is_none() { + first_root = Some(base); + } else { + other_roots.insert(base); + } + } + SourceEntry::Pattern { base, pattern } => { + let mut pattern = pattern.to_string(); + + if first_root.is_none() { + first_root = Some(base); + } else { + other_roots.insert(base); + } + + if !pattern.contains("**") { + // Ensure that the pattern is pinned to the base path. 
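// --- Illustrative sketch, not part of the surrounding change ---
// What "pinned to the base path" means in gitignore semantics, assuming the
// `ignore` crate (function name is illustrative): without a leading `/` a
// pattern matches at any depth; with it, only relative to the rule's root.
// The `!` prefix used below turns a rule into a force-include that
// overrides earlier ignores.
fn anchoring_demo() {
    use ignore::gitignore::GitignoreBuilder;

    let mut builder = GitignoreBuilder::new("/project");
    builder.add_line(None, "/foo.styl").unwrap(); // pinned to the base
    let rules = builder.build().unwrap();

    assert!(rules.matched("/project/foo.styl", false).is_ignore());
    // The same file name deeper in the tree is not matched by the
    // anchored pattern.
    assert!(!rules.matched("/project/src/foo.styl", false).is_ignore());
}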
+ if !pattern.starts_with("/") { + pattern = format!("/{pattern}"); + } + + // Specific patterns should take precedence even over git-ignored files: + ignores + .entry(base) + .or_default() + .insert(format!("!{}", pattern)); + } else { + // Assumption: the pattern we receive will already be brace expanded. So + // `*.{html,jsx}` will result in two separate patterns: `*.html` and `*.jsx`. + if let Some(extension) = Path::new(&pattern).extension() { + // Extend auto source detection to include the extension + ignores + .entry(base) + .or_default() + .insert(format!("!*.{}", extension.to_string_lossy())); + } + } + } + SourceEntry::Ignored { base, pattern } => { + let mut pattern = pattern.to_string(); + // Ensure that the pattern is pinned to the base path. + if !pattern.starts_with("/") { + pattern = format!("/{pattern}"); + } + ignores.entry(base).or_default().insert(pattern); + } + SourceEntry::External { base } => { + if first_root.is_none() { + first_root = Some(base); + } else { + other_roots.insert(base); + } + + // External sources should take precedence even over git-ignored files: + ignores + .entry(base) + .or_default() + .insert(format!("!{}", "/**/*")); + + // External sources should still disallow binary extensions: + ignores + .entry(base) + .or_default() + .insert(BINARY_EXTENSIONS_GLOB.clone()); + } + } + } + + let mut builder = WalkBuilder::new(first_root?); + + // Scan hidden files / directories + builder.hidden(false); + + // Don't respect global gitignore files + builder.git_global(false); + + // By default, allow .gitignore files to be used regardless of whether or not + // a .git directory is present. This is an optimization for when projects + // are first created and may not be in a git repo yet. + builder.require_git(false); + + // If we are in a git repo then require it to ensure that only rules within + // the repo are used. For example, we don't want to consider a .gitignore file + // in the user's home folder if we're in a git repo. + // + // The alternative is using a call like `.parents(false)` but that will + // prevent looking at parent directories for .gitignore files from within + // the repo and that's not what we want. + // + // For example, in a project with this structure: + // + // home + // .gitignore + // my-project + // .gitignore + // apps + // .gitignore + // web + // {root} + // + // We do want to consider all .gitignore files listed: + // - home/.gitignore + // - my-project/.gitignore + // - my-project/apps/.gitignore + // + // However, if a repo is initialized inside my-project then only the following + // make sense for consideration: + // - my-project/.gitignore + // - my-project/apps/.gitignore + // + // Setting the require_git(true) flag conditionally allows us to do this. 
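// --- Illustrative sketch, not part of the surrounding change ---
// The conditional `require_git(true)` probe as a one-liner (function name
// is illustrative): walk the root's ancestors, including the root itself,
// and report whether any of them contains a `.git` directory.
use std::path::Path;

fn inside_git_repo(root: &Path) -> bool {
    root.ancestors().any(|dir| dir.join(".git").exists())
}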
+ for parent in first_root?.ancestors() { + if parent.join(".git").exists() { + builder.require_git(true); + break; + } + } + + for root in other_roots { + builder.add(root); + } + + // Setup auto source detection rules + builder.add_gitignore(auto_source_detection::RULES.clone()); + + // Setup ignores based on `@source` definitions + for (base, patterns) in ignores { + let mut ignore_builder = GitignoreBuilder::new(base); + for pattern in patterns { + ignore_builder.add_line(None, &pattern).unwrap(); + } + let ignore = ignore_builder.build().unwrap(); + builder.add_gitignore(ignore); + } + + builder.filter_entry({ + move |entry| { + let path = entry.path(); + + // Ensure the entries are matching any of the provided source patterns (this is + // necessary for manual-patterns that can filter the file extension) + if path.is_file() { + let mut matches = false; + for source in sources.iter() { + match source { + SourceEntry::Auto { base } | SourceEntry::External { base } => { + if path.starts_with(base) { + matches = true; + break; + } + } + SourceEntry::Pattern { base, pattern } => { + let mut pattern = pattern.to_string(); + // Ensure that the pattern is pinned to the base path. + if !pattern.starts_with("/") { + pattern = format!("/{pattern}"); + } + + // Check if path starts with base, if so, remove the prefix and check the remainder against the pattern + let remainder = path.strip_prefix(base); + if remainder.is_ok_and(|remainder| { + let mut path_str = remainder.to_string_lossy().to_string(); + if !path_str.starts_with("/") { + path_str = format!("/{path_str}"); + } + glob_match(pattern, path_str.as_bytes()) + }) { + matches = true; + break; + } + } + _ => {} + } + } + + if !matches { + return false; + } + } + + let mut mtimes = mtimes.lock().unwrap(); + let current_time = match entry.metadata() { + Ok(metadata) if metadata.is_file() => { + if let Ok(time) = metadata.modified() { + Some(time) + } else { + None + } + } + _ => None, + }; + + let previous_time = + current_time.and_then(|time| mtimes.insert(entry.clone().into_path(), time)); + + match (current_time, previous_time) { + (Some(current), Some(prev)) if prev == current => false, + _ => true, + } + } + }); + + Some(builder) +} + +#[cfg(test)] +mod tests { + use super::{ChangedContent, Scanner}; + + #[test] + fn test_positions() { + let mut scanner = Scanner::new(vec![]); + + for (input, expected) in [ + // Before migrations + ( + r#"
<div class="tw:flex! tw:sm:block! tw:bg-linear-to-t flex tw:[color:red] tw:in-[.tw\:group]:flex">
"#, + vec![ + ("class".to_string(), 5), + ("tw:flex!".to_string(), 12), + ("tw:sm:block!".to_string(), 21), + ("tw:bg-linear-to-t".to_string(), 34), + ("flex".to_string(), 52), + ("tw:[color:red]".to_string(), 57), + ("tw:in-[.tw\\:group]:flex".to_string(), 72), + ], + ), + ] { + let candidates = scanner.get_candidates_with_positions(ChangedContent::Content( + input.to_string(), + "html".into(), + )); + assert_eq!(candidates, expected); + } + } +} diff --git a/crates/oxide/src/scanner/sources.rs b/crates/oxide/src/scanner/sources.rs new file mode 100644 index 000000000000..450fcb7c2873 --- /dev/null +++ b/crates/oxide/src/scanner/sources.rs @@ -0,0 +1,313 @@ +use crate::glob::split_pattern; +use crate::GlobEntry; +use bexpand::Expression; +use std::path::PathBuf; +use tracing::{event, Level}; + +use super::auto_source_detection::IGNORED_CONTENT_DIRS; + +#[derive(Debug, Clone)] +pub struct PublicSourceEntry { + /// Base path of the glob + pub base: String, + + /// Glob pattern + pub pattern: String, + + /// Negated flag + pub negated: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum SourceEntry { + /// Auto source detection + /// + /// Represented by: + /// + /// ```css + /// @source "src";` + /// @source "src/**/*";` + /// ``` + Auto { base: PathBuf }, + + /// Explicit source pattern regardless of any auto source detection rules + /// + /// Represented by: + /// + /// ```css + /// @source "src/**/*.html";` + /// ``` + Pattern { base: PathBuf, pattern: String }, + + /// Ignored pattern + /// + /// Represented by: + /// + /// ```css + /// @source not "src";` + /// @source not "src/**/*.html";` + /// ``` + Ignored { base: PathBuf, pattern: String }, + + /// External sources are sources outside of your git root which should not + /// follow gitignore rules. + /// + /// Represented by: + /// + /// ```css + /// @source "../node_modules/my-lib";` + /// ``` + External { base: PathBuf }, +} + +#[derive(Debug, Clone, Default)] +pub struct Sources { + sources: Vec, +} + +impl Sources { + pub fn new(sources: Vec) -> Self { + Self { sources } + } + + pub fn iter(&self) -> impl Iterator { + self.sources.iter() + } +} + +impl PublicSourceEntry { + /// Optimize the PublicSourceEntry by trying to move all the static parts of the pattern to the + /// base of the PublicSourceEntry. + /// + /// ```diff + /// - { base: '/', pattern: 'src/**/*.html'} + /// + { base: '/src', pattern: '**/*.html'} + /// ``` + /// + /// A file stays in the `pattern` part, because the `base` should only be a directory. + /// + /// ```diff + /// - { base: '/', pattern: 'src/examples/index.html'} + /// + { base: '/src/examples', pattern: 'index.html'} + /// ``` + /// + /// A folder will be moved to the `base` part, and the `pattern` will be set to `**/*`. + /// + /// ```diff + /// - { base: '/', pattern: 'src/examples'} + /// + { base: '/src/examples', pattern: '**/*'} + /// ``` + /// + /// In addition, we will canonicalize the base path so we always work with the correctly + /// resolved path. + pub fn optimize(&mut self) { + // Resolve base path immediately + let Ok(base) = dunce::canonicalize(&self.base) else { + event!(Level::ERROR, "Failed to resolve base: {:?}", self.base); + return; + }; + self.base = base.to_string_lossy().to_string(); + + // No dynamic part, figure out if we are dealing with a file or a directory. 
+ if !self.pattern.contains('*') { + let combined_path = if self.pattern.starts_with("/") { + PathBuf::from(&self.pattern) + } else { + PathBuf::from(&self.base).join(&self.pattern) + }; + + match dunce::canonicalize(combined_path) { + Ok(resolved_path) if resolved_path.is_dir() => { + self.base = resolved_path.to_string_lossy().to_string(); + self.pattern = "**/*".to_owned(); + } + Ok(resolved_path) if resolved_path.is_file() => { + self.base = resolved_path + .parent() + .unwrap() + .to_string_lossy() + .to_string(); + // Ensure leading slash, otherwise it will match against all files in all folders/ + self.pattern = format!( + "/{}", + resolved_path + .file_name() + .unwrap() + .to_string_lossy() + .to_string() + ); + } + _ => {} + } + return; + } + + // Contains dynamic part + let (static_part, dynamic_part) = split_pattern(&self.pattern); + + let base: PathBuf = self.base.clone().into(); + let base = match static_part { + Some(static_part) => { + // TODO: If the base does not exist on disk, try removing the last slash and try + // again. + match dunce::canonicalize(base.join(static_part)) { + Ok(base) => base, + Err(err) => { + event!(tracing::Level::ERROR, "Failed to resolve glob: {:?}", err); + return; + } + } + } + None => base, + }; + + let pattern = match dynamic_part { + Some(dynamic_part) => dynamic_part, + None => { + if base.is_dir() { + "**/*".to_owned() + } else { + "".to_owned() + } + } + }; + + self.base = base.to_string_lossy().to_string(); + self.pattern = pattern; + } +} + +/// For each public source entry: +/// +/// 1. Perform brace expansion +/// +/// ```diff +/// - { base: '/', pattern: 'src/{foo,bar}.html'} +/// + { base: '/', pattern: 'src/foo.html'} +/// + { base: '/', pattern: 'src/bar.html'} +/// ``` +/// +/// 2. Hoist static parts, e.g.: +/// +/// ```diff +/// - { base: '/', pattern: 'src/**/*.html'} +/// + { base: '/src', pattern: '**/*.html'} +/// ``` +/// +/// 3. 
Convert to private SourceEntry +/// +pub fn public_source_entries_to_private_source_entries( + sources: Vec, +) -> Vec { + // Perform brace expansion + let expanded_globs = sources + .into_iter() + .flat_map(|source| { + let expression: Result = source.pattern[..].try_into(); + let Ok(expression) = expression else { + return vec![source]; + }; + + expression + .into_iter() + .filter_map(Result::ok) + .map(move |pattern| PublicSourceEntry { + base: source.base.clone(), + pattern: pattern.into(), + negated: source.negated, + }) + .collect::>() + }) + .map(|mut public_source| { + public_source.optimize(); + public_source + }) + .collect::>(); + + // Convert from public SourceEntry to private SourceEntry + expanded_globs + .into_iter() + .map(Into::into) + .collect::>() +} + +/// Convert a public source entry to a source entry +impl From for SourceEntry { + fn from(value: PublicSourceEntry) -> Self { + let auto = value.pattern.ends_with("**/*") + || PathBuf::from(&value.base).join(&value.pattern).is_dir(); + + let inside_ignored_content_dir = IGNORED_CONTENT_DIRS.iter().any(|dir| { + value.base.contains(&format!( + "{}{}{}", + std::path::MAIN_SEPARATOR, + dir, + std::path::MAIN_SEPARATOR + )) + }); + + match (value.negated, auto, inside_ignored_content_dir) { + (false, true, false) => SourceEntry::Auto { + base: value.base.into(), + }, + (false, true, true) => SourceEntry::External { + base: value.base.into(), + }, + (false, false, _) => SourceEntry::Pattern { + base: value.base.into(), + pattern: value.pattern, + }, + (true, _, _) => SourceEntry::Ignored { + base: value.base.into(), + pattern: value.pattern, + }, + } + } +} + +impl From for SourceEntry { + fn from(value: GlobEntry) -> Self { + SourceEntry::Pattern { + base: PathBuf::from(value.base), + pattern: value.pattern, + } + } +} + +impl From for GlobEntry { + fn from(value: SourceEntry) -> Self { + match value { + SourceEntry::Auto { base } | SourceEntry::External { base } => GlobEntry { + base: base.to_string_lossy().into(), + pattern: "**/*".into(), + }, + SourceEntry::Pattern { base, pattern } => GlobEntry { + base: base.to_string_lossy().into(), + pattern: pattern.clone(), + }, + SourceEntry::Ignored { base, pattern } => GlobEntry { + base: base.to_string_lossy().into(), + pattern: pattern.clone(), + }, + } + } +} + +impl From<&SourceEntry> for GlobEntry { + fn from(value: &SourceEntry) -> Self { + match value { + SourceEntry::Auto { base } | SourceEntry::External { base } => GlobEntry { + base: base.to_string_lossy().into(), + pattern: "**/*".into(), + }, + SourceEntry::Pattern { base, pattern } => GlobEntry { + base: base.to_string_lossy().into(), + pattern: pattern.clone(), + }, + SourceEntry::Ignored { base, pattern } => GlobEntry { + base: base.to_string_lossy().into(), + pattern: pattern.clone(), + }, + } + } +} diff --git a/crates/oxide/tests/scanner.rs b/crates/oxide/tests/scanner.rs index c5556aaa262e..03daee0ebafb 100644 --- a/crates/oxide/tests/scanner.rs +++ b/crates/oxide/tests/scanner.rs @@ -1,5 +1,6 @@ #[cfg(test)] mod scanner { + use std::path::PathBuf; use std::process::Command; use std::thread::sleep; use std::time::Duration; @@ -8,6 +9,33 @@ mod scanner { use tailwindcss_oxide::*; use tempfile::tempdir; + fn public_source_entry_from_pattern(dir: PathBuf, pattern: &str) -> PublicSourceEntry { + let mut parts = pattern.split_whitespace(); + let _ = parts.next().unwrap_or_default(); + let not_or_pattern = parts.next().unwrap_or_default(); + if not_or_pattern == "not" { + let pattern = 
parts.next().unwrap_or_default(); + return PublicSourceEntry { + base: dir.to_string_lossy().into(), + pattern: pattern[1..pattern.len() - 1].to_string(), + negated: true, + }; + } + + PublicSourceEntry { + base: dir.to_string_lossy().into(), + pattern: not_or_pattern[1..not_or_pattern.len() - 1].to_string(), + negated: false, + } + } + + struct ScanResult { + files: Vec, + globs: Vec, + normalized_sources: Vec, + candidates: Vec, + } + fn create_files_in(dir: &path::Path, paths: &[(&str, &str)]) { // Create the necessary files for (path, contents) in paths { @@ -24,8 +52,8 @@ mod scanner { fn scan_with_globs( paths_with_content: &[(&str, &str)], - globs: Vec<&str>, - ) -> (Vec, Vec) { + source_directives: Vec<&str>, + ) -> ScanResult { // Create a temporary working directory let dir = tempdir().unwrap().into_path(); @@ -38,101 +66,147 @@ mod scanner { let base = format!("{}", dir.display()).replace('\\', "/"); // Resolve all content paths for the (temporary) current working directory - let mut sources: Vec = globs + let sources: Vec = source_directives .iter() - .map(|x| GlobEntry { - base: base.clone(), - pattern: x.to_string(), - }) + .map(|str| public_source_entry_from_pattern(base.clone().into(), str)) .collect(); - sources.push(GlobEntry { - base: base.clone(), - pattern: "**/*".to_string(), - }); - - let mut scanner = Scanner::new(Some(sources)); + let mut scanner = Scanner::new(sources); let candidates = scanner.scan(); - let mut paths: Vec<_> = scanner.get_files(); - - for glob in scanner.get_globs() { - paths.push(format!("{}{}{}", glob.base, "/", glob.pattern)); - } - - let parent_dir = + let base_dir = format!("{}{}", dunce::canonicalize(&base).unwrap().display(), "/").replace('\\', "/"); - paths = paths - .into_iter() - .map(|x| { - // Normalize paths to use unix style separators - x.replace('\\', "/").replace(&parent_dir, "") + // Get all scanned files as strings relative to the base directory + let mut files = scanner + .get_files() + .iter() + // Normalize paths to use unix style separators + .map(|file| file.replace('\\', "/").replace(&base_dir, "")) + .collect::>(); + files.sort(); + + // Get all scanned globs as strings relative to the base directory + let mut globs = scanner + .get_globs() + .iter() + .map(|glob| { + if glob.pattern.starts_with('/') { + format!("{}{}", glob.base, glob.pattern) + } else { + format!("{}/{}", glob.base, glob.pattern) + } }) - .collect(); - - // Sort the output for easier comparison (depending on internal data structure the order - // _could_ be random) - paths.sort(); - - (paths, candidates) - } + // Normalize paths to use unix style separators + .map(|file| file.replace('\\', "/").replace(&base_dir, "")) + .collect::>(); + globs.sort(); + + // Get all normalized sources as strings relative to the base directory + let mut normalized_sources = scanner + .get_normalized_sources() + .iter() + .map(|glob| { + if glob.pattern.starts_with('/') { + format!("{}{}", glob.base, glob.pattern) + } else { + format!("{}/{}", glob.base, glob.pattern) + } + }) + // Normalize paths to use unix style separators + .map(|file| file.replace('\\', "/").replace(&base_dir, "")) + .collect::>(); + normalized_sources.sort(); - fn scan(paths_with_content: &[(&str, &str)]) -> (Vec, Vec) { - scan_with_globs(paths_with_content, vec![]) + ScanResult { + files, + globs, + normalized_sources, + candidates, + } } - fn test(paths_with_content: &[(&str, &str)]) -> Vec { - scan(paths_with_content).0 + fn scan(paths_with_content: &[(&str, &str)]) -> ScanResult { + 
scan_with_globs(paths_with_content, vec!["@source '**/*'"]) } #[test] fn it_should_work_with_a_set_of_root_files() { - let globs = test(&[ + let ScanResult { + files, + globs, + normalized_sources, + .. + } = scan(&[ ("index.html", ""), ("a.html", ""), ("b.html", ""), ("c.html", ""), ]); - assert_eq!(globs, vec!["*", "a.html", "b.html", "c.html", "index.html"]); + assert_eq!(files, vec!["a.html", "b.html", "c.html", "index.html"]); + assert_eq!(globs, vec!["*"]); + assert_eq!(normalized_sources, vec!["**/*"]); } #[test] fn it_should_work_with_a_set_of_root_files_and_ignore_ignored_files() { - let globs = test(&[ + let ScanResult { + files, + globs, + normalized_sources, + .. + } = scan(&[ (".gitignore", "b.html"), ("index.html", ""), ("a.html", ""), ("b.html", ""), ("c.html", ""), ]); - assert_eq!(globs, vec!["*", "a.html", "c.html", "index.html"]); + assert_eq!(files, vec!["a.html", "c.html", "index.html"]); + assert_eq!(globs, vec!["*"]); + assert_eq!(normalized_sources, vec!["**/*"]); } #[test] fn it_should_list_all_files_in_the_public_folder_explicitly() { - let globs = test(&[ + let ScanResult { + files, + globs, + normalized_sources, + .. + } = scan(&[ ("index.html", ""), ("public/a.html", ""), ("public/b.html", ""), ("public/c.html", ""), + ("public/nested/c.html", ""), + ("public/deeply/nested/c.html", ""), ]); + assert_eq!( - globs, + files, vec![ - "*", "index.html", "public/a.html", "public/b.html", "public/c.html", + "public/deeply/nested/c.html", + "public/nested/c.html", ] ); + assert_eq!(globs, vec!["*"]); + assert_eq!(normalized_sources, vec!["**/*"]); } #[test] fn it_should_list_nested_folders_explicitly_in_the_public_folder() { - let globs = test(&[ + let ScanResult { + files, + globs, + normalized_sources, + .. + } = scan(&[ ("index.html", ""), ("public/a.html", ""), ("public/b.html", ""), @@ -143,10 +217,10 @@ mod scanner { ("public/nested/again/a.html", ""), ("public/very/deeply/nested/a.html", ""), ]); + assert_eq!( - globs, + files, vec![ - "*", "index.html", "public/a.html", "public/b.html", @@ -158,72 +232,133 @@ mod scanner { "public/very/deeply/nested/a.html", ] ); + assert_eq!(globs, vec!["*",]); + assert_eq!(normalized_sources, vec!["**/*"]); } #[test] fn it_should_list_all_files_in_the_public_folder_explicitly_except_ignored_files() { - let globs = test(&[ + let ScanResult { + files, + globs, + normalized_sources, + .. + } = scan(&[ (".gitignore", "public/b.html\na.html"), ("index.html", ""), ("public/a.html", ""), ("public/b.html", ""), ("public/c.html", ""), ]); - assert_eq!(globs, vec!["*", "index.html", "public/c.html",]); + + assert_eq!(files, vec!["index.html", "public/c.html",]); + assert_eq!(globs, vec!["*"]); + assert_eq!(normalized_sources, vec!["**/*"]); } #[test] fn it_should_use_a_glob_for_top_level_folders() { - let globs = test(&[ + let ScanResult { + files, + globs, + normalized_sources, + .. 
+ } = scan(&[ ("index.html", ""), ("src/a.html", ""), ("src/b.html", ""), ("src/c.html", ""), ]); - assert_eq!(globs, vec!["*", - "index.html", + + assert_eq!( + files, + vec!["index.html", "src/a.html", "src/b.html", "src/c.html"] + ); + assert_eq!(globs, vec![ + "*", "src/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", - "src/a.html", - "src/b.html", - "src/c.html" ]); + assert_eq!(normalized_sources, vec!["**/*"]); } #[test] fn it_should_ignore_binary_files() { - let globs = test(&[ + let ScanResult { + files, + globs, + normalized_sources, + .. + } = scan(&[ ("index.html", ""), ("a.mp4", ""), ("b.png", ""), ("c.lock", ""), ]); - assert_eq!(globs, vec!["*", "index.html"]); + + assert_eq!(files, vec!["index.html"]); + assert_eq!(globs, vec!["*"]); + assert_eq!(normalized_sources, vec!["**/*"]); } #[test] fn it_should_ignore_known_extensions() { - let globs = test(&[ + let ScanResult { + files, + globs, + normalized_sources, + .. + } = scan(&[ ("index.html", ""), ("a.css", ""), ("b.sass", ""), ("c.less", ""), ]); - assert_eq!(globs, vec!["*", "index.html"]); + + assert_eq!(files, vec!["index.html"]); + assert_eq!(globs, vec!["*"]); + assert_eq!(normalized_sources, vec!["**/*"]); + } + + #[test] + fn it_should_find_new_extensions() { + let ScanResult { + files, + globs, + normalized_sources, + .. + } = scan(&[("src/index.my-extension", "")]); + + assert_eq!(files, vec!["src/index.my-extension"]); + assert_eq!(globs, vec!["*", "src/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,my-extension,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}"]); + assert_eq!(normalized_sources, vec!["**/*"]); } #[test] fn it_should_ignore_known_files() { - let globs = test(&[ + let ScanResult { + files, + globs, + normalized_sources, + .. + } = scan(&[ ("index.html", ""), ("package-lock.json", ""), ("yarn.lock", ""), ]); - assert_eq!(globs, vec!["*", "index.html"]); + + assert_eq!(files, vec!["index.html"]); + assert_eq!(globs, vec!["*"]); + assert_eq!(normalized_sources, vec!["**/*"]); } #[test] fn it_should_ignore_and_expand_nested_ignored_folders() { - let globs = test(&[ + let ScanResult { + files, + globs, + normalized_sources, + .. 
+ } = scan(&[ // Explicitly listed root files ("foo.html", ""), ("bar.html", ""), @@ -267,38 +402,28 @@ mod scanner { ]); assert_eq!( - globs, + files, vec![ - "*", "bar.html", "baz.html", "foo.html", - "nested-a/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", "nested-a/bar.html", "nested-a/baz.html", "nested-a/foo.html", - "nested-b/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", "nested-b/deeply-nested/bar.html", "nested-b/deeply-nested/baz.html", "nested-b/deeply-nested/foo.html", - "nested-c/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", "nested-c/bar.html", "nested-c/baz.html", "nested-c/foo.html", - "nested-c/sibling-folder/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", "nested-c/sibling-folder/bar.html", "nested-c/sibling-folder/baz.html", "nested-c/sibling-folder/foo.html", - "nested-d/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", "nested-d/bar.html", "nested-d/baz.html", "nested-d/foo.html", - "nested-d/very/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", - "nested-d/very/deeply/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", - "nested-d/very/deeply/nested/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", "nested-d/very/deeply/nested/bar.html", "nested-d/very/deeply/nested/baz.html", - "nested-d/very/deeply/nested/directory/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", "nested-d/very/deeply/nested/directory/again/foo.html", "nested-d/very/deeply/nested/directory/bar.html", "nested-d/very/deeply/nested/directory/baz.html", @@ -306,6 +431,19 @@ mod scanner { "nested-d/very/deeply/nested/foo.html", ] ); + assert_eq!(globs, vec![ + "*", + "nested-a/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + "nested-b/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + "nested-c/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + 
"nested-c/sibling-folder/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + "nested-d/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + "nested-d/very/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + "nested-d/very/deeply/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + "nested-d/very/deeply/nested/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + "nested-d/very/deeply/nested/directory/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + ]); + assert_eq!(normalized_sources, vec!["**/*"]); } #[test] @@ -314,7 +452,11 @@ mod scanner { ignores.push_str("# md:font-bold\n"); ignores.push_str("foo.html\n"); - let candidates = scan(&[ + let ScanResult { + candidates, + normalized_sources, + .. + } = scan(&[ // The gitignore file is used to filter out files but not scanned for candidates (".gitignore", &ignores), // A file that should definitely be scanned @@ -333,8 +475,7 @@ mod scanner { ("index2.svelte", ""), ("index3.svelte", ""), ("index4.svelte", ""), - ]) - .1; + ]); assert_eq!( candidates, @@ -351,32 +492,48 @@ mod scanner { "underline" ] ); + assert_eq!(normalized_sources, vec!["**/*"]); } #[test] fn it_should_be_possible_to_scan_in_the_parent_directory() { - let candidates = scan_with_globs( + let ScanResult { + candidates, + normalized_sources, + .. + } = scan_with_globs( &[("foo/bar/baz/foo.html", "content-['foo.html']")], - vec!["./foo/bar/baz/.."], - ) - .1; + vec!["@source '**/*'", "@source './foo/bar/baz/..'"], + ); assert_eq!(candidates, vec!["content-['foo.html']"]); + assert_eq!(normalized_sources, vec!["**/*", "foo/bar/**/*"]); } #[test] fn it_should_scan_files_without_extensions() { // These look like folders, but they are files - let candidates = - scan_with_globs(&[("my-file", "content-['my-file']")], vec!["./my-file"]).1; + let ScanResult { + candidates, + normalized_sources, + .. + } = scan_with_globs( + &[("my-file", "content-['my-file']")], + vec!["@source '**/*'", "@source './my-file'"], + ); assert_eq!(candidates, vec!["content-['my-file']"]); + assert_eq!(normalized_sources, vec!["**/*", "my-file"]); } #[test] fn it_should_scan_folders_with_extensions() { // These look like files, but they are folders - let candidates = scan_with_globs( + let ScanResult { + candidates, + normalized_sources, + .. 
+ } = scan_with_globs( &[ ( "my-folder.templates/foo.html", @@ -387,9 +544,12 @@ mod scanner { "content-['my-folder.bin/foo.html']", ), ], - vec!["./my-folder.templates", "./my-folder.bin"], - ) - .1; + vec![ + "@source '**/*'", + "@source './my-folder.templates'", + "@source './my-folder.bin'", + ], + ); assert_eq!( candidates, @@ -398,26 +558,38 @@ mod scanner { "content-['my-folder.templates/foo.html']", ] ); + assert_eq!( + normalized_sources, + vec!["**/*", "my-folder.bin/**/*", "my-folder.templates/**/*"] + ); } #[test] fn it_should_scan_content_paths() { - let candidates = scan_with_globs( + let ScanResult { + candidates, + normalized_sources, + .. + } = scan_with_globs( &[ // We know that `.styl` extensions are ignored, so they are not covered by auto content // detection. ("foo.styl", "content-['foo.styl']"), ], - vec!["*.styl"], - ) - .1; + vec!["@source '**/*'", "@source '*.styl'"], + ); assert_eq!(candidates, vec!["content-['foo.styl']"]); + assert_eq!(normalized_sources, vec!["**/*", "*.styl"]); } #[test] fn it_should_scan_next_dynamic_folders() { - let candidates = scan_with_globs( + let ScanResult { + candidates, + normalized_sources, + .. + } = scan_with_globs( &[ // We know that `.styl` extensions are ignored, so they are not covered by auto content // detection. @@ -426,9 +598,8 @@ mod scanner { ("app/[[...slug]]/page.styl", "content-['[[...slug]]']"), ("app/(theme)/page.styl", "content-['(theme)']"), ], - vec!["./**/*.{styl}"], - ) - .1; + vec!["@source '**/*'", "@source './**/*.{styl}'"], + ); assert_eq!( candidates, @@ -439,6 +610,7 @@ mod scanner { "content-['[slug]']", ], ); + assert_eq!(normalized_sources, vec!["**/*", "**/*.styl"]); } #[test] @@ -461,12 +633,13 @@ mod scanner { // Get POSIX-style absolute path let full_path = format!("{}", dir.display()).replace('\\', "/"); - let sources = vec![GlobEntry { + let sources = vec![PublicSourceEntry { base: full_path.clone(), pattern: full_path.clone(), + negated: false, }]; - let mut scanner = Scanner::new(Some(sources)); + let mut scanner = Scanner::new(sources); let candidates = scanner.scan(); // We've done the initial scan and found the files @@ -481,18 +654,23 @@ mod scanner { #[test] fn it_should_scan_content_paths_even_when_they_are_git_ignored() { - let candidates = scan_with_globs( + let ScanResult { + candidates, + normalized_sources, + .. + } = scan_with_globs( &[ (".gitignore", "foo.styl"), // We know that `.styl` extensions are ignored, so they are not covered by auto content // detection. 
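+                // `foo.styl` is also listed in the `.gitignore` above; the
+                // explicit `@source 'foo.styl'` below is expected to win over
+                // both the extension default and the gitignore entry.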
("foo.styl", "content-['foo.styl']"), ], - vec!["foo.styl"], - ) - .1; + // But explicitly including them should still work + vec!["@source '**/*'", "@source 'foo.styl'"], + ); assert_eq!(candidates, vec!["content-['foo.styl']"]); + assert_eq!(normalized_sources, vec!["**/*", "foo.styl"]); } #[test] @@ -513,17 +691,11 @@ mod scanner { ); let sources = vec![ - GlobEntry { - base: dir.join("project-a").to_string_lossy().to_string(), - pattern: "**/*".to_owned(), - }, - GlobEntry { - base: dir.join("project-b").to_string_lossy().to_string(), - pattern: "**/*".to_owned(), - }, + public_source_entry_from_pattern(dir.join("project-a"), "@source '**/*'"), + public_source_entry_from_pattern(dir.join("project-b"), "@source '**/*'"), ]; - let mut scanner = Scanner::new(Some(sources)); + let mut scanner = Scanner::new(sources); let candidates = scanner.scan(); // We've done the initial scan and found the files @@ -632,6 +804,244 @@ mod scanner { ); } + #[test] + fn it_should_ignore_negated_custom_sources() { + let ScanResult { + candidates, + files, + globs, + normalized_sources, + } = scan_with_globs( + &[ + ("src/index.ts", "content-['src/index.ts']"), + ("src/colors/red.jsx", "content-['src/colors/red.jsx']"), + ("src/colors/blue.tsx", "content-['src/colors/blue.tsx']"), + ("src/colors/green.tsx", "content-['src/colors/green.tsx']"), + ("src/utils/string.ts", "content-['src/utils/string.ts']"), + ("src/utils/date.ts", "content-['src/utils/date.ts']"), + ("src/utils/file.ts", "content-['src/utils/file.ts']"), + ( + "src/admin/foo/template.html", + "content-['src/admin/template.html']", + ), + ( + "src/templates/index.html", + "content-['src/templates/index.html']", + ), + ], + vec![ + "@source '**/*'", + "@source not 'src/index.ts'", + "@source not '**/*.{jsx,tsx}'", + "@source not 'src/utils'", + "@source not 'dist'", + ], + ); + + assert_eq!( + candidates, + vec![ + "content-['src/admin/template.html']", + "content-['src/templates/index.html']", + ] + ); + + assert_eq!( + files, + vec![ + "src/admin/foo/template.html", + "src/templates/index.html", + // These files are ignored and thus do not need to be watched: + + // "src/colors/blue.tsx", + // "src/colors/green.tsx", + // "src/colors/red.jsx", + // "src/index.ts", + // "src/utils/date.ts", + // "src/utils/file.ts", + // "src/utils/string.ts" + ] + ); + assert_eq!( + globs, + vec![ + "*", + "src/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + "src/admin/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + "src/colors/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + "src/templates/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + ] + ); + + assert_eq!(normalized_sources, vec!["**/*",]); + } + + #[test] + fn it_should_include_defined_extensions_that_are_ignored_by_default() { + let ScanResult { + candidates, + files, + globs, + normalized_sources, + } = scan_with_globs( + // Typically skipped + &[ + ("src/index.exe", "content-['src/index.exe']"), + ("src/index.bin", 
"content-['src/index.bin']"), + ("out/out.exe", "content-['out/out.exe']"), + ], + // But explicitly included + vec!["@source '**/*'", "@source 'src/**/*.{exe,bin}'"], + ); + + assert_eq!( + candidates, + vec!["content-['src/index.bin']", "content-['src/index.exe']",] + ); + assert_eq!(files, vec!["src/index.bin", "src/index.exe",]); + assert_eq!( + globs, + vec![ + "*", + // Contains `.exe` and `.bin` in the list + "out/**/*.{aspx,astro,bin,cjs,cts,eex,erb,exe,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", + "src/{**/*.bin,**/*.exe,**/*.{aspx,astro,bin,cjs,cts,eex,erb,exe,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}}", + ] + ); + assert_eq!( + normalized_sources, + vec!["**/*", "src/**/*.bin", "src/**/*.exe"] + ); + } + + #[test] + fn it_should_work_with_manual_glob_only() { + let ScanResult { + candidates, + files, + globs, + normalized_sources, + } = scan_with_globs( + &[ + ("index.html", "content-['index.html']"), + ("src/index.html", "content-['src/index.html']"), + ("src/ignore.html", "content-['src/ignore.html']"), + ("src/admin/index.html", "content-['src/admin/index.html']"), + ("src/admin/ignore.html", "content-['src/admin/ignore.html']"), + ( + "src/dashboard/index.html", + "content-['src/dashboard/index.html']", + ), + ( + "src/dashboard/ignore.html", + "content-['src/dashboard/ignore.html']", + ), + ("src/lib.ts", "content-['src/lib.ts']"), + ], + vec![ + "@source './src/**/*.html'", + "@source not './src/index.html'", + "@source not './src/**/ignore.html'", + ], + ); + + assert_eq!( + candidates, + vec![ + "content-['src/admin/index.html']", + "content-['src/dashboard/index.html']", + ] + ); + + assert_eq!( + files, + vec!["src/admin/index.html", "src/dashboard/index.html",] + ); + assert_eq!(globs, vec!["src/**/*.html"]); + assert_eq!(normalized_sources, vec!["src/**/*.html"]); + } + + #[test] + fn it_respects_gitignore_in_workspace_root() { + let ScanResult { + candidates, + files, + globs, + normalized_sources, + } = scan_with_globs( + &[ + (".gitignore", "ignore-1.html\nweb/ignore-2.html"), + ("src/index.html", "content-['src/index.html']"), + ("web/index.html", "content-['web/index.html']"), + ("web/ignore-1.html", "content-['web/ignore-1.html']"), + ("web/ignore-2.html", "content-['web/ignore-2.html']"), + ], + vec!["@source './src'", "@source './web'"], + ); + + assert_eq!( + candidates, + vec!["content-['src/index.html']", "content-['web/index.html']",] + ); + + assert_eq!(files, vec!["src/index.html", "web/index.html",]); + assert_eq!(globs, vec!["src/*", "web/*",]); + assert_eq!(normalized_sources, vec!["src/**/*", "web/**/*",]); + } + + #[test] + fn it_includes_skipped_by_default_extensions_with_a_specific_source() { + let ScanResult { + candidates, + files, + globs, + normalized_sources, + } = scan_with_globs( + &[ + ("src/logo.jpg", "content-['/src/logo.jpg']"), + ("src/logo.png", "content-['/src/logo.png']"), + ], + vec!["@source './src/logo.{jpg,png}'"], + ); + + assert_eq!( + candidates, + vec!["content-['/src/logo.jpg']", "content-['/src/logo.png']"] + ); + assert_eq!(files, vec!["src/logo.jpg", "src/logo.png"]); + assert!(globs.is_empty()); + assert_eq!(normalized_sources, vec!["src/logo.jpg", "src/logo.png"]); + } + + #[test] + fn it_respects_gitignore_in_workspace_root_for_manual_globs() { + let ScanResult { + candidates, + files, + 
globs, + normalized_sources, + } = scan_with_globs( + &[ + (".gitignore", "ignore-1.html\n/web/ignore-2.html"), + ("web/index.html", "content-['web/index.html']"), + ("web/ignore-1.html", "content-['web/ignore-1.html']"), + ("web/ignore-2.html", "content-['web/ignore-2.html']"), + ], + vec!["@source './web'", "@source './web/ignore-1.html'"], + ); + assert_eq!( + candidates, + vec![ + "content-['web/ignore-1.html']", + "content-['web/index.html']", + ] + ); + + assert_eq!(files, vec!["web/ignore-1.html", "web/index.html",]); + assert_eq!(globs, vec!["web/*"]); + assert_eq!(normalized_sources, vec!["web/**/*", "web/ignore-1.html"]); + } + #[test] fn skips_ignore_files_outside_of_a_repo() { // Create a temporary working directory @@ -665,21 +1075,54 @@ mod scanner { "home/project/apps/web/ignore-web.html", "content-['ignore-web.html']", ), + // Auto content detection outside of `web/` + ( + "home/project/apps/admin/index.html", + "content-['home/project/apps/admin/index.html']", + ), + // Manual sources outside of `web/` + ( + "home/project/apps/dashboard/index.html", + "content-['home/project/apps/dashboard/index.html']", + ), ], ); - let sources = vec![GlobEntry { - base: dir - .join("home/project/apps/web") - .to_string_lossy() - .to_string(), - pattern: "**/*".to_owned(), - }]; + let sources = vec![ + public_source_entry_from_pattern( + dir.join("home/project/apps/web") + .to_string_lossy() + .to_string() + .into(), + "@source '**/*'", + ), + public_source_entry_from_pattern( + dir.join("home/project/apps/web") + .to_string_lossy() + .to_string() + .into(), + "@source '../admin'", + ), + public_source_entry_from_pattern( + dir.join("home/project/apps/web") + .to_string_lossy() + .to_string() + .into(), + "@source '../dashboard/*.html'", + ), + ]; - let candidates = Scanner::new(Some(sources.clone())).scan(); + let candidates = Scanner::new(sources.clone()).scan(); // All ignore files are applied because there's no git repo - assert_eq!(candidates, vec!["content-['index.html']".to_owned(),]); + assert_eq!( + candidates, + vec![ + "content-['home/project/apps/admin/index.html']", + "content-['home/project/apps/dashboard/index.html']", + "content-['index.html']" + ] + ); // Initialize `home` as a git repository and scan again // The results should be the same as before @@ -687,9 +1130,16 @@ mod scanner { .arg("init") .current_dir(dir.join("home")) .output(); - let candidates = Scanner::new(Some(sources.clone())).scan(); + let candidates = Scanner::new(sources.clone()).scan(); - assert_eq!(candidates, vec!["content-['index.html']".to_owned(),]); + assert_eq!( + candidates, + vec![ + "content-['home/project/apps/admin/index.html']", + "content-['home/project/apps/dashboard/index.html']", + "content-['index.html']" + ] + ); // Drop the .git folder fs::remove_dir_all(dir.join("home/.git")).unwrap(); @@ -699,13 +1149,15 @@ mod scanner { .arg("init") .current_dir(dir.join("home/project")) .output(); - let candidates = Scanner::new(Some(sources.clone())).scan(); + let candidates = Scanner::new(sources.clone()).scan(); assert_eq!( candidates, vec![ - "content-['ignore-home.html']".to_owned(), - "content-['index.html']".to_owned(), + "content-['home/project/apps/admin/index.html']", + "content-['home/project/apps/dashboard/index.html']", + "content-['ignore-home.html']", + "content-['index.html']" ] ); @@ -717,14 +1169,16 @@ mod scanner { .arg("init") .current_dir(dir.join("home/project/apps")) .output(); - let candidates = Scanner::new(Some(sources.clone())).scan(); + let candidates = 
Scanner::new(sources.clone()).scan(); assert_eq!( candidates, vec![ - "content-['ignore-home.html']".to_owned(), - "content-['ignore-project.html']".to_owned(), - "content-['index.html']".to_owned(), + "content-['home/project/apps/admin/index.html']", + "content-['home/project/apps/dashboard/index.html']", + "content-['ignore-home.html']", + "content-['ignore-project.html']", + "content-['index.html']" ] ); @@ -736,16 +1190,425 @@ mod scanner { .arg("init") .current_dir(dir.join("home/project/apps/web")) .output(); - let candidates = Scanner::new(Some(sources.clone())).scan(); + + let candidates = Scanner::new(sources.clone()).scan(); assert_eq!( candidates, vec![ - "content-['ignore-apps.html']".to_owned(), - "content-['ignore-home.html']".to_owned(), - "content-['ignore-project.html']".to_owned(), - "content-['index.html']".to_owned(), + "content-['home/project/apps/admin/index.html']", + "content-['home/project/apps/dashboard/index.html']", + "content-['ignore-apps.html']", + "content-['ignore-home.html']", + "content-['ignore-project.html']", + "content-['index.html']", ] ); } + + #[test] + fn test_explicitly_ignore_explicitly_allowed_files() { + // Create a temporary working directory + let dir = tempdir().unwrap().into_path(); + + // Create files + create_files_in( + &dir, + &[ + ("src/keep-me.html", "content-['keep-me.html']"), + ("src/ignore-me.html", "content-['ignore-me.html']"), + ], + ); + + let sources = vec![ + public_source_entry_from_pattern(dir.clone(), "@source '**/*.html'"), + public_source_entry_from_pattern(dir.clone(), "@source not 'src/ignore-me.html'"), + ]; + + let candidates = Scanner::new(sources.clone()).scan(); + + assert_eq!(candidates, vec!["content-['keep-me.html']"]); + } + + #[test] + fn test_works_with_filenames_containing_glob_characters() { + // Create a temporary working directory + let dir = tempdir().unwrap().into_path(); + + // Create files + create_files_in( + &dir, + &[ + ("src/app/[foo]/ignore-me.html", "content-['ignore-me.html']"), + ("src/app/[foo]/keep-me.html", "content-['keep-me.html']"), + ], + ); + + let sources = vec![ + public_source_entry_from_pattern(dir.clone(), "@source '**/*'"), + public_source_entry_from_pattern( + dir.clone(), + "@source not 'src/app/[foo]/ignore*.html'", + ), + ]; + + let candidates = Scanner::new(sources.clone()).scan(); + + assert_eq!(candidates, vec!["content-['keep-me.html']"]); + } + + #[test] + fn test_ignore_files_can_be_included_with_custom_source_rule() { + // Create a temporary working directory + let dir = tempdir().unwrap().into_path(); + + // Create files + create_files_in( + &dir, + &[("src/keep-me.html", "content-['src/keep-me.html']")], + ); + + let mut scanner = Scanner::new(vec![ + public_source_entry_from_pattern(dir.clone(), "@source '**/*.html'"), + public_source_entry_from_pattern( + dir.clone(), + "@source not 'src/ignored-by-source-not.html'", + ), + ]); + + let candidates = scanner.scan(); + assert_eq!(candidates, vec!["content-['src/keep-me.html']"]); + + // Create new files that should definitely be ignored + create_files_in( + &dir, + &[ + // Create new file that matches the `@source '…'` glob + ("src/new-file.html", "content-['src/new-file.html']"), + // Create new file that is ignored based on file extension + ( + "src/ignore-by-extension.bin", + "content-['src/ignore-by-extension.bin']", + ), + // Create a file that is ignored based on the `.gitignore` file + (".gitignore", "src/ignored-by-gitignore.html"), + ( + "src/ignored-by-gitignore.html", + 
"content-['src/ignored-by-gitignore.html']", + ), + // Create a file that is ignored by the `@source not '…'` + ( + "src/ignored-by-source-not.html", + "content-['src/ignored-by-source-not.html']", + ), + ], + ); + + let candidates = scanner.scan(); + + assert_eq!( + candidates, + vec![ + // Ignored by git ignore BUT included by `@source "**/*.html"` + "content-['src/ignored-by-gitignore.html']", + "content-['src/keep-me.html']", + "content-['src/new-file.html']" + ] + ); + } + + #[test] + fn test_allow_default_ignored_files() { + // Create a temporary working directory + let dir = tempdir().unwrap().into_path(); + + // Create files + create_files_in(&dir, &[("foo.styl", "content-['foo.styl']")]); + + let sources = vec![public_source_entry_from_pattern( + dir.clone(), + "@source '**/*'", + )]; + + let mut scanner = Scanner::new(sources.clone()); + + let candidates = scanner.scan(); + assert!(candidates.is_empty()); + + // Explicitly allow `.styl` files + let mut scanner = Scanner::new(vec![ + public_source_entry_from_pattern(dir.clone(), "@source '**/*'"), + public_source_entry_from_pattern(dir.clone(), "@source '*.styl'"), + ]); + + let candidates = scanner.scan(); + assert_eq!(candidates, vec!["content-['foo.styl']"]); + } + + #[test] + fn test_allow_default_ignored_files_via_gitignore() { + // Create a temporary working directory + let dir = tempdir().unwrap().into_path(); + + // Create files + create_files_in( + &dir, + &[ + ("index.html", "content-['index.html']"), + (".gitignore", "index.html"), + ], + ); + + let mut scanner = Scanner::new(vec![public_source_entry_from_pattern( + dir.clone(), + "@source '**/*'", + )]); + + let candidates = scanner.scan(); + assert!(candidates.is_empty()); + + let mut scanner = Scanner::new(vec![ + public_source_entry_from_pattern(dir.clone(), "@source '**/*'"), + public_source_entry_from_pattern(dir.clone(), "@source './*.html'"), + ]); + + let candidates = scanner.scan(); + assert_eq!(candidates, vec!["content-['index.html']"]); + } + + #[test] + fn test_allow_explicit_node_modules_paths() { + // Create a temporary working directory + let dir = tempdir().unwrap().into_path(); + + // Create files + create_files_in( + &dir, + &[ + // Current project + ("src/index.html", "content-['src/index.html']"), + // Ignore file + (".gitignore", "node_modules"), + // Library ignored by default + ( + "node_modules/my-ui-lib/index.html", + "content-['node_modules/my-ui-lib/index.html']", + ), + ], + ); + + // Default auto source detection + let sources = vec![public_source_entry_from_pattern( + dir.clone(), + "@source './'", + )]; + + let mut scanner = Scanner::new(sources.clone()); + + let candidates = scanner.scan(); + assert_eq!(candidates, vec!["content-['src/index.html']"]); + + // Explicitly listing all `*.html` files, should not include `node_modules` because it's + // ignored + let sources = vec![public_source_entry_from_pattern( + dir.clone(), + "@source '**/*.html'", + )]; + + let mut scanner = Scanner::new(sources.clone()); + let candidates = scanner.scan(); + assert_eq!(candidates, vec!["content-['src/index.html']"]); + + // Explicitly listing all `*.html` files + // Explicitly list the `node_modules/my-ui-lib` + // + let sources = vec![ + public_source_entry_from_pattern(dir.clone(), "@source '**/*.html'"), + public_source_entry_from_pattern(dir.clone(), "@source 'node_modules/my-ui-lib'"), + ]; + + let mut scanner = Scanner::new(sources.clone()); + let candidates = scanner.scan(); + assert_eq!( + candidates, + vec![ + 
"content-['node_modules/my-ui-lib/index.html']", + "content-['src/index.html']" + ] + ); + } + + #[test] + fn test_ignore_node_modules_without_gitignore() { + let ScanResult { + candidates, + files, + globs, + normalized_sources, + } = scan_with_globs( + &[ + ( + "packages/web/index.html", + "content-['packages/web/index.html']", + ), + ( + "node_modules/index.html", + "content-['node_modules/index.html']", + ), + ( + "packages/web/node_modules/index.html", + "content-['packages/web/node_modules/index.html']", + ), + ], + vec!["@source '**/*'"], + ); + + assert_eq!(candidates, vec!["content-['packages/web/index.html']"]); + + assert_eq!(files, vec!["packages/web/index.html",]); + assert_eq!(globs, vec!["*", "packages/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}", "packages/web/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}"]); + assert_eq!(normalized_sources, vec!["**/*"]); + } + + #[test] + fn test_ignore_gitignore_in_node_modules_source() { + let ScanResult { + candidates, + files, + globs, + normalized_sources, + } = scan_with_globs( + &[ + (".gitignore", "node_modules\ndist"), + ( + "node_modules/my-ui-lib/dist/index.html", + "content-['node_modules/my-ui-lib/dist/index.html']", + ), + ( + "node_modules/my-ui-lib/node.exe", + "content-['node_modules/my-ui-lib/node.exe']", + ), + ], + vec!["@source 'node_modules/my-ui-lib'"], + ); + + assert_eq!( + candidates, + vec!["content-['node_modules/my-ui-lib/dist/index.html']"] + ); + assert_eq!(files, vec!["node_modules/my-ui-lib/dist/index.html"]); + assert_eq!(globs, vec!["node_modules/my-ui-lib/*", "node_modules/my-ui-lib/dist/**/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}"]); + assert_eq!(normalized_sources, vec!["node_modules/my-ui-lib/**/*"]); + } + + #[test] + fn test_manually_scanning_files_should_follow_all_rules() { + // Create a temporary working directory + let dir = tempdir().unwrap().into_path(); + + // Create files + create_files_in( + &dir, + &[ + // Ignore all `.jsx` files, and all `generated` folders + (".gitignore", "*.jsx\ngenerated/"), + // .tsx files are allowed + ( + "src/components/button.tsx", + "content-['src/components/button.tsx']", + ), + // .jsx files are not allowed + ( + "src/components/button.jsx", + "content-['src/components/button.jsx']", + ), + ], + ); + + let mut scanner = Scanner::new(vec![public_source_entry_from_pattern( + dir.clone(), + "@source '**/*'", + )]); + + let candidates = scanner.scan(); + assert_eq!(candidates, vec!["content-['src/components/button.tsx']"]); + + // Create 2 new files, one "good" and one "bad" file, and manually scan them. This should + // only return the "good" file because the "bad" one is ignored by a `.gitignore` file. 
+ create_files_in( + &dir, + &[ + ( + "src/components/good.tsx", + "content-['src/components/good.tsx']", + ), + ( + "src/components/bad.jsx", + "content-['src/components/bad.jsx']", + ), + ], + ); + + let candidates = scanner.scan_content(vec![ + ChangedContent::File(dir.join("src/components/good.tsx"), "tsx".to_owned()), + ChangedContent::File(dir.join("src/components/bad.jsx"), "jsx".to_owned()), + ]); + + assert_eq!(candidates, vec!["content-['src/components/good.tsx']"]); + + // Create a generated file in a nested folder that is ignored by a `.gitignore` file higher + // up the tree. + create_files_in( + &dir, + &[ + ( + "src/components/generated/bad.tsx", + "content-['src/components/generated/bad.tsx']", + ), + ( + "src/components/generated/bad.jsx", + "content-['src/components/generated/bad.jsx']", + ), + ], + ); + + let candidates = scanner.scan_content(vec![ + ChangedContent::File( + dir.join("src/components/generated/bad.tsx"), + "tsx".to_owned(), + ), + ChangedContent::File( + dir.join("src/components/generated/bad.jsx"), + "jsx".to_owned(), + ), + ]); + + assert!(candidates.is_empty()); + } + + #[test] + fn test_works_with_utf8_special_character_paths() { + let ScanResult { + candidates, + files, + globs, + normalized_sources, + } = scan_with_globs( + &[ + ("src/💩.js", "content-['src/💩.js']"), + ("src/🤦‍♂️.tsx", "content-['src/🤦‍♂️.tsx']"), + ("src/🤦‍♂️/foo.tsx", "content-['src/🤦‍♂️/foo.tsx']"), + ], + vec!["@source '**/*'", "@source not 'src/🤦‍♂️'"], + ); + + assert_eq!( + candidates, + vec!["content-['src/💩.js']", "content-['src/🤦‍♂️.tsx']"] + ); + + assert_eq!(files, vec!["src/💩.js", "src/🤦‍♂️.tsx"]); + assert_eq!(globs, vec!["*", "src/*/*.{aspx,astro,cjs,cts,eex,erb,gjs,gts,haml,handlebars,hbs,heex,html,jade,js,jsx,liquid,md,mdx,mjs,mts,mustache,njk,nunjucks,php,pug,py,razor,rb,rhtml,rs,slim,svelte,tpl,ts,tsx,twig,vue}"]); + assert_eq!(normalized_sources, vec!["**/*"]); + } } diff --git a/integrations/cli/index.test.ts b/integrations/cli/index.test.ts index 487c88e40b76..dc06ecb095c0 100644 --- a/integrations/cli/index.test.ts +++ b/integrations/cli/index.test.ts @@ -665,7 +665,7 @@ test( /* (4) */ /* - './pages' should be auto-scanned */ /* - Only '.html' files should be included */ - /* - './page/ignored.html' should be ignored */ + /* - './page/ignored.html' will not be ignored because of the specific pattern */ @source "./pages/**/*.html"; `, @@ -702,7 +702,7 @@ test( // (4) 'pages/foo.html': 'content-["pages/foo.html"]', 'pages/nested/foo.html': 'content-["pages/nested/foo.html"]', - 'pages/ignored.html': 'content-["pages/ignored.html"] content-["BAD"]', + 'pages/ignored.html': 'content-["pages/ignored.html"]', 'pages/foo.jsx': 'content-["pages/foo.jsx"] content-["BAD"]', 'pages/nested/foo.jsx': 'content-["pages/nested/foo.jsx"] content-["BAD"]', }, @@ -733,6 +733,10 @@ test( --tw-content: "pages/foo.html"; content: var(--tw-content); } + .content-\\[\\"pages\\/ignored\\.html\\"\\] { + --tw-content: "pages/ignored.html"; + content: var(--tw-content); + } .content-\\[\\"pages\\/nested\\/foo\\.html\\"\\] { --tw-content: "pages/nested/foo.html"; content: var(--tw-content); @@ -893,10 +897,11 @@ test( bar.html `, - // Project D, foo.html is ignored by the gitignore file. + // Project D, foo.html is ignored by the gitignore file but the source rule is explicit about + // adding all `.html` files. 'project-d/src/foo.html': html`
`, @@ -971,6 +976,10 @@ test( --tw-content: 'project-d/src/bar.html'; content: var(--tw-content); } + .content-\\[\\'project-d\\/src\\/foo\\.html\\'\\] { + --tw-content: 'project-d/src/foo.html'; + content: var(--tw-content); + } .content-\\[\\'project-d\\/src\\/index\\.html\\'\\] { --tw-content: 'project-d/src/index.html'; content: var(--tw-content); @@ -1135,15 +1144,13 @@ test( @reference 'tailwindcss/theme'; /* (1) */ - /* - Only './src' should be auto-scanned, not the current working directory */ - /* - .gitignore'd paths should be ignored (node_modules) */ - /* - Binary extensions should be ignored (jpg, zip) */ + /* - Disable auto-source detection */ @import 'tailwindcss/utilities' source(none); /* (2) */ /* - './pages' should be auto-scanned */ /* - Only '.html' files should be included */ - /* - './page/ignored.html' should be ignored */ + /* - './page/ignored.html' will not be ignored because of the specific pattern */ @source "./pages/**/*.html"; `, @@ -1163,7 +1170,7 @@ test( // (4) 'pages/foo.html': 'content-["pages/foo.html"]', 'pages/nested/foo.html': 'content-["pages/nested/foo.html"]', - 'pages/ignored.html': 'content-["pages/ignored.html"] content-["BAD"]', + 'pages/ignored.html': 'content-["pages/ignored.html"]', 'pages/foo.jsx': 'content-["pages/foo.jsx"] content-["BAD"]', 'pages/nested/foo.jsx': 'content-["pages/nested/foo.jsx"] content-["BAD"]', }, @@ -1178,6 +1185,10 @@ test( --tw-content: "pages/foo.html"; content: var(--tw-content); } + .content-\\[\\"pages\\/ignored\\.html\\"\\] { + --tw-content: "pages/ignored.html"; + content: var(--tw-content); + } .content-\\[\\"pages\\/nested\\/foo\\.html\\"\\] { --tw-content: "pages/nested/foo.html"; content: var(--tw-content); diff --git a/integrations/postcss/index.test.ts b/integrations/postcss/index.test.ts index 524752b217f7..63a0f019eaf8 100644 --- a/integrations/postcss/index.test.ts +++ b/integrations/postcss/index.test.ts @@ -1,4 +1,3 @@ -import dedent from 'dedent' import path from 'node:path' import { candidate, css, html, js, json, test, ts, yaml } from '../utils' @@ -636,589 +635,3 @@ test( await fs.expectFileToContain('project-a/dist/out.css', [candidate`content-['c/src/index.js']`]) }, ) - -test( - 'auto source detection kitchen sink', - { - fs: { - 'package.json': json` - { - "dependencies": { - "postcss": "^8", - "postcss-cli": "^10", - "tailwindcss": "workspace:^", - "@tailwindcss/postcss": "workspace:^" - } - } - `, - 'postcss.config.js': js` - module.exports = { - plugins: { - '@tailwindcss/postcss': {}, - }, - } - `, - 'index.css': css` - @reference 'tailwindcss/theme'; - - /* (1) */ - /* - Only './src' should be auto-scanned, not the current working directory */ - /* - .gitignore'd paths should be ignored (node_modules) */ - /* - Binary extensions should be ignored (jpg, zip) */ - @import 'tailwindcss/utilities' source('./src'); - - /* (2) */ - /* - All HTML and JSX files in 'ignored/components' should be scanned */ - /* - All other extensions should be ignored */ - @source "./ignored/components/*.{html,jsx}"; - - /* (3) */ - /* - './components' should be auto-scanned in addition to './src' */ - /* - './components/ignored.html' should still be ignored */ - /* - Binary extensions in './components' should be ignored */ - @source "./components"; - - /* (4) */ - /* - './pages' should be auto-scanned */ - /* - Only '.html' files should be included */ - /* - './page/ignored.html' should be ignored */ - @source "./pages/**/*.html"; - `, - - '.gitignore': dedent` - /src/ignored - /ignored - 
/components/ignored.html - /pages/ignored.html - `, - - // (1) - 'index.html': 'content-["index.html"] content-["BAD"]', // "Root" source is in `./src` - 'src/index.html': 'content-["src/index.html"]', - 'src/nested/index.html': 'content-["src/nested/index.html"]', - 'src/index.jpg': 'content-["src/index.jpg"] content-["BAD"]', - 'src/nested/index.tar': 'content-["src/nested/index.tar"] content-["BAD"]', - 'src/ignored/index.html': 'content-["src/ignored/index.html"] content-["BAD"]', - - // (2) - 'ignored/components/my-component.html': 'content-["ignored/components/my-component.html"]', - 'ignored/components/my-component.jsx': 'content-["ignored/components/my-component.jsx"]', - - // Ignored and not explicitly listed by (2) - 'ignored/components/my-component.tsx': - 'content-["ignored/components/my-component.tsx"] content-["BAD"]', - 'ignored/components/nested/my-component.html': - 'content-["ignored/components/nested/my-component.html"] content-["BAD"]', - - // (3) - 'components/my-component.tsx': 'content-["components/my-component.tsx"]', - 'components/nested/my-component.tsx': 'content-["components/nested/my-component.tsx"]', - 'components/ignored.html': 'content-["components/ignored.html"] content-["BAD"]', - - // (4) - 'pages/foo.html': 'content-["pages/foo.html"]', - 'pages/nested/foo.html': 'content-["pages/nested/foo.html"]', - 'pages/ignored.html': 'content-["pages/ignored.html"] content-["BAD"]', - 'pages/foo.jsx': 'content-["pages/foo.jsx"] content-["BAD"]', - 'pages/nested/foo.jsx': 'content-["pages/nested/foo.jsx"] content-["BAD"]', - }, - }, - async ({ fs, exec, expect }) => { - await exec('pnpm postcss index.css --output dist/out.css') - - expect(await fs.dumpFiles('./dist/*.css')).toMatchInlineSnapshot(` - " - --- ./dist/out.css --- - .content-\\[\\"components\\/my-component\\.tsx\\"\\] { - --tw-content: "components/my-component.tsx"; - content: var(--tw-content); - } - .content-\\[\\"components\\/nested\\/my-component\\.tsx\\"\\] { - --tw-content: "components/nested/my-component.tsx"; - content: var(--tw-content); - } - .content-\\[\\"ignored\\/components\\/my-component\\.html\\"\\] { - --tw-content: "ignored/components/my-component.html"; - content: var(--tw-content); - } - .content-\\[\\"ignored\\/components\\/my-component\\.jsx\\"\\] { - --tw-content: "ignored/components/my-component.jsx"; - content: var(--tw-content); - } - .content-\\[\\"pages\\/foo\\.html\\"\\] { - --tw-content: "pages/foo.html"; - content: var(--tw-content); - } - .content-\\[\\"pages\\/nested\\/foo\\.html\\"\\] { - --tw-content: "pages/nested/foo.html"; - content: var(--tw-content); - } - .content-\\[\\"src\\/index\\.html\\"\\] { - --tw-content: "src/index.html"; - content: var(--tw-content); - } - .content-\\[\\"src\\/nested\\/index\\.html\\"\\] { - --tw-content: "src/nested/index.html"; - content: var(--tw-content); - } - @property --tw-content { - syntax: "*"; - inherits: false; - initial-value: ""; - } - " - `) - }, -) - -test( - 'auto source detection in depth, source(…) and `@source` can be configured to use auto source detection (build + watch mode)', - { - fs: { - 'package.json': json`{}`, - 'pnpm-workspace.yaml': yaml` - # - packages: - - project-a - `, - 'project-a/package.json': json` - { - "dependencies": { - "postcss": "^8", - "postcss-cli": "^10", - "tailwindcss": "workspace:^", - "@tailwindcss/postcss": "workspace:^" - } - } - `, - 'project-a/postcss.config.js': js` - module.exports = { - plugins: { - '@tailwindcss/postcss': {}, - }, - } - `, - 'project-a/src/index.css': css` - 
@reference 'tailwindcss/theme'; - - /* Run auto-content detection in ../../project-b */ - @import 'tailwindcss/utilities' source('../../project-b'); - - /* Explicitly using node_modules in the @source allows git ignored folders */ - @source '../node_modules/{my-lib-1,my-lib-2}/src/**/*.html'; - - /* We typically ignore these extensions, but now include them explicitly */ - @source './logo.{jpg,png}'; - - /* Project C should apply auto source detection */ - @source '../../project-c'; - - /* Project D should apply auto source detection rules, such as ignoring node_modules */ - @source '../../project-d/**/*.{html,js}'; - @source '../../project-d/**/*.bin'; - - /* Same as above, but my-lib-2 _should_ be includes */ - @source '../../project-d/node_modules/my-lib-2/src/*.{html,js}'; - - /* bar.html is git ignored, but explicitly listed here to scan */ - @source '../../project-d/src/bar.html'; - `, - - // Project A is the current folder, but we explicitly configured - // `source(project-b)`, therefore project-a should not be included in - // the output. - 'project-a/src/index.html': html` -
- `, - - // Project A explicitly includes an extension we usually ignore, - // therefore it should be included in the output. - 'project-a/src/logo.jpg': html` -
- `, - - // Project A explicitly includes node_modules/{my-lib-1,my-lib-2}, - // therefore these files should be included in the output. - 'project-a/node_modules/my-lib-1/src/index.html': html` -
- `, - 'project-a/node_modules/my-lib-2/src/index.html': html` -
- `, - - // Project B is the configured `source(…)`, therefore auto source - // detection should include known extensions and folders in the output. - 'project-b/src/index.html': html` -
- `, - - // Project B is the configured `source(…)`, therefore auto source - // detection should apply and node_modules should not be included in the - // output. - 'project-b/node_modules/my-lib-3/src/index.html': html` -
- `, - - // Project C should apply auto source detection, therefore known - // extensions and folders should be included in the output. - 'project-c/src/index.html': html` -
- `, - - // Project C should apply auto source detection, therefore known ignored - // extensions should not be included in the output. - 'project-c/src/logo.jpg': html` -
- `, - - // Project C should apply auto source detection, therefore node_modules - // should not be included in the output. - 'project-c/node_modules/my-lib-1/src/index.html': html` -
- `, - - // Project D should apply auto source detection rules, such as ignoring - // node_modules. - 'project-d/node_modules/my-lib-1/src/index.html': html` -
- `, - - // Project D has an explicit glob containing node_modules, thus should include the html file - 'project-d/node_modules/my-lib-2/src/index.html': html` -
- `, - - 'project-d/src/.gitignore': dedent` - foo.html - bar.html - `, - - // Project D, foo.html is ignored by the gitignore file. - 'project-d/src/foo.html': html` -
- `, - - // Project D, bar.html is ignored by the gitignore file. But explicitly - // listed as a `@source` glob. - 'project-d/src/bar.html': html` -
- `, - - // Project D should look for files with the extensions html and js. - 'project-d/src/index.html': html` -
- `, - - // Project D should have a binary file even though we ignore binary files - // by default, but it's explicitly listed. - 'project-d/my-binary-file.bin': html` -
- `, - }, - }, - async ({ fs, exec, spawn, root, expect }) => { - await exec('pnpm postcss src/index.css --output dist/out.css --verbose', { - cwd: path.join(root, 'project-a'), - }) - - expect(await fs.dumpFiles('./project-a/dist/*.css')).toMatchInlineSnapshot(` - " - --- ./project-a/dist/out.css --- - .content-\\[\\'project-a\\/node_modules\\/my-lib-1\\/src\\/index\\.html\\'\\] { - --tw-content: 'project-a/node modules/my-lib-1/src/index.html'; - content: var(--tw-content); - } - .content-\\[\\'project-a\\/node_modules\\/my-lib-2\\/src\\/index\\.html\\'\\] { - --tw-content: 'project-a/node modules/my-lib-2/src/index.html'; - content: var(--tw-content); - } - .content-\\[\\'project-a\\/src\\/logo\\.jpg\\'\\] { - --tw-content: 'project-a/src/logo.jpg'; - content: var(--tw-content); - } - .content-\\[\\'project-b\\/src\\/index\\.html\\'\\] { - --tw-content: 'project-b/src/index.html'; - content: var(--tw-content); - } - .content-\\[\\'project-c\\/src\\/index\\.html\\'\\] { - --tw-content: 'project-c/src/index.html'; - content: var(--tw-content); - } - .content-\\[\\'project-d\\/my-binary-file\\.bin\\'\\] { - --tw-content: 'project-d/my-binary-file.bin'; - content: var(--tw-content); - } - .content-\\[\\'project-d\\/node_modules\\/my-lib-2\\/src\\/index\\.html\\'\\] { - --tw-content: 'project-d/node modules/my-lib-2/src/index.html'; - content: var(--tw-content); - } - .content-\\[\\'project-d\\/src\\/bar\\.html\\'\\] { - --tw-content: 'project-d/src/bar.html'; - content: var(--tw-content); - } - .content-\\[\\'project-d\\/src\\/index\\.html\\'\\] { - --tw-content: 'project-d/src/index.html'; - content: var(--tw-content); - } - @property --tw-content { - syntax: "*"; - inherits: false; - initial-value: ""; - } - " - `) - - // Watch mode tests - let process = await spawn( - 'pnpm postcss src/index.css --output dist/out.css --watch --verbose', - { - cwd: path.join(root, 'project-a'), - }, - ) - await process.onStderr((message) => message.includes('Waiting for file changes...')) - - // Changes to project-a should not be included in the output, we changed the - // base folder to project-b. - await fs.write( - 'project-a/src/index.html', - html`
`, - ) - await fs.expectFileNotToContain('./project-a/dist/out.css', [ - candidate`[.changed_&]:content-['project-a/src/index.html']`, - ]) - - // Changes to this file should be included, because we explicitly listed - // them using `@source`. - await fs.write( - 'project-a/src/logo.jpg', - html`
`, - ) - await fs.expectFileToContain('./project-a/dist/out.css', [ - candidate`[.changed_&]:content-['project-a/src/logo.jpg']`, - ]) - - // Changes to these files should be included, because we explicitly listed - // them using `@source`. - await fs.write( - 'project-a/node_modules/my-lib-1/src/index.html', - html`
`, - ) - await fs.expectFileToContain('./project-a/dist/out.css', [ - candidate`[.changed_&]:content-['project-a/node_modules/my-lib-1/src/index.html']`, - ]) - - await fs.write( - 'project-a/node_modules/my-lib-2/src/index.html', - html`
`, - ) - await fs.expectFileToContain('./project-a/dist/out.css', [ - candidate`[.changed_&]:content-['project-a/node_modules/my-lib-2/src/index.html']`, - ]) - - // Changes to this file should be included, because we changed the base to - // `project-b`. - await fs.write( - 'project-b/src/index.html', - html`
`, - ) - await fs.expectFileToContain('./project-a/dist/out.css', [ - candidate`[.changed_&]:content-['project-b/src/index.html']`, - ]) - - // Changes to this file should not be included. We did change the base to - // `project-b`, but we still apply the auto source detection rules which - // ignore `node_modules`. - await fs.write( - 'project-b/node_modules/my-lib-3/src/index.html', - html`
`, - ) - await fs.expectFileNotToContain('./project-a/dist/out.css', [ - candidate`[.changed_&]:content-['project-b/node_modules/my-lib-3/src/index.html']`, - ]) - - // Project C was added explicitly via `@source`, therefore changes to these - // files should be included. - await fs.write( - 'project-c/src/index.html', - html`
`, - ) - await fs.expectFileToContain('./project-a/dist/out.css', [ - candidate`[.changed_&]:content-['project-c/src/index.html']`, - ]) - - // Except for these files, since they are ignored by the default auto source - // detection rules. - await fs.write( - 'project-c/src/logo.jpg', - html`
`, - ) - await fs.expectFileNotToContain('./project-a/dist/out.css', [ - candidate`[.changed_&]:content-['project-c/src/logo.jpg']`, - ]) - await fs.write( - 'project-c/node_modules/my-lib-1/src/index.html', - html`
`, - ) - await fs.expectFileNotToContain('./project-a/dist/out.css', [ - candidate`[.changed_&]:content-['project-c/node_modules/my-lib-1/src/index.html']`, - ]) - - // Creating new files in the "root" of auto source detected folders - // We need to create the files and *then* update them because postcss-cli - // does not pick up new files — only changes to existing files. - await fs.create([ - 'project-b/new-file.html', - 'project-b/new-folder/new-file.html', - 'project-c/new-file.html', - 'project-c/new-folder/new-file.html', - ]) - - // If we don't wait writes will be coalesced into a "add" event which - // isn't picked up by postcss-cli. - await new Promise((resolve) => setTimeout(resolve, 100)) - - await fs.write( - 'project-b/new-file.html', - html`
`, - ) - await fs.write( - 'project-b/new-folder/new-file.html', - html`
`, - ) - await fs.write( - 'project-c/new-file.html', - html`
`, - ) - await fs.write( - 'project-c/new-folder/new-file.html', - html`
`, - ) - - await fs.expectFileToContain('./project-a/dist/out.css', [ - candidate`[.created_&]:content-['project-b/new-file.html']`, - candidate`[.created_&]:content-['project-b/new-folder/new-file.html']`, - candidate`[.created_&]:content-['project-c/new-file.html']`, - candidate`[.created_&]:content-['project-c/new-folder/new-file.html']`, - ]) - }, -) - -test( - 'auto source detection disabled', - { - fs: { - 'package.json': json` - { - "dependencies": { - "postcss": "^8", - "postcss-cli": "^10", - "tailwindcss": "workspace:^", - "@tailwindcss/postcss": "workspace:^" - } - } - `, - 'postcss.config.js': js` - module.exports = { - plugins: { - '@tailwindcss/postcss': {}, - }, - } - `, - 'index.css': css` - @reference 'tailwindcss/theme'; - - /* (1) */ - /* - Only './src' should be auto-scanned, not the current working directory */ - /* - .gitignore'd paths should be ignored (node_modules) */ - /* - Binary extensions should be ignored (jpg, zip) */ - @import 'tailwindcss/utilities' source(none); - - /* (2) */ - /* - './pages' should be auto-scanned */ - /* - Only '.html' files should be included */ - /* - './page/ignored.html' should be ignored */ - @source "./pages/**/*.html"; - `, - - '.gitignore': dedent` - /src/ignored - /pages/ignored.html - `, - - // (1) - 'index.html': 'content-["index.html"] content-["BAD"]', // "Root" source is in `./src` - 'src/index.html': 'content-["src/index.html"] content-["BAD"]', - 'src/nested/index.html': 'content-["src/nested/index.html"] content-["BAD"]', - 'src/index.jpg': 'content-["src/index.jpg"] content-["BAD"]', - 'src/nested/index.tar': 'content-["src/nested/index.tar"] content-["BAD"]', - 'src/ignored/index.html': 'content-["src/ignored/index.html"] content-["BAD"]', - - // (4) - 'pages/foo.html': 'content-["pages/foo.html"]', - 'pages/nested/foo.html': 'content-["pages/nested/foo.html"]', - 'pages/ignored.html': 'content-["pages/ignored.html"] content-["BAD"]', - 'pages/foo.jsx': 'content-["pages/foo.jsx"] content-["BAD"]', - 'pages/nested/foo.jsx': 'content-["pages/nested/foo.jsx"] content-["BAD"]', - }, - }, - async ({ fs, exec, expect }) => { - await exec('pnpm postcss index.css --output dist/out.css') - - expect(await fs.dumpFiles('./dist/*.css')).toMatchInlineSnapshot(` - " - --- ./dist/out.css --- - .content-\\[\\"pages\\/foo\\.html\\"\\] { - --tw-content: "pages/foo.html"; - content: var(--tw-content); - } - .content-\\[\\"pages\\/nested\\/foo\\.html\\"\\] { - --tw-content: "pages/nested/foo.html"; - content: var(--tw-content); - } - @property --tw-content { - syntax: "*"; - inherits: false; - initial-value: ""; - } - " - `) - }, -) diff --git a/integrations/postcss/source.test.ts b/integrations/postcss/source.test.ts new file mode 100644 index 000000000000..111a8156238c --- /dev/null +++ b/integrations/postcss/source.test.ts @@ -0,0 +1,799 @@ +import dedent from 'dedent' +import path from 'node:path' +import { candidate, css, html, js, json, test, yaml } from '../utils' + +test( + 'auto source detection kitchen sink', + { + fs: { + 'package.json': json` + { + "dependencies": { + "postcss": "^8", + "postcss-cli": "^10", + "tailwindcss": "workspace:^", + "@tailwindcss/postcss": "workspace:^" + } + } + `, + 'postcss.config.js': js` + module.exports = { + plugins: { + '@tailwindcss/postcss': {}, + }, + } + `, + 'index.css': css` + @reference 'tailwindcss/theme'; + + /* (1) */ + /* - Only './src' should be auto-scanned, not the current working directory */ + /* - .gitignore'd paths should be ignored (node_modules) */ + /* - Binary extensions 
should be ignored (jpg, zip) */ + @import 'tailwindcss/utilities' source('./src'); + + /* (2) */ + /* - All HTML and JSX files in 'ignored/components' should be scanned */ + /* - All other extensions should be ignored */ + @source "./ignored/components/*.{html,jsx}"; + + /* (3) */ + /* - './components' should be auto-scanned in addition to './src' */ + /* - './components/ignored.html' should still be ignored */ + /* - Binary extensions in './components' should be ignored */ + @source "./components"; + + /* (4) */ + /* - './pages' should be auto-scanned */ + /* - Only '.html' files should be included */ + /* - './page/ignored.html' will not be ignored because of the specific pattern */ + @source "./pages/**/*.html"; + `, + + '.gitignore': dedent` + /src/ignored + /ignored + /components/ignored.html + /pages/ignored.html + `, + + // (1) + 'index.html': 'content-["index.html"] content-["BAD"]', // "Root" source is in `./src` + 'src/index.html': 'content-["src/index.html"]', + 'src/nested/index.html': 'content-["src/nested/index.html"]', + 'src/index.jpg': 'content-["src/index.jpg"] content-["BAD"]', + 'src/nested/index.tar': 'content-["src/nested/index.tar"] content-["BAD"]', + 'src/ignored/index.html': 'content-["src/ignored/index.html"] content-["BAD"]', + + // (2) + 'ignored/components/my-component.html': 'content-["ignored/components/my-component.html"]', + 'ignored/components/my-component.jsx': 'content-["ignored/components/my-component.jsx"]', + + // Ignored and not explicitly listed by (2) + 'ignored/components/my-component.tsx': + 'content-["ignored/components/my-component.tsx"] content-["BAD"]', + 'ignored/components/nested/my-component.html': + 'content-["ignored/components/nested/my-component.html"] content-["BAD"]', + + // (3) + 'components/my-component.tsx': 'content-["components/my-component.tsx"]', + 'components/nested/my-component.tsx': 'content-["components/nested/my-component.tsx"]', + 'components/ignored.html': 'content-["components/ignored.html"] content-["BAD"]', + + // (4) + 'pages/foo.html': 'content-["pages/foo.html"]', + 'pages/nested/foo.html': 'content-["pages/nested/foo.html"]', + 'pages/ignored.html': 'content-["pages/ignored.html"]', + 'pages/foo.jsx': 'content-["pages/foo.jsx"] content-["BAD"]', + 'pages/nested/foo.jsx': 'content-["pages/nested/foo.jsx"] content-["BAD"]', + }, + }, + async ({ fs, exec, expect }) => { + await exec('pnpm postcss index.css --output dist/out.css') + + expect(await fs.dumpFiles('./dist/*.css')).toMatchInlineSnapshot(` + " + --- ./dist/out.css --- + .content-\\[\\"components\\/my-component\\.tsx\\"\\] { + --tw-content: "components/my-component.tsx"; + content: var(--tw-content); + } + .content-\\[\\"components\\/nested\\/my-component\\.tsx\\"\\] { + --tw-content: "components/nested/my-component.tsx"; + content: var(--tw-content); + } + .content-\\[\\"ignored\\/components\\/my-component\\.html\\"\\] { + --tw-content: "ignored/components/my-component.html"; + content: var(--tw-content); + } + .content-\\[\\"ignored\\/components\\/my-component\\.jsx\\"\\] { + --tw-content: "ignored/components/my-component.jsx"; + content: var(--tw-content); + } + .content-\\[\\"pages\\/foo\\.html\\"\\] { + --tw-content: "pages/foo.html"; + content: var(--tw-content); + } + .content-\\[\\"pages\\/ignored\\.html\\"\\] { + --tw-content: "pages/ignored.html"; + content: var(--tw-content); + } + .content-\\[\\"pages\\/nested\\/foo\\.html\\"\\] { + --tw-content: "pages/nested/foo.html"; + content: var(--tw-content); + } + 
.content-\\[\\"src\\/index\\.html\\"\\] { + --tw-content: "src/index.html"; + content: var(--tw-content); + } + .content-\\[\\"src\\/nested\\/index\\.html\\"\\] { + --tw-content: "src/nested/index.html"; + content: var(--tw-content); + } + @property --tw-content { + syntax: "*"; + inherits: false; + initial-value: ""; + } + " + `) + }, +) + +test( + 'auto source detection in depth, source(…) and `@source` can be configured to use auto source detection (build + watch mode)', + { + fs: { + 'package.json': json`{}`, + 'pnpm-workspace.yaml': yaml` + # + packages: + - project-a + `, + 'project-a/package.json': json` + { + "dependencies": { + "postcss": "^8", + "postcss-cli": "^10", + "tailwindcss": "workspace:^", + "@tailwindcss/postcss": "workspace:^" + } + } + `, + 'project-a/postcss.config.js': js` + module.exports = { + plugins: { + '@tailwindcss/postcss': {}, + }, + } + `, + 'project-a/src/index.css': css` + @reference 'tailwindcss/theme'; + + /* Run auto-content detection in ../../project-b */ + @import 'tailwindcss/utilities' source('../../project-b'); + + /* Explicitly using node_modules in the @source allows git ignored folders */ + @source '../node_modules/{my-lib-1,my-lib-2}/src/**/*.html'; + + /* We typically ignore these extensions, but now include them explicitly */ + @source './logo.{jpg,png}'; + + /* Project C should apply auto source detection */ + @source '../../project-c'; + + /* Project D should apply auto source detection rules, such as ignoring node_modules */ + @source '../../project-d/**/*.{html,js}'; + @source '../../project-d/**/*.bin'; + + /* Same as above, but my-lib-2 _should_ be includes */ + @source '../../project-d/node_modules/my-lib-2/src/*.{html,js}'; + + /* bar.html is git ignored, but explicitly listed here to scan */ + @source '../../project-d/src/bar.html'; + `, + + // Project A is the current folder, but we explicitly configured + // `source(project-b)`, therefore project-a should not be included in + // the output. + 'project-a/src/index.html': html` +
+ `, + + // Project A explicitly includes an extension we usually ignore, + // therefore it should be included in the output. + 'project-a/src/logo.jpg': html` +
+ `, + + // Project A explicitly includes node_modules/{my-lib-1,my-lib-2}, + // therefore these files should be included in the output. + 'project-a/node_modules/my-lib-1/src/index.html': html` +
+ `, + 'project-a/node_modules/my-lib-2/src/index.html': html` +
+ `, + + // Project B is the configured `source(…)`, therefore auto source + // detection should include known extensions and folders in the output. + 'project-b/src/index.html': html` +
+ `, + + // Project B is the configured `source(…)`, therefore auto source + // detection should apply and node_modules should not be included in the + // output. + 'project-b/node_modules/my-lib-3/src/index.html': html` +
+ `, + + // Project C should apply auto source detection, therefore known + // extensions and folders should be included in the output. + 'project-c/src/index.html': html` +
+ `, + + // Project C should apply auto source detection, therefore known ignored + // extensions should not be included in the output. + 'project-c/src/logo.jpg': html` +
+ `, + + // Project C should apply auto source detection, therefore node_modules + // should not be included in the output. + 'project-c/node_modules/my-lib-1/src/index.html': html` +
+ `, + + // Project D should apply auto source detection rules, such as ignoring + // node_modules. + 'project-d/node_modules/my-lib-1/src/index.html': html` +
+ `, + + // Project D has an explicit glob containing node_modules, thus should include the html file + 'project-d/node_modules/my-lib-2/src/index.html': html` +
+ `, + + 'project-d/src/.gitignore': dedent` + foo.html + bar.html + `, + + // Project D, foo.html is ignored by the gitignore file but the source rule is explicit about + // adding all `.html` files. + 'project-d/src/foo.html': html` +
+ `, + + // Project D, bar.html is ignored by the gitignore file. But explicitly + // listed as a `@source` glob. + 'project-d/src/bar.html': html` +
+ `, + + // Project D should look for files with the extensions html and js. + 'project-d/src/index.html': html` +
+ `, + + // Project D should have a binary file even though we ignore binary files + // by default, but it's explicitly listed. + 'project-d/my-binary-file.bin': html` +
+ `, + }, + }, + async ({ fs, exec, spawn, root, expect }) => { + await exec('pnpm postcss src/index.css --output dist/out.css --verbose', { + cwd: path.join(root, 'project-a'), + }) + + expect(await fs.dumpFiles('./project-a/dist/*.css')).toMatchInlineSnapshot(` + " + --- ./project-a/dist/out.css --- + .content-\\[\\'project-a\\/node_modules\\/my-lib-1\\/src\\/index\\.html\\'\\] { + --tw-content: 'project-a/node modules/my-lib-1/src/index.html'; + content: var(--tw-content); + } + .content-\\[\\'project-a\\/node_modules\\/my-lib-2\\/src\\/index\\.html\\'\\] { + --tw-content: 'project-a/node modules/my-lib-2/src/index.html'; + content: var(--tw-content); + } + .content-\\[\\'project-a\\/src\\/logo\\.jpg\\'\\] { + --tw-content: 'project-a/src/logo.jpg'; + content: var(--tw-content); + } + .content-\\[\\'project-b\\/src\\/index\\.html\\'\\] { + --tw-content: 'project-b/src/index.html'; + content: var(--tw-content); + } + .content-\\[\\'project-c\\/src\\/index\\.html\\'\\] { + --tw-content: 'project-c/src/index.html'; + content: var(--tw-content); + } + .content-\\[\\'project-d\\/my-binary-file\\.bin\\'\\] { + --tw-content: 'project-d/my-binary-file.bin'; + content: var(--tw-content); + } + .content-\\[\\'project-d\\/node_modules\\/my-lib-2\\/src\\/index\\.html\\'\\] { + --tw-content: 'project-d/node modules/my-lib-2/src/index.html'; + content: var(--tw-content); + } + .content-\\[\\'project-d\\/src\\/bar\\.html\\'\\] { + --tw-content: 'project-d/src/bar.html'; + content: var(--tw-content); + } + .content-\\[\\'project-d\\/src\\/foo\\.html\\'\\] { + --tw-content: 'project-d/src/foo.html'; + content: var(--tw-content); + } + .content-\\[\\'project-d\\/src\\/index\\.html\\'\\] { + --tw-content: 'project-d/src/index.html'; + content: var(--tw-content); + } + @property --tw-content { + syntax: "*"; + inherits: false; + initial-value: ""; + } + " + `) + + // Watch mode tests + let process = await spawn( + 'pnpm postcss src/index.css --output dist/out.css --watch --verbose', + { + cwd: path.join(root, 'project-a'), + }, + ) + await process.onStderr((message) => message.includes('Waiting for file changes...')) + + // Changes to project-a should not be included in the output, we changed the + // base folder to project-b. + await fs.write( + 'project-a/src/index.html', + html`
+    await fs.expectFileNotToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-a/src/index.html']`,
+    ])
+
+    // Changes to this file should be included, because we explicitly listed
+    // it using `@source`.
+    await fs.write(
+      'project-a/src/logo.jpg',
+      html`<div class="[.changed_&]:content-['project-a/src/logo.jpg']"></div>`,
+    )
+    await fs.expectFileToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-a/src/logo.jpg']`,
+    ])
+
+    // Changes to these files should be included, because we explicitly listed
+    // them using `@source`.
+    await fs.write(
+      'project-a/node_modules/my-lib-1/src/index.html',
+      html`<div class="[.changed_&]:content-['project-a/node_modules/my-lib-1/src/index.html']"></div>`,
+    )
+    await fs.expectFileToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-a/node_modules/my-lib-1/src/index.html']`,
+    ])
+
+    await fs.write(
+      'project-a/node_modules/my-lib-2/src/index.html',
+      html`<div class="[.changed_&]:content-['project-a/node_modules/my-lib-2/src/index.html']"></div>`,
+    )
+    await fs.expectFileToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-a/node_modules/my-lib-2/src/index.html']`,
+    ])
+
+    // Changes to this file should be included, because we changed the base to
+    // `project-b`.
+    await fs.write(
+      'project-b/src/index.html',
+      html`<div class="[.changed_&]:content-['project-b/src/index.html']"></div>`,
+    )
+    await fs.expectFileToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-b/src/index.html']`,
+    ])
+
+    // Changes to this file should not be included. We did change the base to
+    // `project-b`, but we still apply the auto source detection rules, which
+    // ignore `node_modules`.
+    await fs.write(
+      'project-b/node_modules/my-lib-3/src/index.html',
+      html`<div class="[.changed_&]:content-['project-b/node_modules/my-lib-3/src/index.html']"></div>`,
+    )
+    await fs.expectFileNotToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-b/node_modules/my-lib-3/src/index.html']`,
+    ])
+
+    // Project C was added explicitly via `@source`, therefore changes to these
+    // files should be included.
+    await fs.write(
+      'project-c/src/index.html',
+      html`<div class="[.changed_&]:content-['project-c/src/index.html']"></div>`,
+    )
+    await fs.expectFileToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-c/src/index.html']`,
+    ])
+
+    // Except for these files, since they are ignored by the default auto
+    // source detection rules.
+    await fs.write(
+      'project-c/src/logo.jpg',
+      html`<div class="[.changed_&]:content-['project-c/src/logo.jpg']"></div>`,
+    )
+    await fs.expectFileNotToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-c/src/logo.jpg']`,
+    ])
+    await fs.write(
+      'project-c/node_modules/my-lib-1/src/index.html',
+      html`<div class="[.changed_&]:content-['project-c/node_modules/my-lib-1/src/index.html']"></div>`,
+    )
+    await fs.expectFileNotToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-c/node_modules/my-lib-1/src/index.html']`,
+    ])
+
+    // Creating new files in the "root" of auto source detected folders.
+    // We need to create the files and *then* update them, because postcss-cli
+    // only picks up changes to existing files, not newly added files.
+    await fs.create([
+      'project-b/new-file.html',
+      'project-b/new-folder/new-file.html',
+      'project-c/new-file.html',
+      'project-c/new-folder/new-file.html',
+    ])
+
+    // If we don't wait, the writes will be coalesced into an "add" event,
+    // which isn't picked up by postcss-cli.
+    await new Promise((resolve) => setTimeout(resolve, 100))
+
+    await fs.write(
+      'project-b/new-file.html',
+      html`<div class="[.created_&]:content-['project-b/new-file.html']"></div>`,
+    )
+    await fs.write(
+      'project-b/new-folder/new-file.html',
+      html`<div class="[.created_&]:content-['project-b/new-folder/new-file.html']"></div>`,
+    )
+    await fs.write(
+      'project-c/new-file.html',
+      html`<div class="[.created_&]:content-['project-c/new-file.html']"></div>`,
+    )
+    await fs.write(
+      'project-c/new-folder/new-file.html',
+      html`<div class="[.created_&]:content-['project-c/new-folder/new-file.html']"></div>`,
+    )
+
+    await fs.expectFileToContain('./project-a/dist/out.css', [
+      candidate`[.created_&]:content-['project-b/new-file.html']`,
+      candidate`[.created_&]:content-['project-b/new-folder/new-file.html']`,
+      candidate`[.created_&]:content-['project-c/new-file.html']`,
+      candidate`[.created_&]:content-['project-c/new-folder/new-file.html']`,
+    ])
+  },
+)
+
+test(
+  'auto source detection disabled',
+  {
+    fs: {
+      'package.json': json`
+        {
+          "dependencies": {
+            "postcss": "^8",
+            "postcss-cli": "^10",
+            "tailwindcss": "workspace:^",
+            "@tailwindcss/postcss": "workspace:^"
+          }
+        }
+      `,
+      'postcss.config.js': js`
+        module.exports = {
+          plugins: {
+            '@tailwindcss/postcss': {},
+          },
+        }
+      `,
+      'index.css': css`
+        @reference 'tailwindcss/theme';
+
+        /* (1) */
+        /* - 'source(none)' disables auto source detection entirely */
+        /* - Nothing is scanned automatically, not even './src' or the current working directory */
+        @import 'tailwindcss/utilities' source(none);
+
+        /* (2) */
+        /* - './pages' should be scanned because of the explicit glob */
+        /* - Only '.html' files should be included */
+        /* - './pages/ignored.html' is gitignored, but the specific pattern overrides that */
+        @source "./pages/**/*.html";
+      `,
+
+      '.gitignore': dedent`
+        /src/ignored
+        /pages/ignored.html
+      `,
+
+      // (1)
+      'index.html': 'content-["index.html"] content-["BAD"]', // Never scanned, auto source detection is disabled
+      'src/index.html': 'content-["src/index.html"] content-["BAD"]',
+      'src/nested/index.html': 'content-["src/nested/index.html"] content-["BAD"]',
+      'src/index.jpg': 'content-["src/index.jpg"] content-["BAD"]',
+      'src/nested/index.tar': 'content-["src/nested/index.tar"] content-["BAD"]',
+      'src/ignored/index.html': 'content-["src/ignored/index.html"] content-["BAD"]',
+
+      // (2)
+      'pages/foo.html': 'content-["pages/foo.html"]',
+      'pages/nested/foo.html': 'content-["pages/nested/foo.html"]',
+      'pages/ignored.html': 'content-["pages/ignored.html"]',
+      'pages/foo.jsx': 'content-["pages/foo.jsx"] content-["BAD"]',
+      'pages/nested/foo.jsx': 'content-["pages/nested/foo.jsx"] content-["BAD"]',
+    },
+  },
+  async ({ fs, exec, expect }) => {
+    await exec('pnpm postcss index.css --output dist/out.css')
+
+    expect(await fs.dumpFiles('./dist/*.css')).toMatchInlineSnapshot(`
+      "
+      --- ./dist/out.css ---
+      .content-\\[\\"pages\\/foo\\.html\\"\\] {
+        --tw-content: "pages/foo.html";
+        content: var(--tw-content);
+      }
+      .content-\\[\\"pages\\/ignored\\.html\\"\\] {
+        --tw-content: "pages/ignored.html";
+        content: var(--tw-content);
+      }
+      .content-\\[\\"pages\\/nested\\/foo\\.html\\"\\] {
+        --tw-content: "pages/nested/foo.html";
+        content: var(--tw-content);
+      }
+      @property --tw-content {
+        syntax: "*";
+        inherits: false;
+        initial-value: "";
+      }
+      "
+    `)
+  },
+)
+
+test(
+  '`@source not "…"`',
+  {
+    fs: {
+      'package.json': json`{}`,
+      'pnpm-workspace.yaml': yaml`
+        #
+        packages:
+          - project-a
+      `,
+      'project-a/package.json': json`
+        {
+          "dependencies": {
+            "postcss": "^8",
+            "postcss-cli": "^10",
+            "tailwindcss": "workspace:^",
+            "@tailwindcss/postcss": "workspace:^"
+          }
+        }
+      `,
+      'project-a/postcss.config.js': js`
+        module.exports = {
+          plugins: {
+            '@tailwindcss/postcss': {},
+          },
+        }
+      `,
+      'project-a/src/index.css': css`
+        @reference 'tailwindcss/theme';
+        @import 'tailwindcss/utilities';
+
+        /* Ignore a specific file */
+        @source not "./ignore-me-file.html";
+
+        /* Ignore an entire folder */
+        @source not "./ignore-me-folder";
+
+        /* Ignore an extension */
+        @source not "**/*.ts";
+
+        /* Explicit source detection for 'project-b' */
+        @source "../../project-b/**/*.html";
+
+        /* Explicitly ignoring a file in 'project-b' */
+        @source not "../../project-b/src/ignore-me.html";
+      `,
+      'project-a/src/ignore-me-file.html': html`
+        <div
+          class="content-['project-a/src/ignore-me-file.html']"
+        ></div>
+      `,
+      'project-a/src/ignore-me-folder/index.html': html`
+        <div
+          class="content-['project-a/src/ignore-me-folder/index.html']"
+        ></div>
+      `,
+      'project-a/src/keep-me.html': html`<div class="content-['keep-me.html']"></div>`,
+      'project-a/src/ignore-me-extension.ts': html`
+        <div
+          class="content-['project-a/src/ignore-me-extension.ts']"
+        ></div>
+      `,
+      'project-b/src/ignore-me.html': html`
+        <div
+          class="content-['project-b/src/ignore-me.html']"
+        ></div>
+      `,
+      'project-b/src/keep-me.html': html`
+        <div
+          class="content-['project-b/src/keep-me.html']"
+        ></div>
+      `,
+    },
+  },
+  async ({ fs, exec, spawn, root, expect }) => {
+    await exec('pnpm postcss src/index.css --output dist/out.css --verbose', {
+      cwd: path.join(root, 'project-a'),
+    })
+
+    expect(await fs.dumpFiles('./project-a/dist/*.css')).toMatchInlineSnapshot(`
+      "
+      --- ./project-a/dist/out.css ---
+      .content-\\[\\'keep-me\\.html\\'\\] {
+        --tw-content: 'keep-me.html';
+        content: var(--tw-content);
+      }
+      .content-\\[\\'project-b\\/src\\/keep-me\\.html\\'\\] {
+        --tw-content: 'project-b/src/keep-me.html';
+        content: var(--tw-content);
+      }
+      @property --tw-content {
+        syntax: "*";
+        inherits: false;
+        initial-value: "";
+      }
+      "
+    `)
+
+    // Watch mode tests
+    let process = await spawn(
+      'pnpm postcss src/index.css --output dist/out.css --watch --verbose',
+      {
+        cwd: path.join(root, 'project-a'),
+      },
+    )
+    await process.onStderr((message) => message.includes('Waiting for file changes...'))
+
+    await fs.expectFileNotToContain('./project-a/dist/out.css', [
+      candidate`content-['project-a/src/ignore-me-file.html']`,
+      candidate`content-['project-a/src/ignore-me-folder/index.html']`,
+      candidate`content-['project-b/src/ignore-me.html']`,
+    ])
+
+    // Changes to the keep-me files should be included.
+    await fs.write(
+      'project-a/src/keep-me.html',
+      html`<div class="[.changed_&]:content-['project-a/src/keep-me.html']"></div>`,
+    )
+    await fs.expectFileToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-a/src/keep-me.html']`,
+    ])
+
+    await fs.write(
+      'project-b/src/keep-me.html',
+      html`<div class="[.changed_&]:content-['project-b/src/keep-me.html']"></div>`,
+    )
+    await fs.expectFileToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-b/src/keep-me.html']`,
+    ])
+
+    // Changes to the ignored files should not be included.
+    await fs.write(
+      'project-a/src/ignore-me.html',
+      html`<div class="[.changed_&]:content-['project-a/src/ignore-me.html']"></div>`,
+    )
+    await fs.expectFileNotToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-a/src/ignore-me.html']`,
+    ])
+
+    await fs.write(
+      'project-b/src/ignore-me.html',
+      html`<div class="[.changed_&]:content-['project-b/src/ignore-me.html']"></div>`,
+    )
+    await fs.expectFileNotToContain('./project-a/dist/out.css', [
+      candidate`[.changed_&]:content-['project-b/src/ignore-me.html']`,
+    ])
+
+    await fs.expectFileNotToContain('./project-a/dist/out.css', [
+      candidate`content-['project-a/src/ignore-me-file.html']`,
+      candidate`content-['project-a/src/ignore-me-folder/index.html']`,
+      candidate`content-['project-b/src/ignore-me.html']`,
+    ])
+
+    // Newly created files that match the source patterns should be included.
+    await fs.create([
+      'project-a/src/new-file.html',
+      'project-a/src/new-folder/new-file.html',
+      'project-b/src/new-file.html',
+      'project-b/src/new-folder/new-file.html',
+    ])
+
+    await fs.write(
+      'project-a/src/new-file.html',
+      html`<div class="[.created_&]:content-['project-a/src/new-file.html']"></div>`,
+    )
+    await fs.write(
+      'project-a/src/new-folder/new-file.html',
+      html`<div class="[.created_&]:content-['project-a/src/new-folder/new-file.html']"></div>`,
+    )
+    await fs.write(
+      'project-b/src/new-file.html',
+      html`<div class="[.created_&]:content-['project-b/src/new-file.html']"></div>`,
+    )
+    await fs.write(
+      'project-b/src/new-folder/new-file.html',
+      html`<div class="[.created_&]:content-['project-b/src/new-folder/new-file.html']"></div>`,
+    )
+
+    // If we don't wait, the writes will be coalesced into an "add" event,
+    // which isn't picked up by postcss-cli.
+    await new Promise((resolve) => setTimeout(resolve, 100))
+
+    await fs.expectFileToContain('./project-a/dist/out.css', [
+      candidate`[.created_&]:content-['project-a/src/new-file.html']`,
+      candidate`[.created_&]:content-['project-a/src/new-folder/new-file.html']`,
+      candidate`[.created_&]:content-['project-b/src/new-file.html']`,
+      candidate`[.created_&]:content-['project-b/src/new-folder/new-file.html']`,
+    ])
+
+    await fs.expectFileNotToContain('./project-a/dist/out.css', [
+      candidate`content-['project-a/src/ignore-me-file.html']`,
+      candidate`content-['project-a/src/ignore-me-folder/index.html']`,
+      candidate`content-['project-b/src/ignore-me.html']`,
+    ])
+  },
+)
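The remaining hunks wire the new source shape through every consumer. The pattern is the same in the CLI, the PostCSS plugin, and the Vite plugin: `compiler.globs` becomes `compiler.sources`, every entry carries an explicit `negated` flag, and the compiler root (or the `**/*` fallback) is prepended as a never-negated source before constructing the `Scanner`. A minimal sketch of that shared wiring, assuming the entry shape shown in these hunks (the `SourceEntry` name and `buildScanner` helper are illustrative, not part of the diff; the real packages inline this logic):

```ts
import { Scanner } from '@tailwindcss/oxide'

// Illustrative shape, matching the entries shown in the hunks below.
interface SourceEntry {
  base: string // directory the pattern is resolved against
  pattern: string // glob pattern, without a leading `!`
  negated: boolean // true for `@source not "…"` entries
}

function buildScanner(
  compiler: { root: { base: string; pattern: string } | null; sources: SourceEntry[] },
  base: string,
): Scanner {
  let sources: SourceEntry[] = (
    compiler.root === null
      ? // No root specified: fall back to scanning everything under `base`.
        [{ base, pattern: '**/*', negated: false }]
      : // An explicit root is always scanned, so it is never negated.
        [{ ...compiler.root, negated: false }]
  ).concat(compiler.sources)

  return new Scanner({ sources })
}
```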
diff --git a/packages/@tailwindcss-cli/src/commands/build/index.ts b/packages/@tailwindcss-cli/src/commands/build/index.ts
index 4e9c99e23459..8eb60d90286b 100644
--- a/packages/@tailwindcss-cli/src/commands/build/index.ts
+++ b/packages/@tailwindcss-cli/src/commands/build/index.ts
@@ -175,12 +175,12 @@ export async function handle(args: Result<ReturnType<typeof options>>) {
 
     // No root specified, use the base directory
     if (compiler.root === null) {
-      return [{ base, pattern: '**/*' }]
+      return [{ base, pattern: '**/*', negated: false }]
     }
 
     // Use the specified root
-    return [compiler.root]
-  })().concat(compiler.globs)
+    return [{ ...compiler.root, negated: false }]
+  })().concat(compiler.sources)
 
   let scanner = new Scanner({ sources })
   DEBUG && I.end('Setup compiler')
@@ -334,18 +334,6 @@ export async function handle(args: Result<ReturnType<typeof options>>) {
     eprintln(`Done in ${formatDuration(end - start)}`)
 }
 
-function watchDirectories(scanner: Scanner) {
-  return scanner.globs.flatMap((globEntry) => {
-    // We don't want a watcher for negated globs.
-    if (globEntry.pattern[0] === '!') return []
-
-    // We don't want a watcher for files, only directories.
-    if (globEntry.pattern === '') return []
-
-    return globEntry.base
-  })
-}
-
 async function createWatchers(dirs: string[], cb: (files: string[]) => void) {
   // Remove any directories that are children of an already watched directory.
   // If we don't we may not get notified of certain filesystem events regardless
@@ -474,3 +462,7 @@ function optimizeCss(
   // nesting is applied. This creates a more optimized output.
   return optimize(optimize(Buffer.from(input))).toString()
 }
+
+function watchDirectories(scanner: Scanner) {
+  return [...new Set(scanner.normalizedSources.flatMap((globEntry) => globEntry.base))]
+}
diff --git a/packages/@tailwindcss-postcss/src/index.ts b/packages/@tailwindcss-postcss/src/index.ts
index e21217f12b86..d8ef557767a8 100644
--- a/packages/@tailwindcss-postcss/src/index.ts
+++ b/packages/@tailwindcss-postcss/src/index.ts
@@ -190,12 +190,12 @@ function tailwindcss(opts: PluginOptions = {}): AcceptedPlugin {
 
       // No root specified, use the base directory
       if (context.compiler.root === null) {
-        return [{ base, pattern: '**/*' }]
+        return [{ base, pattern: '**/*', negated: false }]
       }
 
       // Use the specified root
-      return [context.compiler.root]
-    })().concat(context.compiler.globs)
+      return [{ ...context.compiler.root, negated: false }]
+    })().concat(context.compiler.sources)
 
     // Look for candidates used to generate the CSS
     context.scanner = new Scanner({ sources })
diff --git a/packages/@tailwindcss-upgrade/src/index.ts b/packages/@tailwindcss-upgrade/src/index.ts
index 9069b4ea84ba..f8f97934fe03 100644
--- a/packages/@tailwindcss-upgrade/src/index.ts
+++ b/packages/@tailwindcss-upgrade/src/index.ts
@@ -163,7 +163,7 @@ async function run() {
   // Template migrations
   for (let config of configBySheet.values()) {
     let set = new Set()
-    for (let globEntry of config.globs.flatMap((entry) => hoistStaticGlobParts(entry))) {
+    for (let globEntry of config.sources.flatMap((entry) => hoistStaticGlobParts(entry))) {
       let files = await globby([globEntry.pattern], {
         absolute: true,
         gitignore: true,
diff --git a/packages/@tailwindcss-upgrade/src/migrate-js-config.ts b/packages/@tailwindcss-upgrade/src/migrate-js-config.ts
index ac0a71a15e4b..46d19ef6c4a9 100644
--- a/packages/@tailwindcss-upgrade/src/migrate-js-config.ts
+++ b/packages/@tailwindcss-upgrade/src/migrate-js-config.ts
@@ -100,6 +100,7 @@ async function migrateTheme(
   let configToResolve: ConfigFile = {
     base,
     config: { ...unresolvedConfig, plugins: [], presets: undefined },
+    reference: false,
   }
 
   let { resolvedConfig, replacedThemeKeys } = resolveConfig(designSystem, [configToResolve])
@@ -274,7 +275,11 @@ async function migrateContent(
     throw new Error('Unsupported content value: ' + pattern)
   }
 
-  let sourceFiles = patternSourceFiles({ base, pattern })
+  let sourceFiles = patternSourceFiles({
+    base,
+    pattern: pattern[0] === '!' ? pattern.slice(1) : pattern,
+    negated: pattern[0] === '!',
+  })
 
   let autoContentContainsAllSourceFiles = true
   for (let sourceFile of sourceFiles) {
@@ -375,12 +380,20 @@ function keyframesToCss(keyframes: Record): string {
 }
 
 function autodetectedSourceFiles(base: string) {
-  let scanner = new Scanner({ sources: [{ base, pattern: '**/*' }] })
+  let scanner = new Scanner({
+    sources: [
+      {
+        base,
+        pattern: '**/*',
+        negated: false,
+      },
+    ],
+  })
   scanner.scan()
   return scanner.files
 }
 
-function patternSourceFiles(source: { base: string; pattern: string }): string[] {
+function patternSourceFiles(source: { base: string; pattern: string; negated: boolean }): string[] {
   let scanner = new Scanner({ sources: [source] })
   scanner.scan()
   return scanner.files
diff --git a/packages/@tailwindcss-upgrade/src/template/prepare-config.ts b/packages/@tailwindcss-upgrade/src/template/prepare-config.ts
index e936df6a3332..b4a6e18b31e1 100644
--- a/packages/@tailwindcss-upgrade/src/template/prepare-config.ts
+++ b/packages/@tailwindcss-upgrade/src/template/prepare-config.ts
@@ -19,7 +19,7 @@ export async function prepareConfig(
   options: { base: string },
 ): Promise<{
   designSystem: DesignSystem
-  globs: { base: string; pattern: string }[]
+  sources: { base: string; pattern: string }[]
   userConfig: Config
   configFilePath: string
@@ -59,7 +59,7 @@ export async function prepareConfig(
 
   return {
     designSystem,
-    globs: compiler.globs,
+    sources: compiler.sources,
     userConfig,
     newPrefix,
     configFilePath,
@@ -82,7 +82,7 @@ async function createResolvedUserConfig(fullConfigPath: string): Promise<Config> {
   ])
 
   return resolveConfig(noopDesignSystem, [
-    { base: dirname(fullConfigPath), config: unresolvedUserConfig },
+    { base: dirname(fullConfigPath), config: unresolvedUserConfig, reference: false },
   ]).resolvedConfig as any
 }
diff --git a/packages/@tailwindcss-vite/src/index.ts b/packages/@tailwindcss-vite/src/index.ts
index 1d27ee1689fd..de6b2cde33cd 100644
--- a/packages/@tailwindcss-vite/src/index.ts
+++ b/packages/@tailwindcss-vite/src/index.ts
@@ -268,12 +268,12 @@ class Root {
 
       // No root specified, auto-detect based on the `**/*` pattern
      if (this.compiler.root === null) {
-        return [{ base: this.base, pattern: '**/*' }]
+        return [{ base: this.base, pattern: '**/*', negated: false }]
       }
 
      // Use the specified root
-      return [this.compiler.root]
-    })().concat(this.compiler.globs)
+      return [{ ...this.compiler.root, negated: false }]
+    })().concat(this.compiler.sources)
 
     this.scanner = new Scanner({ sources })
     DEBUG && I.end('Setup scanner')
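Both the `migrate-js-config.ts` hunk above and the `apply-compat-hooks.ts` hunk below normalize legacy v3-style content globs the same way: a leading `!` is stripped from the pattern and recorded as `negated: true`. A small sketch of that normalization (the `toSourceEntry` helper is illustrative; the diff inlines this logic at each call site):

```ts
// Normalize a v3 `content` glob into the new source entry shape.
function toSourceEntry(base: string, pattern: string) {
  let negated = pattern[0] === '!'
  return {
    base,
    // Strip the legacy `!` prefix; negation now lives in its own field.
    pattern: negated ? pattern.slice(1) : pattern,
    negated,
  }
}

// For example, a v3 config with
//   content: ['./src/**/*.html', '!./src/ignored/**']
// maps to:
//   { base, pattern: './src/**/*.html', negated: false }
//   { base, pattern: './src/ignored/**', negated: true }
```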
diff --git a/packages/tailwindcss/src/at-import.test.ts b/packages/tailwindcss/src/at-import.test.ts
index 32a203d5166d..6b418169ff8e 100644
--- a/packages/tailwindcss/src/at-import.test.ts
+++ b/packages/tailwindcss/src/at-import.test.ts
@@ -474,9 +474,9 @@ test('emits the right base for @source directives inside nested files', async () => {
     { base: '/root', loadStylesheet },
   )
 
-  expect(compiler.globs).toEqual([
-    { pattern: './nested/**/*.css', base: '/root/foo' },
-    { pattern: './root/**/*.css', base: '/root' },
+  expect(compiler.sources).toEqual([
+    { pattern: './nested/**/*.css', base: '/root/foo', negated: false },
+    { pattern: './root/**/*.css', base: '/root', negated: false },
   ])
 })
 
@@ -521,15 +521,15 @@ test('emits the right base for @source found inside JS configs and plugins from
     { base: '/root', loadStylesheet, loadModule },
   )
 
-  expect(compiler.globs).toEqual([
-    { pattern: './nested-plugin/*.html', base: '/root/foo-plugin' },
-    { pattern: './root-plugin/*.html', base: '/root-plugin' },
+  expect(compiler.sources).toEqual([
+    { pattern: './nested-plugin/*.html', base: '/root/foo-plugin', negated: false },
+    { pattern: './root-plugin/*.html', base: '/root-plugin', negated: false },
 
-    { pattern: './nested-config-plugin/*.html', base: '/root/foo-config' },
-    { pattern: './nested-config/*.html', base: '/root/foo-config' },
+    { pattern: './nested-config-plugin/*.html', base: '/root/foo-config', negated: false },
+    { pattern: './nested-config/*.html', base: '/root/foo-config', negated: false },
 
-    { pattern: './root-config-plugin/*.html', base: '/root-config' },
-    { pattern: './root-config/*.html', base: '/root-config' },
+    { pattern: './root-config-plugin/*.html', base: '/root-config', negated: false },
+    { pattern: './root-config/*.html', base: '/root-config', negated: false },
   ])
 })
diff --git a/packages/tailwindcss/src/compat/apply-compat-hooks.ts b/packages/tailwindcss/src/compat/apply-compat-hooks.ts
index fe30f6270ba4..1cf9b1ce5c77 100644
--- a/packages/tailwindcss/src/compat/apply-compat-hooks.ts
+++ b/packages/tailwindcss/src/compat/apply-compat-hooks.ts
@@ -21,7 +21,7 @@ export async function applyCompatibilityHooks({
   base,
   ast,
   loadModule,
-  globs,
+  sources,
 }: {
   designSystem: DesignSystem
   base: string
@@ -31,7 +31,7 @@
     base: string,
     resourceHint: 'plugin' | 'config',
   ) => Promise<{ module: any; base: string }>
-  globs: { origin?: string; pattern: string }[]
+  sources: { base: string; pattern: string; negated: boolean }[]
 }) {
   let features = Features.None
   let pluginPaths: [{ id: string; base: string; reference: boolean }, CssPluginOptions | null][] =
@@ -145,7 +145,7 @@ export async function applyCompatibilityHooks({
       designSystem,
       base,
      ast,
-      globs,
+      sources,
       configs: [],
       pluginDetails: [],
     })
@@ -186,7 +186,7 @@ export async function applyCompatibilityHooks({
     designSystem,
     base,
     ast,
-    globs,
+    sources,
     configs,
     pluginDetails,
   })
@@ -198,14 +198,14 @@ function upgradeToFullPluginSupport({
   designSystem,
   base,
   ast,
-  globs,
+  sources,
   configs,
   pluginDetails,
 }: {
   designSystem: DesignSystem
   base: string
   ast: AstNode[]
-  globs: { origin?: string; pattern: string }[]
+  sources: { base: string; pattern: string; negated: boolean }[]
   configs: {
     path: string
     base: string
@@ -362,7 +362,12 @@ function upgradeToFullPluginSupport({
       )
     }
 
-    globs.push(file)
+    let negated = false
+    if (file.pattern[0] == '!') {
+      negated = true
+      file.pattern = file.pattern.slice(1)
+    }
+    sources.push({ ...file, negated })
   }
   return features
 }
diff --git a/packages/tailwindcss/src/compat/config.test.ts b/packages/tailwindcss/src/compat/config.test.ts
index f856a557d8b6..372f5f3e3ae0 100644
--- a/packages/tailwindcss/src/compat/config.test.ts
+++ b/packages/tailwindcss/src/compat/config.test.ts
@@ -15,7 +15,7 @@ test('Config files can add content', async () => {
     loadModule: async () => ({ module: { content: ['./file.txt'] }, base: '/root' }),
   })
 
-  expect(compiler.globs).toEqual([{ base: '/root', pattern: './file.txt' }])
+  expect(compiler.sources).toEqual([{ base: '/root', pattern: './file.txt', negated: false }])
 })
 
 test('Config files can change dark mode (media)', async () => {
diff --git a/packages/tailwindcss/src/feature-flags.ts b/packages/tailwindcss/src/feature-flags.ts
index 203bfe335af8..d00d1d45b1ae 100644
--- a/packages/tailwindcss/src/feature-flags.ts
+++ b/packages/tailwindcss/src/feature-flags.ts
@@ -5,5 +5,6 @@ export const enablePointerVariants = process.env.FEATURES_ENV !== 'stable'
 export const enableSafeAlignment = process.env.FEATURES_ENV !== 'stable'
 export const enableScripting = process.env.FEATURES_ENV !== 'stable'
 export const enableSourceInline = process.env.FEATURES_ENV !== 'stable'
+export const enableSourceNot = process.env.FEATURES_ENV !== 'stable'
 export const enableUserValid = process.env.FEATURES_ENV !== 'stable'
 export const enableWrapAnywhere = process.env.FEATURES_ENV !== 'stable'
diff --git a/packages/tailwindcss/src/index.test.ts b/packages/tailwindcss/src/index.test.ts
index 25d30f2ea670..9a4279d546ee 100644
--- a/packages/tailwindcss/src/index.test.ts
+++ b/packages/tailwindcss/src/index.test.ts
@@ -3236,18 +3236,18 @@ describe('plugins', () => {
 
 describe('@source', () => {
   test('emits @source files', async () => {
-    let { globs } = await compile(
+    let { sources } = await compile(
       css`
         @source "./foo/bar/*.ts";
       `,
       { base: '/root' },
     )
 
-    expect(globs).toEqual([{ pattern: './foo/bar/*.ts', base: '/root' }])
+    expect(sources).toEqual([{ pattern: './foo/bar/*.ts', base: '/root', negated: false }])
   })
 
   test('emits multiple @source files', async () => {
-    let { globs } = await compile(
+    let { sources } = await compile(
       css`
         @source "./foo/**/*.ts";
         @source "./php/secr3t/smarty.php";
@@ -3255,9 +3255,24 @@ describe('@source', () => {
       { base: '/root' },
     )
 
-    expect(globs).toEqual([
-      { pattern: './foo/**/*.ts', base: '/root' },
-      { pattern: './php/secr3t/smarty.php', base: '/root' },
+    expect(sources).toEqual([
+      { pattern: './foo/**/*.ts', base: '/root', negated: false },
+      { pattern: './php/secr3t/smarty.php', base: '/root', negated: false },
+    ])
+  })
+
+  test('emits negated @source files', async () => {
+    let { sources } = await compile(
+      css`
+        @source not "./foo/**/*.ts";
+        @source not "./php/secr3t/smarty.php";
+      `,
+      { base: '/root' },
+    )
+
+    expect(sources).toEqual([
+      { pattern: './foo/**/*.ts', base: '/root', negated: true },
+      { pattern: './php/secr3t/smarty.php', base: '/root', negated: true },
     ])
   })
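The core change in `index.ts` below parses the `@source` parameters in two feature-flagged steps: an optional `not ` prefix first (gated by `enableSourceNot`), then an optional `inline(…)` wrapper (gated by `enableSourceInline`). A simplified, flag-free sketch of that parsing order (the `parseSourceParams` helper is illustrative, not the diff's verbatim code):

```ts
// Simplified version of the prefix parsing in the hunk below.
function parseSourceParams(params: string) {
  let not = false
  let inline = false

  // `not ` is checked first, so a combined `@source not inline("…")`
  // strips the negation before unwrapping `inline(…)`.
  if (params.startsWith('not ')) {
    not = true
    params = params.slice(4) // drop `not `
  }

  if (params.startsWith('inline(')) {
    inline = true
    params = params.slice(7, -1) // drop `inline(` and the trailing `)`
  }

  return { not, inline, path: params }
}
```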
diff --git a/packages/tailwindcss/src/index.ts b/packages/tailwindcss/src/index.ts
index ebf4b3875baf..d8d4f73315ea 100644
--- a/packages/tailwindcss/src/index.ts
+++ b/packages/tailwindcss/src/index.ts
@@ -26,7 +26,7 @@ import { applyVariant, compileCandidates } from './compile'
 import { substituteFunctions } from './css-functions'
 import * as CSS from './css-parser'
 import { buildDesignSystem, type DesignSystem } from './design-system'
-import { enableSourceInline } from './feature-flags'
+import { enableSourceInline, enableSourceNot } from './feature-flags'
 import { Theme, ThemeOptions } from './theme'
 import { createCssUtility } from './utilities'
 import { expand } from './utils/brace-expansion'
@@ -128,7 +128,7 @@ async function parseCss(
   let firstThemeRule = null as StyleRule | null
   let utilitiesNode = null as AtRule | null
   let variantNodes: AtRule[] = []
-  let globs: { base: string; pattern: string }[] = []
+  let sources: { base: string; pattern: string; negated: boolean }[] = []
   let inlineCandidates: string[] = []
   let ignoredCandidates: string[] = []
   let root = null as Root
@@ -216,12 +216,14 @@ async function parseCss(
       let not = false
       let inline = false
       let path = node.params
-      if (enableSourceInline) {
+      if (enableSourceNot) {
         if (path[0] === 'n' && path.startsWith('not ')) {
           not = true
           path = path.slice(4)
         }
+      }
 
+      if (enableSourceInline) {
         if (path[0] === 'i' && path.startsWith('inline(')) {
           inline = true
           path = path.slice(7, -1)
@@ -247,7 +249,11 @@
           }
         }
       } else {
-        globs.push({ base: context.base as string, pattern: source })
+        sources.push({
+          base: context.base as string,
+          pattern: source,
+          negated: enableSourceNot ? not : false,
+        })
       }
       replaceWith([])
       return
@@ -552,7 +558,7 @@ async function parseCss(
     base,
     ast,
     loadModule,
-    globs,
+    sources,
   })
 
   for (let customVariant of customVariants) {
@@ -637,7 +643,7 @@ async function parseCss(
   return {
     designSystem,
     ast,
-    globs,
+    sources,
     root,
     utilitiesNode,
     features,
@@ -649,12 +655,12 @@ export async function compileAst(
   input: AstNode[],
   opts: CompileOptions = {},
 ): Promise<{
-  globs: { base: string; pattern: string }[]
+  sources: { base: string; pattern: string; negated: boolean }[]
   root: Root
   features: Features
   build(candidates: string[]): AstNode[]
 }> {
-  let { designSystem, ast, globs, root, utilitiesNode, features, inlineCandidates } =
+  let { designSystem, ast, sources, root, utilitiesNode, features, inlineCandidates } =
     await parseCss(input, opts)
 
   if (process.env.NODE_ENV !== 'test') {
@@ -682,7 +688,7 @@ export async function compileAst(
   }
 
   return {
-    globs,
+    sources,
     root,
     features,
     build(newRawCandidates: string[]) {
@@ -747,7 +753,7 @@ export async function compile(
   css: string,
   opts: CompileOptions = {},
 ): Promise<{
-  globs: { base: string; pattern: string }[]
+  sources: { base: string; pattern: string; negated: boolean }[]
   root: Root
   features: Features
   build(candidates: string[]): string