diff --git a/Cargo.lock b/Cargo.lock index 9b45ca60c5..69845d9110 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2544,6 +2544,7 @@ dependencies = [ "supports-color", "tabled", "tempfile", + "test-utils", "tokio", "tokio-stream", "toml", @@ -2595,6 +2596,7 @@ dependencies = [ "shellexpand", "strum", "tempfile", + "test-utils", "thiserror 2.0.16", "tokio", "toml", @@ -4380,6 +4382,10 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" +[[package]] +name = "test-utils" +version = "0.20.1" + [[package]] name = "testing_table" version = "0.3.0" diff --git a/Cargo.toml b/Cargo.toml index b3c3d1e540..bfdd133a3b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["lychee-bin", "lychee-lib", "examples/*", "benches"] +members = ["lychee-bin", "lychee-lib", "examples/*", "benches", "test-utils"] resolver = "2" [workspace.package] diff --git a/fixtures/TEST.md b/fixtures/TEST.md index 15efe7cd01..0626a5d47e 100644 --- a/fixtures/TEST.md +++ b/fixtures/TEST.md @@ -13,7 +13,8 @@ Test GZIP compression. (See https://github.com/analysis-tools-dev/static-analysi [LDRA](https://ldra.com) Some more complex formatting to test that Markdown parsing works. -[![CC0](https://i.creativecommons.org/p/zero/1.0/88x31.png)](https://creativecommons.org/publicdomain/zero/1.0/) +[![CC0](https://licensebuttons.net/p/zero/1.0/88x31.png)](https://creativecommons.org/publicdomain/zero/1.0/) + Test HTTP and HTTPS for the same site. http://example.com diff --git a/fixtures/TEST_GITHUB.md b/fixtures/TEST_GITHUB.md index 85a0492b61..2ca7e2112c 100644 --- a/fixtures/TEST_GITHUB.md +++ b/fixtures/TEST_GITHUB.md @@ -1,3 +1,3 @@ Test file: contains a single GitHub URL. -Lychee: https://github.com/hello-rust/lychee +Lychee: https://github.com/lycheeverse/lychee diff --git a/lychee-bin/Cargo.toml b/lychee-bin/Cargo.toml index 9e7c31987d..5a4cb3236e 100644 --- a/lychee-bin/Cargo.toml +++ b/lychee-bin/Cargo.toml @@ -66,6 +66,7 @@ cookie_store = "0.22.0" predicates = "3.1.3" pretty_assertions = "1.4.1" tempfile = "3.20.0" +test-utils = { path = "../test-utils" } tracing-subscriber = { version = "0.3.20", default-features = false, features = [ "fmt", "registry", diff --git a/lychee-bin/src/formatters/response/color.rs b/lychee-bin/src/formatters/response/color.rs index c1b70eed61..fd99c24ba9 100644 --- a/lychee-bin/src/formatters/response/color.rs +++ b/lychee-bin/src/formatters/response/color.rs @@ -1,6 +1,6 @@ use lychee_lib::{CacheStatus, ResponseBody, Status}; -use crate::formatters::color::{DIM, GREEN, NORMAL, PINK, YELLOW}; +use crate::formatters::color::{DIM, GREEN, PINK, YELLOW}; use super::{MAX_RESPONSE_OUTPUT_WIDTH, ResponseFormatter}; @@ -15,11 +15,10 @@ impl ColorFormatter { /// response. fn status_color(status: &Status) -> &'static std::sync::LazyLock { match status { - Status::Ok(_) | Status::Cached(CacheStatus::Ok(_)) => &GREEN, + Status::Ok(_) | Status::Cached(CacheStatus::Ok(_)) | Status::Redirected(_, _) => &GREEN, Status::Excluded | Status::Unsupported(_) | Status::Cached(CacheStatus::Excluded | CacheStatus::Unsupported) => &DIM, - Status::Redirected(_) => &NORMAL, Status::UnknownStatusCode(_) | Status::Timeout(_) => &YELLOW, Status::Error(_) | Status::Cached(CacheStatus::Error(_)) => &PINK, } diff --git a/lychee-bin/src/formatters/response/emoji.rs b/lychee-bin/src/formatters/response/emoji.rs index b4e403f062..06179920fd 100644 --- a/lychee-bin/src/formatters/response/emoji.rs +++ b/lychee-bin/src/formatters/response/emoji.rs @@ -17,7 +17,7 @@ impl EmojiFormatter { Status::Excluded | Status::Unsupported(_) | Status::Cached(CacheStatus::Excluded | CacheStatus::Unsupported) => "🚫", - Status::Redirected(_) => "ā†Ŗļø", + Status::Redirected(_, _) => "ā†Ŗļø", Status::UnknownStatusCode(_) | Status::Timeout(_) => "āš ļø", Status::Error(_) | Status::Cached(CacheStatus::Error(_)) => "āŒ", } @@ -40,7 +40,7 @@ impl ResponseFormatter for EmojiFormatter { mod emoji_tests { use super::*; use http::StatusCode; - use lychee_lib::{ErrorKind, Status, Uri}; + use lychee_lib::{ErrorKind, Redirects, Status, Uri}; // Helper function to create a ResponseBody with a given status and URI fn mock_response_body(status: Status, uri: &str) -> ResponseBody { @@ -84,7 +84,7 @@ mod emoji_tests { fn test_format_response_with_redirect_status() { let formatter = EmojiFormatter; let body = mock_response_body( - Status::Redirected(StatusCode::MOVED_PERMANENTLY), + Status::Redirected(StatusCode::MOVED_PERMANENTLY, Redirects::none()), "https://example.com/redirect", ); assert_eq!( diff --git a/lychee-bin/src/formatters/response/plain.rs b/lychee-bin/src/formatters/response/plain.rs index e46f4e6762..e3c76b10a3 100644 --- a/lychee-bin/src/formatters/response/plain.rs +++ b/lychee-bin/src/formatters/response/plain.rs @@ -22,6 +22,7 @@ impl ResponseFormatter for PlainFormatter { mod plain_tests { use super::*; use http::StatusCode; + use lychee_lib::Redirects; use lychee_lib::{ErrorKind, Status, Uri}; // Helper function to create a ResponseBody with a given status and URI @@ -69,12 +70,12 @@ mod plain_tests { fn test_format_response_with_redirect_status() { let formatter = PlainFormatter; let body = mock_response_body( - Status::Redirected(StatusCode::MOVED_PERMANENTLY), + Status::Redirected(StatusCode::MOVED_PERMANENTLY, Redirects::none()), "https://example.com/redirect", ); assert_eq!( formatter.format_response(&body), - "[301] https://example.com/redirect | Redirect (301 Moved Permanently): Moved Permanently" + "[301] https://example.com/redirect | Redirect: Followed 0 redirects resolving to the final status of: Moved Permanently" ); } diff --git a/lychee-bin/src/formatters/response/task.rs b/lychee-bin/src/formatters/response/task.rs index 50380852c8..466126321e 100644 --- a/lychee-bin/src/formatters/response/task.rs +++ b/lychee-bin/src/formatters/response/task.rs @@ -13,7 +13,7 @@ impl ResponseFormatter for TaskFormatter { mod task_tests { use super::*; use http::StatusCode; - use lychee_lib::{ErrorKind, Status, Uri}; + use lychee_lib::{ErrorKind, Redirects, Status, Uri}; // Helper function to create a ResponseBody with a given status and URI fn mock_response_body(status: Status, uri: &str) -> ResponseBody { @@ -60,12 +60,12 @@ mod task_tests { fn test_format_response_with_redirect_status() { let formatter = TaskFormatter; let body = mock_response_body( - Status::Redirected(StatusCode::MOVED_PERMANENTLY), + Status::Redirected(StatusCode::MOVED_PERMANENTLY, Redirects::none()), "https://example.com/redirect", ); assert_eq!( formatter.format_response(&body), - "- [ ] [301] https://example.com/redirect | Redirect (301 Moved Permanently): Moved Permanently" + "- [ ] [301] https://example.com/redirect | Redirect: Followed 0 redirects resolving to the final status of: Moved Permanently" ); } diff --git a/lychee-bin/src/formatters/stats/compact.rs b/lychee-bin/src/formatters/stats/compact.rs index 2dcfcf6e48..95ca254560 100644 --- a/lychee-bin/src/formatters/stats/compact.rs +++ b/lychee-bin/src/formatters/stats/compact.rs @@ -55,7 +55,7 @@ impl Display for CompactResponseStats { numeric_sort::cmp(&a, &b) }); - writeln!(f, "\n\u{2139} Suggestions")?; + writeln!(f, "\nℹ Suggestions")?; for suggestion in sorted_suggestions { writeln!(f, "{suggestion}")?; } @@ -81,6 +81,7 @@ impl Display for CompactResponseStats { write_if_any(stats.excludes, "šŸ‘»", "Excluded", &BOLD_YELLOW, f)?; write_if_any(stats.timeouts, "ā³", "Timeouts", &BOLD_YELLOW, f)?; write_if_any(stats.unsupported, "ā›”", "Unsupported", &BOLD_YELLOW, f)?; + write_if_any(stats.redirects, "šŸ”€", "Redirects", &BOLD_YELLOW, f)?; Ok(()) } @@ -167,6 +168,7 @@ mod tests { duration_secs: 0, error_map, suggestion_map: HashMap::default(), + redirect_map: HashMap::default(), unsupported: 0, redirects: 0, cached: 0, diff --git a/lychee-bin/src/formatters/stats/detailed.rs b/lychee-bin/src/formatters/stats/detailed.rs index 6ae44e0951..f283c3111e 100644 --- a/lychee-bin/src/formatters/stats/detailed.rs +++ b/lychee-bin/src/formatters/stats/detailed.rs @@ -36,16 +36,16 @@ impl Display for DetailedResponseStats { let stats = &self.stats; let separator = "-".repeat(MAX_PADDING + 1); - writeln!(f, "\u{1f4dd} Summary")?; // šŸ“ + writeln!(f, "šŸ“ Summary")?; writeln!(f, "{separator}")?; - write_stat(f, "\u{1f50d} Total", stats.total, true)?; // šŸ” - write_stat(f, "\u{2705} Successful", stats.successful, true)?; // āœ… - write_stat(f, "\u{23f3} Timeouts", stats.timeouts, true)?; // ā³ - write_stat(f, "\u{1f500} Redirected", stats.redirects, true)?; // šŸ”€ - write_stat(f, "\u{1f47b} Excluded", stats.excludes, true)?; // šŸ‘» - write_stat(f, "\u{2753} Unknown", stats.unknown, true)?; //ā“ - write_stat(f, "\u{1f6ab} Errors", stats.errors, true)?; // 🚫 - write_stat(f, "\u{26d4} Unsupported", stats.errors, false)?; // ā›” + write_stat(f, "šŸ” Total", stats.total, true)?; + write_stat(f, "āœ… Successful", stats.successful, true)?; + write_stat(f, "ā³ Timeouts", stats.timeouts, true)?; + write_stat(f, "šŸ”€ Redirected", stats.redirects, true)?; + write_stat(f, "šŸ‘» Excluded", stats.excludes, true)?; + write_stat(f, "ā“ Unknown", stats.unknown, true)?; + write_stat(f, "🚫 Errors", stats.errors, true)?; + write_stat(f, "ā›” Unsupported", stats.errors, false)?; let response_formatter = get_response_formatter(&self.mode); @@ -138,6 +138,7 @@ mod tests { redirects: 0, cached: 0, suggestion_map: HashMap::default(), + redirect_map: HashMap::default(), success_map: HashMap::default(), error_map, excluded_map: HashMap::default(), diff --git a/lychee-bin/src/formatters/stats/markdown.rs b/lychee-bin/src/formatters/stats/markdown.rs index 908c9682da..c45ddd0aee 100644 --- a/lychee-bin/src/formatters/stats/markdown.rs +++ b/lychee-bin/src/formatters/stats/markdown.rs @@ -26,35 +26,35 @@ struct StatsTableEntry { fn stats_table(stats: &ResponseStats) -> String { let stats = vec![ StatsTableEntry { - status: "\u{1f50d} Total", + status: "šŸ” Total", count: stats.total, }, StatsTableEntry { - status: "\u{2705} Successful", + status: "āœ… Successful", count: stats.successful, }, StatsTableEntry { - status: "\u{23f3} Timeouts", + status: "ā³ Timeouts", count: stats.timeouts, }, StatsTableEntry { - status: "\u{1f500} Redirected", + status: "šŸ”€ Redirected", count: stats.redirects, }, StatsTableEntry { - status: "\u{1f47b} Excluded", + status: "šŸ‘» Excluded", count: stats.excludes, }, StatsTableEntry { - status: "\u{2753} Unknown", + status: "ā“ Unknown", count: stats.unknown, }, StatsTableEntry { - status: "\u{1f6ab} Errors", + status: "🚫 Errors", count: stats.errors, }, StatsTableEntry { - status: "\u{26d4} Unsupported", + status: "ā›” Unsupported", count: stats.unsupported, }, ]; diff --git a/lychee-bin/src/formatters/suggestion.rs b/lychee-bin/src/formatters/suggestion.rs index a3c2d2966e..019b2d65c5 100644 --- a/lychee-bin/src/formatters/suggestion.rs +++ b/lychee-bin/src/formatters/suggestion.rs @@ -4,8 +4,8 @@ use crate::color::{GREEN, PINK, color}; use serde::Serialize; use url::Url; -#[derive(Debug, Serialize, Eq, Hash, PartialEq)] /// A suggestion on how to replace a broken link with a link hosted by a web archive service. +#[derive(Debug, Serialize, Eq, Hash, PartialEq)] pub(crate) struct Suggestion { /// The original `Url` that was identified to be broken pub(crate) original: Url, diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index c814b0aa7c..db10691ca2 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -210,10 +210,10 @@ fn load_cookie_jar(cfg: &Config) -> Result> { } } -#[must_use] /// Load cache (if exists and is still valid) /// This returns an `Option` as starting without a cache is a common scenario /// and we silently discard errors on purpose +#[must_use] fn load_cache(cfg: &Config) -> Option { if !cfg.cache { return None; diff --git a/lychee-bin/src/stats.rs b/lychee-bin/src/stats.rs index 29ba730a30..03459efa7a 100644 --- a/lychee-bin/src/stats.rs +++ b/lychee-bin/src/stats.rs @@ -37,13 +37,15 @@ pub(crate) struct ResponseStats { pub(crate) errors: usize, /// Number of responses that were cached from a previous run pub(crate) cached: usize, - /// Map to store successful responses (if `detailed_stats` is enabled) + /// Store successful responses (if `detailed_stats` is enabled) pub(crate) success_map: HashMap>, - /// Map to store failed responses (if `detailed_stats` is enabled) + /// Store failed responses (if `detailed_stats` is enabled) pub(crate) error_map: HashMap>, /// Replacement suggestions for failed responses (if `--suggest` is enabled) pub(crate) suggestion_map: HashMap>, - /// Map to store excluded responses (if `detailed_stats` is enabled) + /// Store redirected responses (if `detailed_stats` is enabled) + pub(crate) redirect_map: HashMap>, + /// Store excluded responses (if `detailed_stats` is enabled) pub(crate) excluded_map: HashMap>, /// Used to store the duration of the run in seconds. pub(crate) duration_secs: u64, @@ -72,7 +74,7 @@ impl ResponseStats { Status::Error(_) => self.errors += 1, Status::UnknownStatusCode(_) => self.unknown += 1, Status::Timeout(_) => self.timeouts += 1, - Status::Redirected(_) => self.redirects += 1, + Status::Redirected(_, _) => self.redirects += 1, Status::Excluded => self.excludes += 1, Status::Unsupported(_) => self.unsupported += 1, Status::Cached(cache_status) => { @@ -95,6 +97,9 @@ impl ResponseStats { _ if status.is_error() => self.error_map.entry(source).or_default(), Status::Ok(_) if self.detailed_stats => self.success_map.entry(source).or_default(), Status::Excluded if self.detailed_stats => self.excluded_map.entry(source).or_default(), + Status::Redirected(_, _) if self.detailed_stats => { + self.redirect_map.entry(source).or_default() + } _ => return, }; status_map_entry.insert(response.1); @@ -110,7 +115,7 @@ impl ResponseStats { #[inline] /// Check if the entire run was successful pub(crate) const fn is_success(&self) -> bool { - self.total == self.successful + self.excludes + self.unsupported + self.total == self.successful + self.excludes + self.unsupported + self.redirects } #[inline] diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 9ad2db640c..ef75516cca 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1,16 +1,7 @@ #[cfg(test)] mod cli { - use std::{ - collections::{HashMap, HashSet}, - error::Error, - fs::{self, File}, - io::{BufRead, Write}, - path::{Path, PathBuf}, - time::Duration, - }; - use anyhow::anyhow; - use assert_cmd::{Command, assert::Assert}; + use assert_cmd::{Command, assert::Assert, output::OutputOkExt}; use assert_json_diff::assert_json_include; use http::{Method, StatusCode}; use lychee_lib::{InputSource, ResponseBody}; @@ -22,7 +13,17 @@ mod cli { use regex::Regex; use serde::Serialize; use serde_json::Value; + use std::{ + collections::{HashMap, HashSet}, + error::Error, + fs::{self, File}, + io::{BufRead, Write}, + path::{Path, PathBuf}, + time::Duration, + }; use tempfile::NamedTempFile; + use test_utils::{mock_server, redirecting_mock_server}; + use uuid::Uuid; use wiremock::{ Mock, ResponseTemplate, @@ -36,17 +37,6 @@ mod cli { // constant. const LYCHEE_CACHE_FILE: &str = ".lycheecache"; - /// Create a mock server which returns a custom status code. - macro_rules! mock_server { - ($status:expr $(, $func:tt ($($arg:expr),*))*) => {{ - let mock_server = wiremock::MockServer::start().await; - let response_template = wiremock::ResponseTemplate::new(http::StatusCode::from($status)); - let template = response_template$(.$func($($arg),*))*; - wiremock::Mock::given(wiremock::matchers::method("GET")).respond_with(template).mount(&mock_server).await; - mock_server - }}; - } - /// Create a mock server which returns a 200 OK and a custom response body. macro_rules! mock_response { ($body:expr) => {{ @@ -1456,7 +1446,11 @@ mod cli { cmd.arg(&test_path).assert().success(); let mut cmd = main_command(); - cmd.arg("--require-https").arg(test_path).assert().failure(); + cmd.arg("--require-https") + .arg(test_path) + .assert() + .failure() + .stdout(contains("This URI is available in HTTPS protocol, but HTTP is provided. Use 'https://example.com/' instead")); Ok(()) } @@ -2234,16 +2228,19 @@ mod cli { let mock_server = mock_server!(StatusCode::OK); let config = fixtures_path().join("configs").join("format.toml"); let mut cmd = main_command(); - cmd.arg("--config") + let output = cmd + .arg("--config") .arg(config) .arg("-") .write_stdin(mock_server.uri()) .env_clear() .assert() - .success(); + .success() + .get_output() + .clone() + .unwrap(); // Check that the output is in JSON format - let output = cmd.output().unwrap(); let output = std::str::from_utf8(&output.stdout).unwrap(); let json: serde_json::Value = serde_json::from_str(output)?; assert_eq!(json["total"], 1); @@ -2251,6 +2248,45 @@ mod cli { Ok(()) } + #[tokio::test] + async fn test_redirect_json() { + use serde_json::json; + redirecting_mock_server!(async |redirect_url: Url, ok_url| { + let mut cmd = main_command(); + let output = cmd + .arg("-") + .arg("--format") + .arg("json") + .arg("--verbose") // required to make redirect_map visible + .write_stdin(redirect_url.as_str()) + .env_clear() + .assert() + .success() + .get_output() + .clone() + .unwrap(); + + // Check that the output is in JSON format + let output = std::str::from_utf8(&output.stdout).unwrap(); + let json: serde_json::Value = serde_json::from_str(output).unwrap(); + assert_eq!(json["total"], 1); + assert_eq!(json["redirects"], 1); + assert_eq!( + json["redirect_map"], + json!({ + "stdin":[{ + "status": { + "code": 200, + "text": "Redirect", + "redirects": [ redirect_url, ok_url ] + }, + "url": redirect_url + }]}) + ); + }) + .await; + } + #[tokio::test] async fn test_retry() -> Result<()> { let mock_server = wiremock::MockServer::start().await; diff --git a/lychee-lib/Cargo.toml b/lychee-lib/Cargo.toml index 1b98be11e3..6e37cbd095 100644 --- a/lychee-lib/Cargo.toml +++ b/lychee-lib/Cargo.toml @@ -68,12 +68,13 @@ features = ["runtime-tokio"] [dev-dependencies] doc-comment = "0.3.3" -tempfile = "3.20.0" -wiremock = "0.6.5" -serde_json = "1.0.142" +pretty_assertions = "1.4.1" rstest = "0.26.1" +serde_json = "1.0.142" +tempfile = "3.20.0" +test-utils = { path = "../test-utils" } toml = "0.9.5" -pretty_assertions = "1.4.1" +wiremock = "0.6.5" [features] diff --git a/lychee-lib/src/archive/mod.rs b/lychee-lib/src/archive/mod.rs index 6d02da554f..4df36edd06 100644 --- a/lychee-lib/src/archive/mod.rs +++ b/lychee-lib/src/archive/mod.rs @@ -5,11 +5,11 @@ use strum::{Display, EnumIter, EnumString, VariantNames}; mod wayback; +/// The different supported online archive sites for restoring broken links. #[non_exhaustive] #[derive( Debug, Deserialize, Default, Clone, Display, EnumIter, EnumString, VariantNames, PartialEq, Eq, )] -/// The different supported online archive sites for restoring broken links. pub enum Archive { #[serde(rename = "wayback")] #[strum(serialize = "wayback", ascii_case_insensitive)] diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index b313c98340..7a1deb97b4 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -308,13 +308,12 @@ impl FileChecker { #[cfg(test)] mod tests { - use super::FileChecker; - use crate::test_utils::{fixture_uri, fixtures_path}; use crate::{ ErrorKind::{InvalidFilePath, InvalidFragment, InvalidIndexFile}, - Status, + Status, Uri, }; + use test_utils::{fixture_uri, fixtures_path}; /// Calls [`FileChecker::check`] on the given [`FileChecker`] with given URL /// path (relative to the fixtures directory). @@ -322,7 +321,7 @@ mod tests { /// The result of checking the link is matched against the given pattern. macro_rules! assert_filecheck { ($checker:expr, $path:expr, $pattern:pat) => { - let uri = fixture_uri($path); + let uri = Uri::from(fixture_uri!($path)); let result = $checker.check(&uri).await; assert!( matches!(result, $pattern), @@ -341,7 +340,7 @@ mod tests { /// The pattern should match values of type `Result<&str, ErrorKind>`. macro_rules! assert_resolves { ($checker:expr, $subpath:expr, $expected:pat) => { - let uri = fixture_uri($subpath); + let uri = Uri::from(fixture_uri!($subpath)); let path = uri .url .to_file_path() @@ -349,7 +348,7 @@ mod tests { let result = $checker.resolve_local_path(&path, &uri); let result_subpath = result .as_deref() - .map(|p| p.strip_prefix(fixtures_path()).unwrap()) + .map(|p| p.strip_prefix(fixtures_path!()).unwrap()) .map(|p| p.to_string_lossy()); assert!( matches!(result_subpath.as_deref(), $expected), @@ -537,7 +536,7 @@ mod tests { ); // absolute paths to a file on disk should also work - let absolute_html = fixtures_path() + let absolute_html = fixtures_path!() .join("filechecker/index_dir/index.html") .to_str() .expect("expected utf-8 fixtures path") diff --git a/lychee-lib/src/checker/website.rs b/lychee-lib/src/checker/website.rs index eb740f98de..2bccc86f05 100644 --- a/lychee-lib/src/checker/website.rs +++ b/lychee-lib/src/checker/website.rs @@ -3,7 +3,7 @@ use crate::{ chain::{Chain, ChainResult, ClientRequestChains, Handler, RequestChain}, quirks::Quirks, retry::RetryExt, - types::uri::github::GithubUri, + types::{redirect_history::RedirectHistory, uri::github::GithubUri}, utils::fragment_checker::{FragmentChecker, FragmentInput}, }; use async_trait::async_trait; @@ -51,6 +51,9 @@ pub(crate) struct WebsiteChecker { /// Utility for performing fragment checks in HTML files. fragment_checker: FragmentChecker, + + /// Keep track of HTTP redirections for reporting + redirect_history: RedirectHistory, } impl WebsiteChecker { @@ -58,6 +61,7 @@ impl WebsiteChecker { pub(crate) fn new( method: reqwest::Method, retry_wait_time: Duration, + redirect_history: RedirectHistory, max_retries: u64, reqwest_client: reqwest::Client, accepted: HashSet, @@ -71,6 +75,7 @@ impl WebsiteChecker { reqwest_client, github_client, plugin_request_chain, + redirect_history, max_retries, retry_wait_time, accepted, @@ -95,6 +100,7 @@ impl WebsiteChecker { wait_time = wait_time.saturating_mul(2); status = self.check_default(clone_unwrap(&request)).await; } + status } @@ -187,20 +193,36 @@ impl WebsiteChecker { Box::new(self.clone()), ]); - match self.check_website_inner(uri, &default_chain).await { - Status::Ok(code) if self.require_https && uri.scheme() == "http" => { - if self - .check_website_inner(&uri.to_https()?, &default_chain) + let status = self.check_website_inner(uri, &default_chain).await; + let status = self + .handle_insecure_url(uri, &default_chain, status) + .await?; + Ok(self.redirect_history.handle_redirected(&uri.url, status)) + } + + /// Mark HTTP URLs as insecure, if the user required HTTPS + /// and the URL is available under HTTPS. + async fn handle_insecure_url( + &self, + uri: &Uri, + default_chain: &Chain, + status: Status, + ) -> Result { + if self.require_https && uri.scheme() == "http" { + if let Status::Ok(_) = status { + let https_uri = uri.to_https()?; + let is_https_available = self + .check_website_inner(&https_uri, default_chain) .await - .is_success() - { - Ok(Status::Error(ErrorKind::InsecureURL(uri.to_https()?))) - } else { - Ok(Status::Ok(code)) + .is_success(); + + if is_https_available { + return Ok(Status::Error(ErrorKind::InsecureURL(https_uri))); } } - s => Ok(s), } + + Ok(status) } /// Checks the given URI of a website. diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 9213a30be8..63306bb91c 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -33,7 +33,7 @@ use crate::{ checker::{file::FileChecker, mail::MailChecker, website::WebsiteChecker}, filter::Filter, remap::Remaps, - types::DEFAULT_ACCEPTED_STATUS_CODES, + types::{DEFAULT_ACCEPTED_STATUS_CODES, redirect_history::RedirectHistory}, }; /// Default number of redirects before a request is deemed as failed, 5. @@ -349,16 +349,7 @@ impl ClientBuilder { HeaderValue::from_static("chunked"), ); - // Custom redirect policy to enable logging of redirects. - let max_redirects = self.max_redirects; - let redirect_policy = redirect::Policy::custom(move |attempt| { - if attempt.previous().len() > max_redirects { - attempt.error("too many redirects") - } else { - debug!("Redirecting to {}", attempt.url()); - attempt.follow() - } - }); + let redirect_history = RedirectHistory::new(); let mut builder = reqwest::ClientBuilder::new() .gzip(true) @@ -366,7 +357,10 @@ impl ClientBuilder { .danger_accept_invalid_certs(self.allow_insecure) .connect_timeout(Duration::from_secs(CONNECT_TIMEOUT)) .tcp_keepalive(Duration::from_secs(TCP_KEEPALIVE)) - .redirect(redirect_policy); + .redirect(redirect_policy( + redirect_history.clone(), + self.max_redirects, + )); if let Some(cookie_jar) = self.cookie_jar { builder = builder.cookie_provider(cookie_jar); @@ -410,6 +404,7 @@ impl ClientBuilder { let website_checker = WebsiteChecker::new( self.method, self.retry_wait_time, + redirect_history.clone(), self.max_retries, reqwest_client, self.accepted, @@ -434,6 +429,21 @@ impl ClientBuilder { } } +/// Create our custom [`redirect::Policy`] in order to stop following redirects +/// once `max_redirects` is reached and to record redirections for reporting. +fn redirect_policy(redirect_history: RedirectHistory, max_redirects: usize) -> redirect::Policy { + redirect::Policy::custom(move |attempt| { + if attempt.previous().len() > max_redirects { + attempt.stop() + } else { + let redirects = &[attempt.previous(), &[attempt.url().clone()]].concat(); + redirect_history.record_redirects(redirects); + debug!("Following redirect to {}", attempt.url()); + attempt.follow() + } + }) +} + /// Handles incoming requests and returns responses. /// /// See [`ClientBuilder`] which contains sane defaults for all configuration @@ -443,7 +453,7 @@ pub struct Client { /// Optional remapping rules for URIs matching pattern. remaps: Option, - /// Rules to decided whether each link should be checked or ignored. + /// Rules to decide whether a given link should be checked or ignored. filter: Filter, /// A checker for website URLs. @@ -481,15 +491,6 @@ impl Client { .. } = request.try_into()?; - // Allow filtering based on element and attribute - // if !self.filter.is_allowed(uri) { - // return Ok(Response::new( - // uri.clone(), - // Status::Excluded, - // source, - // )); - // } - self.remap(uri)?; if self.is_excluded(uri) { @@ -585,45 +586,49 @@ mod tests { use http::{StatusCode, header::HeaderMap}; use reqwest::header; use tempfile::tempdir; - use wiremock::matchers::path; + use test_utils::get_mock_client_response; + use test_utils::mock_server; + use test_utils::redirecting_mock_server; + use wiremock::{ + Mock, + matchers::{method, path}, + }; use super::ClientBuilder; use crate::{ ErrorKind, Request, Status, Uri, chain::{ChainResult, Handler, RequestChain}, - mock_server, - test_utils::get_mock_client_response, }; #[tokio::test] async fn test_nonexistent() { let mock_server = mock_server!(StatusCode::NOT_FOUND); - let res = get_mock_client_response(mock_server.uri()).await; + let res = get_mock_client_response!(mock_server.uri()).await; assert!(res.status().is_error()); } #[tokio::test] async fn test_nonexistent_with_path() { - let res = get_mock_client_response("http://127.0.0.1/invalid").await; + let res = get_mock_client_response!("http://127.0.0.1/invalid").await; assert!(res.status().is_error()); } #[tokio::test] async fn test_github() { - let res = get_mock_client_response("https://github.com/lycheeverse/lychee").await; + let res = get_mock_client_response!("https://github.com/lycheeverse/lychee").await; assert!(res.status().is_success()); } #[tokio::test] async fn test_github_nonexistent_repo() { - let res = get_mock_client_response("https://github.com/lycheeverse/not-lychee").await; + let res = get_mock_client_response!("https://github.com/lycheeverse/not-lychee").await; assert!(res.status().is_error()); } #[tokio::test] async fn test_github_nonexistent_file() { - let res = get_mock_client_response( + let res = get_mock_client_response!( "https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md", ) .await; @@ -633,10 +638,10 @@ mod tests { #[tokio::test] async fn test_youtube() { // This is applying a quirk. See the quirks module. - let res = get_mock_client_response("https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await; + let res = get_mock_client_response!("https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await; assert!(res.status().is_success()); - let res = get_mock_client_response("https://www.youtube.com/watch?v=invalidNlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await; + let res = get_mock_client_response!("https://www.youtube.com/watch?v=invalidNlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await; assert!(res.status().is_error()); } @@ -646,7 +651,7 @@ mod tests { .try_into() .unwrap(); - let res = get_mock_client_response(r.clone()).await; + let res = get_mock_client_response!(r.clone()).await; assert_eq!(res.status().code(), Some(401.try_into().unwrap())); r.credentials = Some(crate::BasicAuthCredentials { @@ -654,21 +659,24 @@ mod tests { password: "pass".into(), }); - let res = get_mock_client_response(r).await; - assert!(res.status().is_success()); + let res = get_mock_client_response!(r).await; + assert!(matches!( + res.status(), + Status::Redirected(StatusCode::OK, _) + )); } #[tokio::test] async fn test_non_github() { let mock_server = mock_server!(StatusCode::OK); - let res = get_mock_client_response(mock_server.uri()).await; + let res = get_mock_client_response!(mock_server.uri()).await; assert!(res.status().is_success()); } #[tokio::test] async fn test_invalid_ssl() { - let res = get_mock_client_response("https://expired.badssl.com/").await; + let res = get_mock_client_response!("https://expired.badssl.com/").await; assert!(res.status().is_error()); @@ -691,7 +699,7 @@ mod tests { File::create(file).unwrap(); let uri = format!("file://{}", dir.path().join("temp").to_str().unwrap()); - let res = get_mock_client_response(uri).await; + let res = get_mock_client_response!(uri).await; assert!(res.status().is_success()); } @@ -855,64 +863,56 @@ mod tests { async fn test_max_redirects() { let mock_server = wiremock::MockServer::start().await; - let ok_uri = format!("{}/ok", &mock_server.uri()); let redirect_uri = format!("{}/redirect", &mock_server.uri()); - - // Set up permanent redirect loop let redirect = wiremock::ResponseTemplate::new(StatusCode::PERMANENT_REDIRECT) - .insert_header("Location", ok_uri.as_str()); - wiremock::Mock::given(wiremock::matchers::method("GET")) - .and(path("/redirect")) - .respond_with(redirect) - .mount(&mock_server) - .await; + .insert_header("Location", redirect_uri.as_str()); - let ok = wiremock::ResponseTemplate::new(StatusCode::OK); - wiremock::Mock::given(wiremock::matchers::method("GET")) - .and(path("/ok")) - .respond_with(ok) + let redirect_count = 15usize; + let initial_invocation = 1; + + // Set up infinite redirect loop + Mock::given(method("GET")) + .and(path("/redirect")) + .respond_with(move |_: &_| redirect.clone()) + .expect(initial_invocation + redirect_count as u64) .mount(&mock_server) .await; - let client = ClientBuilder::builder() - .max_redirects(0_usize) - .build() - .client() - .unwrap(); - - let res = client.check(redirect_uri.clone()).await.unwrap(); - assert!(res.status().is_error()); - - let client = ClientBuilder::builder() - .max_redirects(1_usize) + let res = ClientBuilder::builder() + .max_redirects(redirect_count) .build() .client() + .unwrap() + .check(redirect_uri.clone()) + .await .unwrap(); - let res = client.check(redirect_uri).await.unwrap(); - assert!(res.status().is_success()); + assert_eq!( + res.status(), + &Status::Error(ErrorKind::RejectedStatusCode( + StatusCode::PERMANENT_REDIRECT + )) + ); } #[tokio::test] - async fn test_limit_max_redirects() { - let mock_server = wiremock::MockServer::start().await; - - // Set up permanent redirect loop - let template = wiremock::ResponseTemplate::new(StatusCode::PERMANENT_REDIRECT) - .insert_header("Location", mock_server.uri().as_str()); - wiremock::Mock::given(wiremock::matchers::method("GET")) - .respond_with(template) - .mount(&mock_server) - .await; - - let client = ClientBuilder::builder() - .max_redirects(0_usize) - .build() - .client() - .unwrap(); - - let res = client.check(mock_server.uri()).await.unwrap(); - assert!(res.status().is_error()); + async fn test_redirects() { + redirecting_mock_server!(async |redirect_url: Url, ok_ur| { + let res = ClientBuilder::builder() + .max_redirects(1_usize) + .build() + .client() + .unwrap() + .check(Uri::from((redirect_url).clone())) + .await + .unwrap(); + + assert_eq!( + res.status(), + &Status::Redirected(StatusCode::OK, vec![redirect_url, ok_ur].into()) + ); + }) + .await; } #[tokio::test] diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 5ce8b99bab..e209c56a5e 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -307,6 +307,7 @@ impl Collector { mod tests { use std::borrow::Cow; use std::{collections::HashSet, convert::TryFrom, fs::File, io::Write}; + use test_utils::{fixtures_path, load_fixture, mail, mock_server, path, website}; use http::StatusCode; use reqwest::Url; @@ -315,8 +316,6 @@ mod tests { use crate::{ Result, Uri, filter::PathExcludes, - mock_server, - test_utils::{load_fixture, mail, path, website}, types::{FileType, Input, InputSource}, }; @@ -469,11 +468,11 @@ mod tests { .unwrap(); let expected_links = HashSet::from_iter([ - website(TEST_STRING), - website(TEST_URL), - website(TEST_FILE), - website(TEST_GLOB_1), - mail(TEST_GLOB_2_MAIL), + website!(TEST_STRING), + website!(TEST_URL), + website!(TEST_FILE), + website!(TEST_GLOB_1), + mail!(TEST_GLOB_2_MAIL), ]); assert_eq!(links, expected_links); @@ -495,8 +494,8 @@ mod tests { let links = collect(inputs, None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ - website("https://endler.dev"), - website("https://github.com/hello-rust/lychee/relative_link"), + website!("https://endler.dev"), + website!("https://github.com/hello-rust/lychee/relative_link"), ]); assert_eq!(links, expected_links); @@ -521,8 +520,8 @@ mod tests { let links = collect(inputs, None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ - website("https://github.com/lycheeverse/lychee/"), - website("https://github.com/lycheeverse/blob/master/README.md"), + website!("https://github.com/lycheeverse/lychee/"), + website!("https://github.com/lycheeverse/blob/master/README.md"), ]); assert_eq!(links, expected_links); @@ -550,9 +549,9 @@ mod tests { let links = collect(inputs, None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ - website("https://example.com/static/image.png"), - website("https://example.com/static/image300.png"), - website("https://example.com/static/image600.png"), + website!("https://example.com/static/image.png"), + website!("https://example.com/static/image300.png"), + website!("https://example.com/static/image600.png"), ]); assert_eq!(links, expected_links); @@ -576,10 +575,10 @@ mod tests { let links = collect(inputs, None, Some(base)).await.ok().unwrap(); let expected = HashSet::from_iter([ - website("https://localhost.com/@/internal.md"), - website("https://localhost.com/@/internal.markdown"), - website("https://localhost.com/@/internal.md#example"), - website("https://localhost.com/@/internal.markdown#example"), + website!("https://localhost.com/@/internal.md"), + website!("https://localhost.com/@/internal.markdown"), + website!("https://localhost.com/@/internal.md#example"), + website!("https://localhost.com/@/internal.markdown#example"), ]); assert_eq!(links, expected); @@ -588,7 +587,7 @@ mod tests { #[tokio::test] async fn test_extract_html5_not_valid_xml_relative_links() { let base = Base::try_from("https://example.com").unwrap(); - let input = load_fixture("TEST_HTML5.html"); + let input = load_fixture!("TEST_HTML5.html"); let input = Input { source: InputSource::String(Cow::Owned(input)), @@ -600,12 +599,12 @@ mod tests { let expected_links = HashSet::from_iter([ // the body links wouldn't be present if the file was parsed strictly as XML - website("https://example.com/body/a"), - website("https://example.com/body/div_empty_a"), - website("https://example.com/css/style_full_url.css"), - website("https://example.com/css/style_relative_url.css"), - website("https://example.com/head/home"), - website("https://example.com/images/icon.png"), + website!("https://example.com/body/a"), + website!("https://example.com/body/div_empty_a"), + website!("https://example.com/css/style_full_url.css"), + website!("https://example.com/css/style_relative_url.css"), + website!("https://example.com/head/home"), + website!("https://example.com/images/icon.png"), ]); assert_eq!(links, expected_links); @@ -630,8 +629,8 @@ mod tests { let links = collect(inputs, None, None).await.ok().unwrap(); let expected_urls = HashSet::from_iter([ - website("https://github.com/lycheeverse/lychee/"), - website(&format!("{server_uri}about")), + website!("https://github.com/lycheeverse/lychee/"), + website!(&format!("{server_uri}about")), ]); assert_eq!(links, expected_urls); @@ -647,7 +646,7 @@ mod tests { let links = collect(inputs, None, None).await.ok().unwrap(); - let expected_links = HashSet::from_iter([mail("user@example.com")]); + let expected_links = HashSet::from_iter([mail!("user@example.com")]); assert_eq!(links, expected_links); } @@ -689,11 +688,11 @@ mod tests { let links = collect(inputs, None, None).await.ok().unwrap(); let expected_links = HashSet::from_iter([ - website(&format!( + website!(&format!( "{}/foo/relative.html", mock_server_1.uri().trim_end_matches('/') )), - website(&format!( + website!(&format!( "{}/bar/relative.html", mock_server_2.uri().trim_end_matches('/') )), @@ -723,9 +722,9 @@ mod tests { let links = collect(inputs, None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ - path("/path/to/root/index.html"), - path("/path/to/root/about.html"), - path("/another.html"), + path!("/path/to/root/index.html"), + path!("/path/to/root/about.html"), + path!("/another.html"), ]); assert_eq!(links, expected_links); diff --git a/lychee-lib/src/extract/mod.rs b/lychee-lib/src/extract/mod.rs index 08dd9a220f..17feb82bf9 100644 --- a/lychee-lib/src/extract/mod.rs +++ b/lychee-lib/src/extract/mod.rs @@ -66,11 +66,11 @@ mod tests { use pretty_assertions::assert_eq; use reqwest::Url; use std::{collections::HashSet, path::Path}; + use test_utils::{fixtures_path, load_fixture, mail, website}; use super::*; use crate::{ Uri, - test_utils::{load_fixture, mail, website}, types::{FileType, InputContent, ResolvedInputSource}, utils::url::find_links, }; @@ -148,7 +148,7 @@ mod tests { fn test_skip_markdown_email() { let input = "Get in touch - [Contact Us](mailto:test@test.com)"; let links = extract_uris(input, FileType::Markdown); - let expected = IntoIterator::into_iter([mail("test@test.com")]).collect::>(); + let expected = IntoIterator::into_iter([mail!("test@test.com")]).collect::>(); assert_eq!(links, expected); } @@ -167,9 +167,9 @@ mod tests { let links: HashSet = extract_uris(input, FileType::Plaintext); let expected = IntoIterator::into_iter([ - website("https://endler.dev"), - website("https://hello-rust.show/foo/bar?lol=1"), - mail("test@example.com"), + website!("https://endler.dev"), + website!("https://hello-rust.show/foo/bar?lol=1"), + mail!("test@example.com"), ]) .collect::>(); @@ -187,15 +187,15 @@ mod tests { #[test] fn test_extract_html5_not_valid_xml() { - let input = load_fixture("TEST_HTML5.html"); + let input = load_fixture!("TEST_HTML5.html"); let links = extract_uris(&input, FileType::Html); let expected_links = IntoIterator::into_iter([ - website("https://example.com/head/home"), - website("https://example.com/css/style_full_url.css"), + website!("https://example.com/head/home"), + website!("https://example.com/css/style_full_url.css"), // the body links wouldn't be present if the file was parsed strictly as XML - website("https://example.com/body/a"), - website("https://example.com/body/div_empty_a"), + website!("https://example.com/body/a"), + website!("https://example.com/body/div_empty_a"), ]) .collect::>(); @@ -243,10 +243,10 @@ mod tests { #[test] fn test_extract_html5_lowercase_doctype() { // this has been problematic with previous XML based parser - let input = load_fixture("TEST_HTML5_LOWERCASE_DOCTYPE.html"); + let input = load_fixture!("TEST_HTML5_LOWERCASE_DOCTYPE.html"); let links = extract_uris(&input, FileType::Html); - let expected_links = IntoIterator::into_iter([website("https://example.com/body/a")]) + let expected_links = IntoIterator::into_iter([website!("https://example.com/body/a")]) .collect::>(); assert_eq!(links, expected_links); @@ -255,16 +255,16 @@ mod tests { #[test] fn test_extract_html5_minified() { // minified HTML with some quirky elements such as href attribute values specified without quotes - let input = load_fixture("TEST_HTML5_MINIFIED.html"); + let input = load_fixture!("TEST_HTML5_MINIFIED.html"); let links = extract_uris(&input, FileType::Html); let expected_links = IntoIterator::into_iter([ - website("https://example.com/"), - website("https://example.com/favicon.ico"), + website!("https://example.com/"), + website!("https://example.com/favicon.ico"), // Note that we exclude `preconnect` links: - // website("https://fonts.externalsite.com"), - website("https://example.com/docs/"), - website("https://example.com/forum"), + // website!("https://fonts.externalsite.com"), + website!("https://example.com/docs/"), + website!("https://example.com/forum"), ]) .collect::>(); @@ -274,10 +274,10 @@ mod tests { #[test] fn test_extract_html5_malformed() { // malformed links shouldn't stop the parser from further parsing - let input = load_fixture("TEST_HTML5_MALFORMED_LINKS.html"); + let input = load_fixture!("TEST_HTML5_MALFORMED_LINKS.html"); let links = extract_uris(&input, FileType::Html); - let expected_links = IntoIterator::into_iter([website("https://example.com/valid")]) + let expected_links = IntoIterator::into_iter([website!("https://example.com/valid")]) .collect::>(); assert_eq!(links, expected_links); @@ -286,14 +286,14 @@ mod tests { #[test] fn test_extract_html5_custom_elements() { // the element name shouldn't matter for attributes like href, src, cite etc - let input = load_fixture("TEST_HTML5_CUSTOM_ELEMENTS.html"); + let input = load_fixture!("TEST_HTML5_CUSTOM_ELEMENTS.html"); let links = extract_uris(&input, FileType::Html); let expected_links = IntoIterator::into_iter([ - website("https://example.com/some-weird-element"), - website("https://example.com/even-weirder-src"), - website("https://example.com/even-weirder-href"), - website("https://example.com/citations"), + website!("https://example.com/some-weird-element"), + website!("https://example.com/even-weirder-src"), + website!("https://example.com/even-weirder-href"), + website!("https://example.com/citations"), ]) .collect::>(); @@ -307,8 +307,8 @@ mod tests { let links = extract_uris(&input, FileType::Plaintext); let expected_links = IntoIterator::into_iter([ - website("https://example.com/@test/test"), - website("http://otherdomain.com/test/@test"), + website!("https://example.com/@test/test"), + website!("http://otherdomain.com/test/@test"), ]) .collect::>(); @@ -321,7 +321,7 @@ mod tests { let links = extract_uris(input, FileType::Plaintext); let expected_links = - IntoIterator::into_iter([website("https://www.apache.org/licenses/LICENSE-2.0")]) + IntoIterator::into_iter([website!("https://www.apache.org/licenses/LICENSE-2.0")]) .collect::>(); assert_eq!(links, expected_links); diff --git a/lychee-lib/src/filter/mod.rs b/lychee-lib/src/filter/mod.rs index 6f767c076a..65cc810059 100644 --- a/lychee-lib/src/filter/mod.rs +++ b/lychee-lib/src/filter/mod.rs @@ -17,17 +17,17 @@ pub type PathExcludes = regex_filter::RegexFilter; use crate::Uri; -#[cfg(all(not(test), not(feature = "check_example_domains")))] /// These domains are explicitly defined by RFC 2606, section 3 Reserved Example /// Second Level Domain Names for describing example cases and should not be /// dereferenced as they should not have content. +#[cfg(all(not(test), not(feature = "check_example_domains")))] static EXAMPLE_DOMAINS: LazyLock> = LazyLock::new(|| { HashSet::from_iter(["example.com", "example.org", "example.net", "example.edu"]) }); -#[cfg(all(not(test), not(feature = "check_example_domains")))] /// We also exclude the example TLDs in section 2 of the same RFC. /// This exclusion gets subsumed by the `check_example_domains` feature. +#[cfg(all(not(test), not(feature = "check_example_domains")))] static EXAMPLE_TLDS: LazyLock> = LazyLock::new(|| HashSet::from_iter([".test", ".example", ".invalid", ".localhost"])); @@ -62,19 +62,19 @@ const FALSE_POSITIVE_PAT: &[&str] = &[ static FALSE_POSITIVE_SET: LazyLock = LazyLock::new(|| regex::RegexSet::new(FALSE_POSITIVE_PAT).expect("Failed to create RegexSet")); -#[inline] -#[must_use] /// The given input is a well-known false-positive, which won't be checked by /// default. This behavior can be explicitly overwritten by defining an /// `Include` pattern, which will match on a false positive +#[inline] +#[must_use] pub fn is_false_positive(input: &str) -> bool { FALSE_POSITIVE_SET.is_match(input) } -#[inline] -#[must_use] /// Check if the host belongs to a known example domain as defined in /// [RFC 2606](https://datatracker.ietf.org/doc/html/rfc2606) +#[inline] +#[must_use] pub fn is_example_domain(uri: &Uri) -> bool { match uri.domain() { Some(domain) => { @@ -101,9 +101,9 @@ pub fn is_example_domain(uri: &Uri) -> bool { } } +/// Check if the host belongs to a known unsupported domain #[inline] #[must_use] -/// Check if the host belongs to a known unsupported domain pub fn is_unsupported_domain(uri: &Uri) -> bool { if let Some(domain) = uri.domain() { // It is not enough to use `UNSUPPORTED_DOMAINS.contains(domain)` here @@ -262,13 +262,11 @@ impl Filter { #[cfg(test)] mod tests { use reqwest::Url; + use test_utils::{mail, website}; use url::Host; use super::{Excludes, Filter, Includes}; - use crate::{ - Uri, - test_utils::{mail, website}, - }; + use crate::Uri; // Note: the standard library, as of Rust stable 1.47.0, does not expose // "link-local" or "private" IPv6 checks. However, one might argue @@ -343,18 +341,18 @@ mod tests { // In this case, only the requests matching the include set will be checked let filter = Filter::default(); - assert!(!filter.is_excluded(&website("https://example.com"))); + assert!(!filter.is_excluded(&website!("https://example.com"))); } #[test] fn test_false_positives() { let filter = Filter::default(); - assert!(filter.is_excluded(&website("http://www.w3.org/1999/xhtml"))); - assert!(filter.is_excluded(&website( + assert!(filter.is_excluded(&website!("http://www.w3.org/1999/xhtml"))); + assert!(filter.is_excluded(&website!( "http://schemas.openxmlformats.org/markup-compatibility/2006" ))); - assert!(!filter.is_excluded(&website("https://example.com"))); + assert!(!filter.is_excluded(&website!("https://example.com"))); } #[test] @@ -364,7 +362,7 @@ mod tests { includes: Some(includes), ..Filter::default() }; - assert!(!filter.is_excluded(&website("http://www.w3.org/1999/xhtml"))); + assert!(!filter.is_excluded(&website!("http://www.w3.org/1999/xhtml"))); } #[test] @@ -376,9 +374,9 @@ mod tests { }; // Only the requests matching the include set will be checked - assert!(!filter.is_excluded(&website("https://foo.example.com"))); - assert!(filter.is_excluded(&website("https://bar.example.com"))); - assert!(filter.is_excluded(&website("https://example.com"))); + assert!(!filter.is_excluded(&website!("https://foo.example.com"))); + assert!(filter.is_excluded(&website!("https://bar.example.com"))); + assert!(filter.is_excluded(&website!("https://example.com"))); } #[test] @@ -387,9 +385,9 @@ mod tests { ..Filter::default() }; - assert!(filter.is_excluded(&mail("mail@example.com"))); - assert!(filter.is_excluded(&mail("foo@bar.dev"))); - assert!(!filter.is_excluded(&website("http://bar.dev"))); + assert!(filter.is_excluded(&mail!("mail@example.com"))); + assert!(filter.is_excluded(&mail!("foo@bar.dev"))); + assert!(!filter.is_excluded(&website!("http://bar.dev"))); } #[test] @@ -399,9 +397,9 @@ mod tests { ..Filter::default() }; - assert!(!filter.is_excluded(&mail("mail@example.com"))); - assert!(!filter.is_excluded(&mail("foo@bar.dev"))); - assert!(!filter.is_excluded(&website("http://bar.dev"))); + assert!(!filter.is_excluded(&mail!("mail@example.com"))); + assert!(!filter.is_excluded(&mail!("foo@bar.dev"))); + assert!(!filter.is_excluded(&website!("http://bar.dev"))); } #[test] @@ -413,12 +411,12 @@ mod tests { ..Filter::default() }; - assert!(filter.is_excluded(&website("https://github.com"))); - assert!(filter.is_excluded(&website("http://exclude.org"))); - assert!(filter.is_excluded(&mail("mail@example.com"))); + assert!(filter.is_excluded(&website!("https://github.com"))); + assert!(filter.is_excluded(&website!("http://exclude.org"))); + assert!(filter.is_excluded(&mail!("mail@example.com"))); - assert!(!filter.is_excluded(&website("http://bar.dev"))); - assert!(filter.is_excluded(&mail("foo@bar.dev"))); + assert!(!filter.is_excluded(&website!("http://bar.dev"))); + assert!(filter.is_excluded(&mail!("foo@bar.dev"))); } #[test] fn test_exclude_include_regex() { @@ -431,24 +429,24 @@ mod tests { }; // Includes take preference over excludes - assert!(!filter.is_excluded(&website("https://foo.example.com")),); + assert!(!filter.is_excluded(&website!("https://foo.example.com")),); - assert!(filter.is_excluded(&website("https://example.com"))); - assert!(filter.is_excluded(&website("https://bar.example.com"))); + assert!(filter.is_excluded(&website!("https://example.com"))); + assert!(filter.is_excluded(&website!("https://bar.example.com"))); } #[test] fn test_excludes_no_private_ips_by_default() { let filter = Filter::default(); - assert!(!filter.is_excluded(&website(V4_PRIVATE_CLASS_A))); - assert!(!filter.is_excluded(&website(V4_PRIVATE_CLASS_B))); - assert!(!filter.is_excluded(&website(V4_PRIVATE_CLASS_C))); - assert!(!filter.is_excluded(&website(V4_LINK_LOCAL_1))); - assert!(!filter.is_excluded(&website(V4_LINK_LOCAL_2))); - assert!(!filter.is_excluded(&website(V4_LOOPBACK))); - assert!(!filter.is_excluded(&website(V6_LOOPBACK))); - assert!(!filter.is_excluded(&website("http://localhost"))); + assert!(!filter.is_excluded(&website!(V4_PRIVATE_CLASS_A))); + assert!(!filter.is_excluded(&website!(V4_PRIVATE_CLASS_B))); + assert!(!filter.is_excluded(&website!(V4_PRIVATE_CLASS_C))); + assert!(!filter.is_excluded(&website!(V4_LINK_LOCAL_1))); + assert!(!filter.is_excluded(&website!(V4_LINK_LOCAL_2))); + assert!(!filter.is_excluded(&website!(V4_LOOPBACK))); + assert!(!filter.is_excluded(&website!(V6_LOOPBACK))); + assert!(!filter.is_excluded(&website!("http://localhost"))); } #[test] @@ -458,9 +456,9 @@ mod tests { ..Filter::default() }; - assert!(filter.is_excluded(&website(V4_PRIVATE_CLASS_A))); - assert!(filter.is_excluded(&website(V4_PRIVATE_CLASS_B))); - assert!(filter.is_excluded(&website(V4_PRIVATE_CLASS_C))); + assert!(filter.is_excluded(&website!(V4_PRIVATE_CLASS_A))); + assert!(filter.is_excluded(&website!(V4_PRIVATE_CLASS_B))); + assert!(filter.is_excluded(&website!(V4_PRIVATE_CLASS_C))); } #[test] @@ -470,8 +468,8 @@ mod tests { ..Filter::default() }; - assert!(filter.is_excluded(&website(V4_LINK_LOCAL_1))); - assert!(filter.is_excluded(&website(V4_LINK_LOCAL_2))); + assert!(filter.is_excluded(&website!(V4_LINK_LOCAL_1))); + assert!(filter.is_excluded(&website!(V4_LINK_LOCAL_2))); } #[test] @@ -481,9 +479,9 @@ mod tests { ..Filter::default() }; - assert!(filter.is_excluded(&website(V4_LOOPBACK))); - assert!(filter.is_excluded(&website(V6_LOOPBACK))); - assert!(filter.is_excluded(&website("http://localhost"))); + assert!(filter.is_excluded(&website!(V4_LOOPBACK))); + assert!(filter.is_excluded(&website!(V6_LOOPBACK))); + assert!(filter.is_excluded(&website!("http://localhost"))); } #[test] @@ -495,7 +493,7 @@ mod tests { }; // if these were pure IPv4, we would exclude - assert!(!filter.is_excluded(&website(V6_MAPPED_V4_PRIVATE_CLASS_A))); - assert!(!filter.is_excluded(&website(V6_MAPPED_V4_LINK_LOCAL))); + assert!(!filter.is_excluded(&website!(V6_MAPPED_V4_PRIVATE_CLASS_A))); + assert!(!filter.is_excluded(&website!(V6_MAPPED_V4_LINK_LOCAL))); } } diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index c38969e926..37dfd62ea9 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -73,11 +73,6 @@ pub mod remap; /// local IPs or e-mail addresses pub mod filter; -/// Test utilities -#[cfg(test)] -#[macro_use] -pub mod test_utils; - #[cfg(test)] use doc_comment as _; // required for doctest use ring as _; // required for apple silicon @@ -100,7 +95,7 @@ pub use crate::{ types::{ AcceptRange, AcceptRangeError, Base, BasicAuthCredentials, BasicAuthSelector, CacheStatus, CookieJar, ErrorKind, FileExtensions, FileType, Input, InputContent, InputResolver, - InputSource, Request, ResolvedInputSource, Response, ResponseBody, Result, Status, - StatusCodeExcluder, StatusCodeSelector, uri::valid::Uri, + InputSource, Redirects, Request, ResolvedInputSource, Response, ResponseBody, Result, + Status, StatusCodeExcluder, StatusCodeSelector, uri::valid::Uri, }, }; diff --git a/lychee-lib/src/retry.rs b/lychee-lib/src/retry.rs index b6c6b66e19..a7d8600ae1 100644 --- a/lychee-lib/src/retry.rs +++ b/lychee-lib/src/retry.rs @@ -121,7 +121,7 @@ impl RetryExt for Status { Status::Ok(_) => false, Status::Error(err) => err.should_retry(), Status::Timeout(_) => true, - Status::Redirected(_) => false, + Status::Redirected(_, _) => false, Status::UnknownStatusCode(_) => false, Status::Excluded => false, Status::Unsupported(_) => false, diff --git a/lychee-lib/src/test_utils.rs b/lychee-lib/src/test_utils.rs deleted file mode 100644 index 98d65ef3d4..0000000000 --- a/lychee-lib/src/test_utils.rs +++ /dev/null @@ -1,100 +0,0 @@ -use std::{ - convert::TryFrom, - fs, - path::{Path, PathBuf}, -}; - -use reqwest::Url; - -use crate::{ClientBuilder, ErrorKind, Request, Uri}; - -#[macro_export] -/// Creates a mock web server, which responds with a predefined status when -/// handling a matching request -macro_rules! mock_server { - ($status:expr $(, $func:tt ($($arg:expr),*))*) => {{ - let mock_server = wiremock::MockServer::start().await; - let response_template = wiremock::ResponseTemplate::new(http::StatusCode::from($status)); - let template = response_template$(.$func($($arg),*))*; - wiremock::Mock::given(wiremock::matchers::method("GET")).respond_with(template).mount(&mock_server).await; - mock_server - }}; -} - -pub(crate) async fn get_mock_client_response(request: T) -> crate::Response -where - Request: TryFrom, - ErrorKind: From, -{ - ClientBuilder::default() - .client() - .unwrap() - .check(request) - .await - .unwrap() -} - -/// Helper method to convert a string into a URI -/// -/// # Panic -/// -/// This panics on error, so it should only be used for testing -pub(crate) fn website(url: &str) -> Uri { - Uri::from(Url::parse(url).expect("Expected valid Website URI")) -} - -/// Helper method to convert a `std::path::Path `into a URI with the `file` scheme -/// -/// # Panic -/// -/// This panics if the given path is not absolute, so it should only be used for -/// testing -pub(crate) fn path>(path: P) -> Uri { - Uri::from(Url::from_file_path(path.as_ref()).expect("Expected valid File URI")) -} - -/// Creates a mail URI from a string -pub(crate) fn mail(address: &str) -> Uri { - if address.starts_with("mailto:") { - Url::parse(address) - } else { - Url::parse(&(String::from("mailto:") + address)) - } - .expect("Expected valid Mail Address") - .into() -} - -/// Returns the path to the `fixtures` directory. -/// -/// # Panic -/// -/// Panics if the fixtures directory could not be determined. -pub(crate) fn fixtures_path() -> PathBuf { - Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .join("fixtures") -} - -/// Loads a fixture from the `fixtures` directory -pub(crate) fn load_fixture(filename: &str) -> String { - let path = fixtures_path().join(filename); - fs::read_to_string(path).unwrap() -} - -/// Constructs a [`Uri`] from a given subpath within the `fixtures` directory. -/// -/// The specified subpath may contain a fragment reference by ending with `#something`. -/// The subpath should not begin with a slash, otherwise it will be treated as an -/// absolute path. -pub(crate) fn fixture_uri(subpath: &str) -> Uri { - let fixture_url = - Url::from_directory_path(fixtures_path()).expect("fixture path should be a valid URL"); - - // joining subpath onto a Url allows the subpath to contain a fragment - let url = fixture_url - .join(subpath) - .expect("expected subpath to form a valid URL"); - - Uri::from(url) -} diff --git a/lychee-lib/src/types/cache.rs b/lychee-lib/src/types/cache.rs index aa42a50a9d..ce89f15d9a 100644 --- a/lychee-lib/src/types/cache.rs +++ b/lychee-lib/src/types/cache.rs @@ -73,7 +73,7 @@ impl From<&Status> for CacheStatus { Status::Ok(code) | Status::UnknownStatusCode(code) => Self::Ok(code.as_u16()), Status::Excluded => Self::Excluded, Status::Unsupported(_) => Self::Unsupported, - Status::Redirected(code) => Self::Error(Some(code.as_u16())), + Status::Redirected(code, _) => Self::Error(Some(code.as_u16())), Status::Timeout(code) => Self::Error(code.map(|code| code.as_u16())), Status::Error(e) => match e { ErrorKind::RejectedStatusCode(code) => Self::Error(Some(code.as_u16())), diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 58b9e8cdec..30019b9bc2 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -158,10 +158,6 @@ pub enum ErrorKind { #[error("Error when using regex engine: {0}")] Regex(#[from] regex::Error), - /// Too many redirects (HTTP 3xx) were encountered (configurable) - #[error("Too many redirects")] - TooManyRedirects(#[source] reqwest::Error), - /// Basic auth extractor error #[error("Basic auth extractor error")] BasicAuthExtractorError(#[from] BasicAuthExtractorError), @@ -325,9 +321,6 @@ impl ErrorKind { ErrorKind::Regex(error) => Some(format!( "Regular expression error: {error}. Check regex syntax", )), - ErrorKind::TooManyRedirects(_error) => Some( - "Too many redirects. Check for redirect loops or increase redirect limit".to_string() - ), ErrorKind::BasicAuthExtractorError(basic_auth_extractor_error) => Some(format!( "Basic authentication error: {basic_auth_extractor_error}. Check credentials format", )), @@ -402,9 +395,6 @@ impl PartialEq for ErrorKind { (Self::Regex(e1), Self::Regex(e2)) => e1.to_string() == e2.to_string(), (Self::DirTraversal(e1), Self::DirTraversal(e2)) => e1.to_string() == e2.to_string(), (Self::Channel(_), Self::Channel(_)) => true, - (Self::TooManyRedirects(e1), Self::TooManyRedirects(e2)) => { - e1.to_string() == e2.to_string() - } (Self::BasicAuthExtractorError(e1), Self::BasicAuthExtractorError(e2)) => { e1.to_string() == e2.to_string() } @@ -417,6 +407,7 @@ impl PartialEq for ErrorKind { (Self::InvalidBase(b1, e1), Self::InvalidBase(b2, e2)) => b1 == b2 && e1 == e2, (Self::InvalidUrlRemap(r1), Self::InvalidUrlRemap(r2)) => r1 == r2, (Self::EmptyUrl, Self::EmptyUrl) => true, + (Self::RejectedStatusCode(c1), Self::RejectedStatusCode(c2)) => c1 == c2, #[cfg(any(test, debug_assertions))] (Self::TestError, Self::TestError) => true, @@ -474,7 +465,6 @@ impl Hash for ErrorKind { std::mem::discriminant(self).hash(state); } Self::Regex(e) => e.to_string().hash(state), - Self::TooManyRedirects(e) => e.to_string().hash(state), Self::BasicAuthExtractorError(e) => e.to_string().hash(state), Self::Cookies(e) => e.to_string().hash(state), Self::StatusCodeSelectorError(e) => e.to_string().hash(state), diff --git a/lychee-lib/src/types/mod.rs b/lychee-lib/src/types/mod.rs index 4d41c5e830..679b26f972 100644 --- a/lychee-lib/src/types/mod.rs +++ b/lychee-lib/src/types/mod.rs @@ -9,6 +9,7 @@ mod error; mod file; mod input; pub(crate) mod mail; +pub(crate) mod redirect_history; mod request; pub(crate) mod resolver; mod response; @@ -24,6 +25,7 @@ pub use cookies::CookieJar; pub use error::ErrorKind; pub use file::{FileExtensions, FileType}; pub use input::{Input, InputContent, InputResolver, InputSource, ResolvedInputSource}; +pub use redirect_history::Redirects; pub use request::Request; pub use response::{Response, ResponseBody}; pub use status::Status; diff --git a/lychee-lib/src/types/redirect_history.rs b/lychee-lib/src/types/redirect_history.rs new file mode 100644 index 0000000000..e6468d6669 --- /dev/null +++ b/lychee-lib/src/types/redirect_history.rs @@ -0,0 +1,74 @@ +use crate::Status; +use serde::Serialize; +use std::{ + collections::HashMap, + sync::{Arc, Mutex}, +}; +use url::Url; + +/// A list of URLs that were followed through HTTP redirects, +/// starting from the original URL and ending at the final destination. +/// Each entry in the list represents a step in the redirect sequence. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)] +pub struct Redirects(Vec); + +impl From> for Redirects { + fn from(value: Vec) -> Self { + Self(value) + } +} + +impl Redirects { + /// Count how many times a redirect was followed. + /// This is the length of the list minus one. + pub(crate) fn count(&self) -> usize { + self.0.len().saturating_sub(1) + } + + /// Represents zero redirects + #[must_use] + pub const fn none() -> Self { + Redirects(vec![]) + } +} + +/// Keep track of HTTP redirections for reporting +#[derive(Debug, Clone)] +pub(crate) struct RedirectHistory(Arc>>); + +impl RedirectHistory { + pub(crate) fn new() -> Self { + Self(Arc::new(Mutex::new(HashMap::new()))) + } + + /// Records a redirect chain, using the original URL as the key. + /// + /// The first URL in the chain is treated as the original request URL, + /// and the entire chain (including the original) is stored as the value. + /// This allows later lookups of redirect paths by the initial URL. + pub(crate) fn record_redirects(&self, redirects: &[Url]) { + if let (Ok(mut map), Some(first)) = (self.0.lock(), redirects.first()) { + map.insert(first.clone(), Redirects(redirects.to_vec())); + } + } + + pub(crate) fn handle_redirected(&self, url: &Url, status: Status) -> Status { + match status { + Status::Ok(code) => self + .get_resolved(url) + .map(|redirects| Status::Redirected(code, redirects)) + .unwrap_or(Status::Ok(code)), + other => other, + } + } + + fn get_resolved(&self, original: &Url) -> Option { + self.0.lock().ok()?.get(original).cloned() + } +} + +impl Default for RedirectHistory { + fn default() -> Self { + Self::new() + } +} diff --git a/lychee-lib/src/types/response.rs b/lychee-lib/src/types/response.rs index 7a54d6f291..96e62c117f 100644 --- a/lychee-lib/src/types/response.rs +++ b/lychee-lib/src/types/response.rs @@ -62,9 +62,9 @@ impl Serialize for Response { } } +/// Encapsulates the state of a URI check #[allow(clippy::module_name_repetitions)] #[derive(Debug, Serialize, Hash, PartialEq, Eq)] -/// Encapsulates the state of a URI check pub struct ResponseBody { #[serde(flatten)] /// The URI which was checked diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index 8674750329..58aa4b233c 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -1,22 +1,21 @@ use std::{collections::HashSet, fmt::Display}; +use super::CacheStatus; +use super::redirect_history::Redirects; +use crate::ErrorKind; use http::StatusCode; use reqwest::Response; use serde::ser::SerializeStruct; use serde::{Serialize, Serializer}; -use crate::ErrorKind; - -use super::CacheStatus; - -const ICON_OK: &str = "\u{2714}"; // āœ” -const ICON_REDIRECTED: &str = "\u{21c4}"; // ⇄ -const ICON_EXCLUDED: &str = "\u{003f}"; // ? +const ICON_OK: &str = "āœ”"; +const ICON_REDIRECTED: &str = "⇄"; +const ICON_EXCLUDED: &str = "?"; const ICON_UNSUPPORTED: &str = "\u{003f}"; // ? (using same icon, but under different name for explicitness) -const ICON_UNKNOWN: &str = "\u{003f}"; // ? -const ICON_ERROR: &str = "\u{2717}"; // āœ— -const ICON_TIMEOUT: &str = "\u{29d6}"; // ā§– -const ICON_CACHED: &str = "\u{21bb}"; // ↻ +const ICON_UNKNOWN: &str = "?"; +const ICON_ERROR: &str = "āœ—"; +const ICON_TIMEOUT: &str = "ā§–"; +const ICON_CACHED: &str = "↻"; /// Response status of the request. #[allow(variant_size_differences)] @@ -29,7 +28,7 @@ pub enum Status { /// Request timed out Timeout(Option), /// Got redirected to different resource - Redirected(StatusCode), + Redirected(StatusCode, Redirects), /// The given status code is not known by lychee UnknownStatusCode(StatusCode), /// Resource was excluded from checking @@ -46,7 +45,7 @@ impl Display for Status { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Status::Ok(code) => write!(f, "{code}"), - Status::Redirected(code) => write!(f, "Redirect ({code})"), + Status::Redirected(_, _) => write!(f, "Redirect"), Status::UnknownStatusCode(code) => write!(f, "Unknown status ({code})"), Status::Timeout(Some(code)) => write!(f, "Timeout ({code})"), Status::Timeout(None) => f.write_str("Timeout"), @@ -64,6 +63,7 @@ impl Serialize for Status { S: Serializer, { let mut s; + if let Some(code) = self.code() { s = serializer.serialize_struct("Status", 2)?; s.serialize_field("text", &self.to_string())?; @@ -76,6 +76,11 @@ impl Serialize for Status { s = serializer.serialize_struct("Status", 1)?; s.serialize_field("text", &self.to_string())?; } + + if let Status::Redirected(_, redirects) = self { + s.serialize_field("redirects", redirects)?; + } + s.end() } } @@ -135,7 +140,18 @@ impl Status { pub fn details(&self) -> Option { match &self { Status::Ok(code) => code.canonical_reason().map(String::from), - Status::Redirected(code) => code.canonical_reason().map(String::from), + Status::Redirected(code, redirects) => { + let count = redirects.count(); + let redirects = if count == 1 { "redirect" } else { "redirects" }; + + let result = code + .canonical_reason() + .map(String::from) + .unwrap_or(code.as_str().to_owned()); + Some(format!( + "Followed {count} {redirects} resolving to the final status of: {result}" + )) + } Status::Error(e) => e.details(), Status::Timeout(_) => None, Status::UnknownStatusCode(_) => None, @@ -194,7 +210,7 @@ impl Status { pub const fn icon(&self) -> &str { match self { Status::Ok(_) => ICON_OK, - Status::Redirected(_) => ICON_REDIRECTED, + Status::Redirected(_, _) => ICON_REDIRECTED, Status::UnknownStatusCode(_) => ICON_UNKNOWN, Status::Excluded => ICON_EXCLUDED, Status::Error(_) => ICON_ERROR, @@ -209,7 +225,7 @@ impl Status { pub fn code(&self) -> Option { match self { Status::Ok(code) - | Status::Redirected(code) + | Status::Redirected(code, _) | Status::UnknownStatusCode(code) | Status::Timeout(Some(code)) => Some(*code), Status::Error(kind) | Status::Unsupported(kind) => match kind { @@ -230,7 +246,7 @@ impl Status { #[must_use] pub fn code_as_string(&self) -> String { match self { - Status::Ok(code) | Status::Redirected(code) | Status::UnknownStatusCode(code) => { + Status::Ok(code) | Status::Redirected(code, _) | Status::UnknownStatusCode(code) => { code.as_str().to_string() } Status::Excluded => "EXCLUDED".to_string(), @@ -282,8 +298,6 @@ impl From for Status { fn from(e: reqwest::Error) -> Self { if e.is_timeout() { Self::Timeout(e.status()) - } else if e.is_redirect() { - Self::Error(ErrorKind::TooManyRedirects(e)) } else if e.is_builder() { Self::Unsupported(ErrorKind::BuildRequestClient(e)) } else if e.is_body() || e.is_decode() { @@ -296,7 +310,7 @@ impl From for Status { #[cfg(test)] mod tests { - use crate::{CacheStatus, ErrorKind, Status}; + use crate::{CacheStatus, ErrorKind, Status, types::redirect_history::Redirects}; use http::StatusCode; #[test] @@ -331,7 +345,7 @@ mod tests { 999 ); assert_eq!( - Status::Redirected(StatusCode::from_u16(300).unwrap()) + Status::Redirected(StatusCode::from_u16(300).unwrap(), Redirects::none()) .code() .unwrap(), 300 diff --git a/lychee-lib/src/types/uri/github.rs b/lychee-lib/src/types/uri/github.rs index cf154dda98..f6b3912dff 100644 --- a/lychee-lib/src/types/uri/github.rs +++ b/lychee-lib/src/types/uri/github.rs @@ -132,39 +132,39 @@ impl TryFrom<&Uri> for GithubUri { #[cfg(test)] mod tests { - use crate::test_utils::website; use super::*; + use test_utils::website; #[test] fn test_github() { assert_eq!( - GithubUri::try_from(website("http://github.com/lycheeverse/lychee")).unwrap(), + GithubUri::try_from(website!("http://github.com/lycheeverse/lychee")).unwrap(), GithubUri::new("lycheeverse", "lychee") ); assert_eq!( - GithubUri::try_from(website("http://www.github.com/lycheeverse/lychee")).unwrap(), + GithubUri::try_from(website!("http://www.github.com/lycheeverse/lychee")).unwrap(), GithubUri::new("lycheeverse", "lychee") ); assert_eq!( - GithubUri::try_from(website("https://github.com/lycheeverse/lychee")).unwrap(), + GithubUri::try_from(website!("https://github.com/lycheeverse/lychee")).unwrap(), GithubUri::new("lycheeverse", "lychee") ); assert_eq!( - GithubUri::try_from(website("https://github.com/lycheeverse/lychee/")).unwrap(), + GithubUri::try_from(website!("https://github.com/lycheeverse/lychee/")).unwrap(), GithubUri::new("lycheeverse", "lychee") ); assert_eq!( - GithubUri::try_from(website("https://github.com/lycheeverse/lychee/foo/bar")).unwrap(), + GithubUri::try_from(website!("https://github.com/lycheeverse/lychee/foo/bar")).unwrap(), GithubUri::with_endpoint("lycheeverse", "lychee", "foo/bar") ); assert_eq!( - GithubUri::try_from(website( + GithubUri::try_from(website!( "https://github.com/Microsoft/python-language-server.git" )) .unwrap(), @@ -172,7 +172,7 @@ mod tests { ); assert_eq!( - GithubUri::try_from(website( + GithubUri::try_from(website!( "https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md" )) .unwrap(), @@ -183,21 +183,21 @@ mod tests { #[test] fn test_github_false_positives() { assert!( - GithubUri::try_from(website("https://github.com/sponsors/analysis-tools-dev ")) + GithubUri::try_from(website!("https://github.com/sponsors/analysis-tools-dev ")) .is_err() ); assert!( - GithubUri::try_from(website( + GithubUri::try_from(website!( "https://github.com/marketplace/actions/lychee-broken-link-checker" )) .is_err() ); - assert!(GithubUri::try_from(website("https://github.com/features/actions")).is_err()); + assert!(GithubUri::try_from(website!("https://github.com/features/actions")).is_err()); assert!( - GithubUri::try_from(website( + GithubUri::try_from(website!( "https://pkg.go.dev/github.com/Debian/pkg-go-tools/cmd/pgt-gopath" )) .is_err() diff --git a/lychee-lib/src/types/uri/valid.rs b/lychee-lib/src/types/uri/valid.rs index 5582018d74..505ce6f0ae 100644 --- a/lychee-lib/src/types/uri/valid.rs +++ b/lychee-lib/src/types/uri/valid.rs @@ -282,11 +282,12 @@ impl Display for Uri { #[cfg(test)] mod tests { use super::*; - use crate::test_utils::{mail, website}; use std::{ convert::TryFrom, net::{IpAddr, Ipv4Addr, Ipv6Addr}, }; + use test_utils::mail; + use test_utils::website; #[test] fn test_ipv4_uri_is_loopback() { @@ -305,11 +306,11 @@ mod tests { assert!(Uri::try_from("").is_err()); assert_eq!( Uri::try_from("https://example.com"), - Ok(website("https://example.com")) + Ok(website!("https://example.com")) ); assert_eq!( Uri::try_from("https://example.com/@test/testing"), - Ok(website("https://example.com/@test/testing")) + Ok(website!("https://example.com/@test/testing")) ); } @@ -317,15 +318,15 @@ mod tests { fn test_uri_from_email_str() { assert_eq!( Uri::try_from("mail@example.com"), - Ok(mail("mail@example.com")) + Ok(mail!("mail@example.com")) ); assert_eq!( Uri::try_from("mailto:mail@example.com"), - Ok(mail("mail@example.com")) + Ok(mail!("mail@example.com")) ); assert_eq!( Uri::try_from("mail@example.com?foo=bar"), - Ok(mail("mail@example.com?foo=bar")) + Ok(mail!("mail@example.com?foo=bar")) ); } @@ -340,7 +341,7 @@ mod tests { #[test] fn test_uri_host_ip_v4() { assert_eq!( - website("http://127.0.0.1").host_ip(), + website!("http://127.0.0.1").host_ip(), Some(IpAddr::V4(Ipv4Addr::LOCALHOST)) ); } @@ -348,20 +349,20 @@ mod tests { #[test] fn test_uri_host_ip_v6() { assert_eq!( - website("https://[2020::0010]").host_ip(), + website!("https://[2020::0010]").host_ip(), Some(IpAddr::V6(Ipv6Addr::new(0x2020, 0, 0, 0, 0, 0, 0, 0x10))) ); } #[test] fn test_uri_host_ip_no_ip() { - assert!(website("https://some.cryptic/url").host_ip().is_none()); + assert!(website!("https://some.cryptic/url").host_ip().is_none()); } #[test] fn test_localhost() { assert_eq!( - website("http://127.0.0.1").host_ip(), + website!("http://127.0.0.1").host_ip(), Some(IpAddr::V4(Ipv4Addr::LOCALHOST)) ); } @@ -369,13 +370,13 @@ mod tests { #[test] fn test_convert_to_https() { assert_eq!( - website("http://example.com").to_https().unwrap(), - website("https://example.com") + website!("http://example.com").to_https().unwrap(), + website!("https://example.com") ); assert_eq!( - website("https://example.com").to_https().unwrap(), - website("https://example.com") + website!("https://example.com").to_https().unwrap(), + website!("https://example.com") ); } diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml new file mode 100644 index 0000000000..fbc823879d --- /dev/null +++ b/test-utils/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "test-utils" +authors = ["Thomas Zahner ", "Matthias Endler "] +edition = "2024" +description = "Common test utilities for the lychee project, intended for internal use only" +homepage = "https://github.com/lycheeverse/lychee" +license = "Apache-2.0 OR MIT" +repository = "https://github.com/lycheeverse/lychee" +version.workspace = true +readme = "../README.md" +rust-version = "1.85.0" +publish = false + +[dependencies] diff --git a/test-utils/src/lib.rs b/test-utils/src/lib.rs new file mode 100644 index 0000000000..17a150a243 --- /dev/null +++ b/test-utils/src/lib.rs @@ -0,0 +1,151 @@ +//! `test-utils` is used for testing in both `lychee-lib` and `lychee-bin`. +//! This crate does not depend on `lychee-lib` or `lychee-bin`, else we would get dependency cycles. +//! Macros are used instead, so that the importer is responsible for providing the dependencies. + +/// Create a mock web server, which responds with a predefined status when +/// handling a matching request +#[macro_export] +macro_rules! mock_server { + ($status:expr $(, $func:tt ($($arg:expr),*))*) => {{ + let mock_server = wiremock::MockServer::start().await; + let response_template = wiremock::ResponseTemplate::new(http::StatusCode::from($status)); + let template = response_template$(.$func($($arg),*))*; + wiremock::Mock::given(wiremock::matchers::method("GET")).respond_with(template).mount(&mock_server).await; + mock_server + }}; +} + +/// Set up a mock server which has two routes: `/ok` and `/redirect`. +/// Calling `/redirect` returns a HTTP Location header redirecting to `/ok` +#[macro_export] +macro_rules! redirecting_mock_server { + ($f:expr) => {{ + use std::str::FromStr; + use url::Url; + + async { + let mock_server = wiremock::MockServer::start().await; + let ok_url = Url::from_str(&format!("{}/ok", mock_server.uri())).unwrap(); + let redirect_url = Url::from_str(&format!("{}/redirect", mock_server.uri())).unwrap(); + + // Set up redirect + let redirect = wiremock::ResponseTemplate::new(StatusCode::PERMANENT_REDIRECT) + .insert_header("Location", ok_url.as_str()); + wiremock::Mock::given(wiremock::matchers::method("GET")) + .and(wiremock::matchers::path("/redirect")) + .respond_with(redirect) + .expect(1) // expect the redirect to be followed and called once + .mount(&mock_server) + .await; + + let ok = wiremock::ResponseTemplate::new(StatusCode::OK); + wiremock::Mock::given(wiremock::matchers::method("GET")) + .and(wiremock::matchers::path("/ok")) + .respond_with(ok) + .expect(1) // expect the redirect to be followed and called once + .mount(&mock_server) + .await; + + $f(redirect_url, ok_url).await; + } + }}; +} + +#[macro_export] +macro_rules! get_mock_client_response { + ($request:expr $(,)?) => { + async { + ClientBuilder::default() + .client() + .unwrap() + .check($request) + .await + .unwrap() + } + }; +} + +/// Helper method to convert a `std::path::Path `into a URI with the `file` scheme +/// +/// # Panic +/// +/// This panics if the given path is not absolute, so it should only be used for +/// testing +#[macro_export] +macro_rules! path { + ($path:expr) => { + Uri::from(Url::from_file_path(String::from($path)).expect("Expected valid File URI")) + }; +} + +/// Helper method to convert a string into a URI +/// +/// # Panic +/// +/// This panics on error, so it should only be used for testing +#[macro_export] +macro_rules! website { + ($url:expr) => {{ + use url::Url; + Uri::from(Url::parse($url).expect("Expected valid Website URL")) + }}; +} + +/// Creates a mail URI from a string +#[macro_export] +macro_rules! mail { + ($address:expr) => {{ + use url::Url; + + if $address.starts_with("mailto:") { + Url::parse($address) + } else { + Url::parse(&(String::from("mailto:") + $address)) + } + .expect("Expected valid Mail Address") + .into() + }}; +} + +/// Returns the path to the `fixtures` directory. +/// +/// # Panic +/// +/// Panics if the fixtures directory could not be determined. +#[macro_export] +macro_rules! fixtures_path { + () => { + std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .join("fixtures") + }; +} + +/// Loads a fixture from the `fixtures` directory +#[macro_export] +macro_rules! load_fixture { + ($filename:expr) => {{ + let path = fixtures_path!().join($filename); + std::fs::read_to_string(path).unwrap() + }}; +} + +/// Constructs a `Uri` from a given subpath within the `fixtures` directory. +/// +/// The specified subpath may contain a fragment reference by ending with `#something`. +/// The subpath should not begin with a slash, otherwise it will be treated as an +/// absolute path. +#[macro_export] +macro_rules! fixture_uri { + ($subpath:expr) => {{ + use url::Url; + let fixture_url = + Url::from_directory_path(fixtures_path!()).expect("fixture path should be a valid URL"); + + // joining subpath onto a Url allows the subpath to contain a fragment + fixture_url + .join($subpath) + .expect("expected subpath to form a valid URL") + }}; +}