From f7488ddfa11aa79bde17badbda9d2ae687191ea8 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 18:06:26 +1000 Subject: [PATCH 01/63] stash --- lychee-lib/src/collector.rs | 16 ++++++++++++---- lychee-lib/src/types/uri/raw.rs | 2 +- lychee-lib/src/utils/request.rs | 17 +++++++++-------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index e209c56a5e..606af56a18 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -1,5 +1,6 @@ use crate::ErrorKind; use crate::InputSource; +use crate::ResolvedInputSource; use crate::filter::PathExcludes; use crate::types::resolver::UrlContentResolver; use crate::{ @@ -19,6 +20,13 @@ use std::collections::HashSet; use std::path::PathBuf; use std::sync::Arc; +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct CollectError { + pub source: ResolvedInputSource, + pub raw_uri: RawUri, + pub error: ErrorKind, +} + /// Collector keeps the state of link collection /// It drives the link extraction from inputs #[allow(clippy::struct_excessive_bools)] @@ -223,7 +231,7 @@ impl Collector { /// Convenience method to fetch all unique links from inputs /// with the default extensions. 
- pub fn collect_links(self, inputs: HashSet) -> impl Stream> { + pub fn collect_links(self, inputs: HashSet) -> impl Stream> { self.collect_links_from_file_types(inputs, crate::types::FileType::default_extensions()) } @@ -238,7 +246,7 @@ impl Collector { self, inputs: HashSet, extensions: FileExtensions, - ) -> impl Stream> { + ) -> impl Stream> { let skip_missing_inputs = self.skip_missing_inputs; let skip_hidden = self.skip_hidden; let skip_ignored = self.skip_ignored; @@ -287,7 +295,7 @@ impl Collector { let root_dir = self.root_dir.clone(); let basic_auth_extractor = self.basic_auth_extractor.clone(); async move { - let content = content?; + let content = content.map_err?; let uris: Vec = extractor.extract(&content); let requests = request::create( uris, @@ -296,7 +304,7 @@ impl Collector { base.as_ref(), basic_auth_extractor.as_ref(), ); - Result::Ok(stream::iter(requests.into_iter().map(Ok))) + Result::Ok(stream::iter(requests.into_iter())) } }) .try_flatten() diff --git a/lychee-lib/src/types/uri/raw.rs b/lychee-lib/src/types/uri/raw.rs index 3ad51f2cf8..74a4ed910c 100644 --- a/lychee-lib/src/types/uri/raw.rs +++ b/lychee-lib/src/types/uri/raw.rs @@ -2,7 +2,7 @@ use std::fmt::Display; /// A raw URI that got extracted from a document with a fuzzy parser. /// Note that this can still be invalid according to stricter URI standards -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct RawUri { /// Unparsed URI represented as a `String`. 
There is no guarantee that it /// can be parsed into a URI object diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 693670ddef..7890d1a63a 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -1,3 +1,4 @@ +use crate::collector::CollectError; use log::warn; use percent_encoding::percent_decode_str; use reqwest::Url; @@ -123,18 +124,18 @@ pub(crate) fn create( root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, -) -> HashSet { +) -> HashSet> { let base = base.cloned().or_else(|| Base::from_source(source)); uris.into_iter() - .filter_map(|raw_uri| { - match create_request(&raw_uri, source, root_dir, base.as_ref(), extractor) { - Ok(request) => Some(request), - Err(e) => { - warn!("Error creating request: {e:?}"); - None + .map(|raw_uri| { + create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| { + CollectError { + source: source.clone(), + raw_uri: raw_uri, + error: e, } - } + }) }) .collect() } From a9fe9ad531361ba13c04c4133715971742906b7b Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 18:30:39 +1000 Subject: [PATCH 02/63] stash very broken --- lychee-bin/src/commands/check.rs | 7 +++++-- lychee-bin/src/commands/dump.rs | 2 +- lychee-bin/src/commands/mod.rs | 5 +++-- lychee-lib/src/collector.rs | 29 +++++++++++++++++++++-------- lychee-lib/src/types/input/input.rs | 1 + lychee-lib/src/utils/request.rs | 9 ++------- 6 files changed, 33 insertions(+), 20 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 0a266c6ffc..e6be82f9fc 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -12,7 +12,7 @@ use tokio_stream::wrappers::ReceiverStream; use lychee_lib::archive::Archive; use lychee_lib::{Client, ErrorKind, Request, Response, Uri}; -use lychee_lib::{InputSource, Result}; +use lychee_lib::{InputSource}; use lychee_lib::{ResponseBody, Status}; use 
crate::formatters::get_response_formatter; @@ -23,13 +23,16 @@ use crate::parse::parse_duration_secs; use crate::verbosity::Verbosity; use crate::{ExitCode, cache::Cache, stats::ResponseStats}; +use std::result::Result; + +use lychee_lib::collector::CollectError; use super::CommandParams; pub(crate) async fn check( params: CommandParams, ) -> Result<(ResponseStats, Arc, ExitCode)> where - S: futures::Stream>, + S: futures::Stream>> { // Setup let (send_req, recv_req) = mpsc::channel(params.cfg.max_concurrency); diff --git a/lychee-bin/src/commands/dump.rs b/lychee-bin/src/commands/dump.rs index 26df6f4018..a9dc82dfbe 100644 --- a/lychee-bin/src/commands/dump.rs +++ b/lychee-bin/src/commands/dump.rs @@ -13,7 +13,7 @@ use super::CommandParams; /// Dump all detected links to stdout without checking them pub(crate) async fn dump(params: CommandParams) -> Result where - S: futures::Stream>, + S: futures::Stream>> { let requests = params.requests; tokio::pin!(requests); diff --git a/lychee-bin/src/commands/mod.rs b/lychee-bin/src/commands/mod.rs index 1f00503f02..7735e8b9ed 100644 --- a/lychee-bin/src/commands/mod.rs +++ b/lychee-bin/src/commands/mod.rs @@ -10,14 +10,15 @@ use std::fs; use std::io::{self, Write}; use std::path::PathBuf; use std::sync::Arc; +use std::result::Result; use crate::cache::Cache; use crate::options::Config; -use lychee_lib::Result; use lychee_lib::{Client, Request}; +use lychee_lib::collector::CollectError; /// Parameters passed to every command -pub(crate) struct CommandParams>> { +pub(crate) struct CommandParams>> { pub(crate) client: Client, pub(crate) cache: Arc, pub(crate) requests: S, diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 606af56a18..cbdc45e1dd 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -20,11 +20,13 @@ use std::collections::HashSet; use std::path::PathBuf; use std::sync::Arc; +/// a #[derive(Debug, PartialEq, Eq, Hash)] -pub struct CollectError { - pub source: 
ResolvedInputSource, - pub raw_uri: RawUri, - pub error: ErrorKind, +pub enum CollectError { + /// a + CannotParseUrl(RawUri, ResolvedInputSource, ErrorKind), + /// a + CannotGetContents(InputSource, ErrorKind), } /// Collector keeps the state of link collection @@ -231,7 +233,10 @@ impl Collector { /// Convenience method to fetch all unique links from inputs /// with the default extensions. - pub fn collect_links(self, inputs: HashSet) -> impl Stream> { + pub fn collect_links( + self, + inputs: HashSet, + ) -> impl Stream> { self.collect_links_from_file_types(inputs, crate::types::FileType::default_extensions()) } @@ -271,6 +276,7 @@ impl Collector { let extensions = extensions.clone(); let resolver = resolver.clone(); let excluded_paths = excluded_paths.clone(); + let source = input.source.clone(); async move { let base = match &input.source { @@ -287,7 +293,14 @@ impl Collector { resolver, excluded_paths, ) - .map(move |content| (content, base.clone())) + .map(move |content| { + ( + content.map_err(|e| { + CollectError::CannotGetContents(source.clone(), e) + }), + base.clone(), + ) + }) } }) .flatten() @@ -295,7 +308,7 @@ impl Collector { let root_dir = self.root_dir.clone(); let basic_auth_extractor = self.basic_auth_extractor.clone(); async move { - let content = content.map_err?; + let content = content?; let uris: Vec = extractor.extract(&content); let requests = request::create( uris, @@ -304,7 +317,7 @@ impl Collector { base.as_ref(), basic_auth_extractor.as_ref(), ); - Result::Ok(stream::iter(requests.into_iter())) + std::result::Result::Ok(stream::iter(requests.into_iter())) } }) .try_flatten() diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index 1df0297f1d..cad036fa6c 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -7,6 +7,7 @@ use super::InputResolver; use super::content::InputContent; use super::source::InputSource; use super::source::ResolvedInputSource; +use 
crate::collector::CollectError; use crate::filter::PathExcludes; use crate::types::FileType; use crate::types::file::FileExtensions; diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 7890d1a63a..c6e661fd2f 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -129,13 +129,8 @@ pub(crate) fn create( uris.into_iter() .map(|raw_uri| { - create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| { - CollectError { - source: source.clone(), - raw_uri: raw_uri, - error: e, - } - }) + create_request(&raw_uri, source, root_dir, base.as_ref(), extractor) + .map_err(|e| CollectError::CannotParseUrl(raw_uri, source.clone(), e)) }) .collect() } From cef997e9b73a076e7c2b6bce39adf57da2686c0b Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 18:30:53 +1000 Subject: [PATCH 03/63] Revert "stash very broken" This reverts commit a9fe9ad531361ba13c04c4133715971742906b7b. --- lychee-bin/src/commands/check.rs | 7 ++----- lychee-bin/src/commands/dump.rs | 2 +- lychee-bin/src/commands/mod.rs | 5 ++--- lychee-lib/src/collector.rs | 29 ++++++++--------------------- lychee-lib/src/types/input/input.rs | 1 - lychee-lib/src/utils/request.rs | 9 +++++++-- 6 files changed, 20 insertions(+), 33 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index e6be82f9fc..0a266c6ffc 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -12,7 +12,7 @@ use tokio_stream::wrappers::ReceiverStream; use lychee_lib::archive::Archive; use lychee_lib::{Client, ErrorKind, Request, Response, Uri}; -use lychee_lib::{InputSource}; +use lychee_lib::{InputSource, Result}; use lychee_lib::{ResponseBody, Status}; use crate::formatters::get_response_formatter; @@ -23,16 +23,13 @@ use crate::parse::parse_duration_secs; use crate::verbosity::Verbosity; use crate::{ExitCode, cache::Cache, stats::ResponseStats}; -use std::result::Result; - -use 
lychee_lib::collector::CollectError; use super::CommandParams; pub(crate) async fn check( params: CommandParams, ) -> Result<(ResponseStats, Arc, ExitCode)> where - S: futures::Stream>> + S: futures::Stream>, { // Setup let (send_req, recv_req) = mpsc::channel(params.cfg.max_concurrency); diff --git a/lychee-bin/src/commands/dump.rs b/lychee-bin/src/commands/dump.rs index a9dc82dfbe..26df6f4018 100644 --- a/lychee-bin/src/commands/dump.rs +++ b/lychee-bin/src/commands/dump.rs @@ -13,7 +13,7 @@ use super::CommandParams; /// Dump all detected links to stdout without checking them pub(crate) async fn dump(params: CommandParams) -> Result where - S: futures::Stream>> + S: futures::Stream>, { let requests = params.requests; tokio::pin!(requests); diff --git a/lychee-bin/src/commands/mod.rs b/lychee-bin/src/commands/mod.rs index 7735e8b9ed..1f00503f02 100644 --- a/lychee-bin/src/commands/mod.rs +++ b/lychee-bin/src/commands/mod.rs @@ -10,15 +10,14 @@ use std::fs; use std::io::{self, Write}; use std::path::PathBuf; use std::sync::Arc; -use std::result::Result; use crate::cache::Cache; use crate::options::Config; +use lychee_lib::Result; use lychee_lib::{Client, Request}; -use lychee_lib::collector::CollectError; /// Parameters passed to every command -pub(crate) struct CommandParams>> { +pub(crate) struct CommandParams>> { pub(crate) client: Client, pub(crate) cache: Arc, pub(crate) requests: S, diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index cbdc45e1dd..606af56a18 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -20,13 +20,11 @@ use std::collections::HashSet; use std::path::PathBuf; use std::sync::Arc; -/// a #[derive(Debug, PartialEq, Eq, Hash)] -pub enum CollectError { - /// a - CannotParseUrl(RawUri, ResolvedInputSource, ErrorKind), - /// a - CannotGetContents(InputSource, ErrorKind), +pub struct CollectError { + pub source: ResolvedInputSource, + pub raw_uri: RawUri, + pub error: ErrorKind, } /// Collector 
keeps the state of link collection @@ -233,10 +231,7 @@ impl Collector { /// Convenience method to fetch all unique links from inputs /// with the default extensions. - pub fn collect_links( - self, - inputs: HashSet, - ) -> impl Stream> { + pub fn collect_links(self, inputs: HashSet) -> impl Stream> { self.collect_links_from_file_types(inputs, crate::types::FileType::default_extensions()) } @@ -276,7 +271,6 @@ impl Collector { let extensions = extensions.clone(); let resolver = resolver.clone(); let excluded_paths = excluded_paths.clone(); - let source = input.source.clone(); async move { let base = match &input.source { @@ -293,14 +287,7 @@ impl Collector { resolver, excluded_paths, ) - .map(move |content| { - ( - content.map_err(|e| { - CollectError::CannotGetContents(source.clone(), e) - }), - base.clone(), - ) - }) + .map(move |content| (content, base.clone())) } }) .flatten() @@ -308,7 +295,7 @@ impl Collector { let root_dir = self.root_dir.clone(); let basic_auth_extractor = self.basic_auth_extractor.clone(); async move { - let content = content?; + let content = content.map_err?; let uris: Vec = extractor.extract(&content); let requests = request::create( uris, @@ -317,7 +304,7 @@ impl Collector { base.as_ref(), basic_auth_extractor.as_ref(), ); - std::result::Result::Ok(stream::iter(requests.into_iter())) + Result::Ok(stream::iter(requests.into_iter())) } }) .try_flatten() diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index cad036fa6c..1df0297f1d 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -7,7 +7,6 @@ use super::InputResolver; use super::content::InputContent; use super::source::InputSource; use super::source::ResolvedInputSource; -use crate::collector::CollectError; use crate::filter::PathExcludes; use crate::types::FileType; use crate::types::file::FileExtensions; diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 
c6e661fd2f..7890d1a63a 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -129,8 +129,13 @@ pub(crate) fn create( uris.into_iter() .map(|raw_uri| { - create_request(&raw_uri, source, root_dir, base.as_ref(), extractor) - .map_err(|e| CollectError::CannotParseUrl(raw_uri, source.clone(), e)) + create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| { + CollectError { + source: source.clone(), + raw_uri: raw_uri, + error: e, + } + }) }) .collect() } From 25f130b940814eac43fb513fd6b004138b9adac6 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 18:44:57 +1000 Subject: [PATCH 04/63] Revert "stash" This reverts commit f7488ddfa11aa79bde17badbda9d2ae687191ea8. --- lychee-lib/src/collector.rs | 16 ++++------------ lychee-lib/src/types/uri/raw.rs | 2 +- lychee-lib/src/utils/request.rs | 17 ++++++++--------- 3 files changed, 13 insertions(+), 22 deletions(-) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 606af56a18..e209c56a5e 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -1,6 +1,5 @@ use crate::ErrorKind; use crate::InputSource; -use crate::ResolvedInputSource; use crate::filter::PathExcludes; use crate::types::resolver::UrlContentResolver; use crate::{ @@ -20,13 +19,6 @@ use std::collections::HashSet; use std::path::PathBuf; use std::sync::Arc; -#[derive(Debug, PartialEq, Eq, Hash)] -pub struct CollectError { - pub source: ResolvedInputSource, - pub raw_uri: RawUri, - pub error: ErrorKind, -} - /// Collector keeps the state of link collection /// It drives the link extraction from inputs #[allow(clippy::struct_excessive_bools)] @@ -231,7 +223,7 @@ impl Collector { /// Convenience method to fetch all unique links from inputs /// with the default extensions. 
- pub fn collect_links(self, inputs: HashSet) -> impl Stream> { + pub fn collect_links(self, inputs: HashSet) -> impl Stream> { self.collect_links_from_file_types(inputs, crate::types::FileType::default_extensions()) } @@ -246,7 +238,7 @@ impl Collector { self, inputs: HashSet, extensions: FileExtensions, - ) -> impl Stream> { + ) -> impl Stream> { let skip_missing_inputs = self.skip_missing_inputs; let skip_hidden = self.skip_hidden; let skip_ignored = self.skip_ignored; @@ -295,7 +287,7 @@ impl Collector { let root_dir = self.root_dir.clone(); let basic_auth_extractor = self.basic_auth_extractor.clone(); async move { - let content = content.map_err?; + let content = content?; let uris: Vec = extractor.extract(&content); let requests = request::create( uris, @@ -304,7 +296,7 @@ impl Collector { base.as_ref(), basic_auth_extractor.as_ref(), ); - Result::Ok(stream::iter(requests.into_iter())) + Result::Ok(stream::iter(requests.into_iter().map(Ok))) } }) .try_flatten() diff --git a/lychee-lib/src/types/uri/raw.rs b/lychee-lib/src/types/uri/raw.rs index 74a4ed910c..3ad51f2cf8 100644 --- a/lychee-lib/src/types/uri/raw.rs +++ b/lychee-lib/src/types/uri/raw.rs @@ -2,7 +2,7 @@ use std::fmt::Display; /// A raw URI that got extracted from a document with a fuzzy parser. /// Note that this can still be invalid according to stricter URI standards -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct RawUri { /// Unparsed URI represented as a `String`. 
There is no guarantee that it /// can be parsed into a URI object diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 7890d1a63a..693670ddef 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -1,4 +1,3 @@ -use crate::collector::CollectError; use log::warn; use percent_encoding::percent_decode_str; use reqwest::Url; @@ -124,18 +123,18 @@ pub(crate) fn create( root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, -) -> HashSet> { +) -> HashSet { let base = base.cloned().or_else(|| Base::from_source(source)); uris.into_iter() - .map(|raw_uri| { - create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| { - CollectError { - source: source.clone(), - raw_uri: raw_uri, - error: e, + .filter_map(|raw_uri| { + match create_request(&raw_uri, source, root_dir, base.as_ref(), extractor) { + Ok(request) => Some(request), + Err(e) => { + warn!("Error creating request: {e:?}"); + None } - }) + } }) .collect() } From d272a2b00b285bddbaeeff2e5761a36e2ba4f9ae Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 18:48:52 +1000 Subject: [PATCH 05/63] feat: add CreateRequestItem error kind --- lychee-lib/src/types/error.rs | 13 +++++++++++++ lychee-lib/src/types/uri/raw.rs | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 66ad178d3d..ebd2bc62d4 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -7,7 +7,9 @@ use thiserror::Error; use tokio::task::JoinError; use super::InputContent; +use crate::ResolvedInputSource; use crate::types::StatusCodeSelectorError; +use crate::types::uri::raw::RawUri; use crate::{Uri, basic_auth::BasicAuthExtractorError, utils}; /// Kinds of status errors @@ -26,6 +28,10 @@ pub enum ErrorKind { #[error("Error creating request client: {0}")] BuildRequestClient(#[source] reqwest::Error), + /// TODO: a + #[error("Error 
creating request: {0}")] + CreateRequestItem(RawUri, ResolvedInputSource, #[source] Box), + /// Network error while using GitHub API #[error("Network error (GitHub client)")] GithubRequest(#[from] Box), @@ -249,6 +255,9 @@ impl ErrorKind { ErrorKind::BuildRequestClient(error) => Some(format!( "Failed to create HTTP client: {error}. Check system configuration", )), + ErrorKind::CreateRequestItem(_, _, _) => Some(format!( + "TODO: details creat request utem" + )), ErrorKind::RuntimeJoin(join_error) => Some(format!( "Task execution failed: {join_error}. Internal processing error" )), @@ -374,6 +383,9 @@ impl PartialEq for ErrorKind { (Self::BuildRequestClient(e1), Self::BuildRequestClient(e2)) => { e1.to_string() == e2.to_string() } + (Self::CreateRequestItem(uri1, s1, e1), Self::CreateRequestItem(uri2, s2, e2)) => { + uri1 == uri2 && s1 == s2 && e1.to_string() == e2.to_string() + } (Self::RuntimeJoin(e1), Self::RuntimeJoin(e2)) => e1.to_string() == e2.to_string(), (Self::ReadFileInput(e1, s1), Self::ReadFileInput(e2, s2)) => { e1.kind() == e2.kind() && s1 == s2 @@ -433,6 +445,7 @@ impl Hash for ErrorKind { Self::NetworkRequest(e) => e.to_string().hash(state), Self::ReadResponseBody(e) => e.to_string().hash(state), Self::BuildRequestClient(e) => e.to_string().hash(state), + Self::CreateRequestItem(uri, s, e) => (uri, s, e.to_string()).hash(state), Self::BuildGithubClient(e) => e.to_string().hash(state), Self::GithubRequest(e) => e.to_string().hash(state), Self::InvalidGithubUrl(s) => s.hash(state), diff --git a/lychee-lib/src/types/uri/raw.rs b/lychee-lib/src/types/uri/raw.rs index 3ad51f2cf8..74a4ed910c 100644 --- a/lychee-lib/src/types/uri/raw.rs +++ b/lychee-lib/src/types/uri/raw.rs @@ -2,7 +2,7 @@ use std::fmt::Display; /// A raw URI that got extracted from a document with a fuzzy parser. 
/// Note that this can still be invalid according to stricter URI standards -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct RawUri { /// Unparsed URI represented as a `String`. There is no guarantee that it /// can be parsed into a URI object From f5fd8df5eb431f147b66046b87cc289b9b2e0245 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 18:52:05 +1000 Subject: [PATCH 06/63] wrap --- lychee-lib/src/collector.rs | 2 +- lychee-lib/src/utils/request.rs | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index e209c56a5e..3baa2c4049 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -296,7 +296,7 @@ impl Collector { base.as_ref(), basic_auth_extractor.as_ref(), ); - Result::Ok(stream::iter(requests.into_iter().map(Ok))) + Result::Ok(stream::iter(requests.into_iter())) } }) .try_flatten() diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 693670ddef..1ab568c130 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -123,18 +123,14 @@ pub(crate) fn create( root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, -) -> HashSet { +) -> HashSet> { let base = base.cloned().or_else(|| Base::from_source(source)); uris.into_iter() - .filter_map(|raw_uri| { - match create_request(&raw_uri, source, root_dir, base.as_ref(), extractor) { - Ok(request) => Some(request), - Err(e) => { - warn!("Error creating request: {e:?}"); - None - } - } + .map(|raw_uri| { + create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| + ErrorKind::CreateRequestItem(raw_uri.clone(), source.clone(), Box::new(e)) + ) }) .collect() } From e4c5482b4f0150bae1cc83c7c91ea42c1a008b32 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 19:10:09 +1000 Subject: [PATCH 07/63] smuggle request errors through 
CreateRequestItem --- lychee-bin/src/commands/check.rs | 40 ++++++++++++++++++++------------ lychee-bin/src/commands/dump.rs | 7 +++++- lychee-lib/src/utils/request.rs | 4 ++-- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 0a266c6ffc..3cae7427d3 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -180,15 +180,18 @@ where { tokio::pin!(requests); while let Some(request) = requests.next().await { - let request = request?; + let request = match request { + x @ Ok(_) => x, + x @ Err(ErrorKind::CreateRequestItem(_, _, _)) => x, + Err(e) => Err(e)?, + }; if let Some(pb) = &bar { pb.inc_length(1); - pb.set_message(request.to_string()); + if let Ok(request) = &request { + pb.set_message(request.to_string()); + } } - send_req - .send(Ok(request)) - .await - .expect("Cannot send request"); + send_req.send(request).await.expect("Cannot send request"); } Ok(()) } @@ -240,15 +243,22 @@ async fn request_channel_task( ReceiverStream::new(recv_req), max_concurrency, |request: Result| async { - let request = request.expect("cannot read request"); - let response = handle( - &client, - cache.clone(), - cache_exclude_status.clone(), - request, - accept.clone(), - ) - .await; + let response = match request { + Ok(request) => { + handle( + &client, + cache.clone(), + cache_exclude_status.clone(), + request, + accept.clone(), + ) + .await + } + Err(ErrorKind::CreateRequestItem(uri, src, e)) => { + Response::new(Uri::try_from("https://google.com").unwrap(), Status::Error(*e), src) + } + Err(e) => Err(e).expect("cannot read request"), + }; send_resp .send(response) diff --git a/lychee-bin/src/commands/dump.rs b/lychee-bin/src/commands/dump.rs index 26df6f4018..49847564e3 100644 --- a/lychee-bin/src/commands/dump.rs +++ b/lychee-bin/src/commands/dump.rs @@ -1,4 +1,5 @@ use log::error; +use lychee_lib::ErrorKind; use lychee_lib::Request; use lychee_lib::Result; use 
std::fs; @@ -25,7 +26,11 @@ where let mut writer = super::create_writer(params.cfg.output)?; while let Some(request) = requests.next().await { - let mut request = request?; + let mut request = match request { + Ok(x) => x, + Err(ErrorKind::CreateRequestItem(_, _, _)) => continue, + err @ Err(_) => err?, + }; // Apply URI remappings (if any) params.client.remap(&mut request.uri)?; diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 1ab568c130..0ca2882c27 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -128,9 +128,9 @@ pub(crate) fn create( uris.into_iter() .map(|raw_uri| { - create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| + create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| { ErrorKind::CreateRequestItem(raw_uri.clone(), source.clone(), Box::new(e)) - ) + }) }) .collect() } From 963b27644be7c41a3b1f083a47664a16b8fe2dde Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 19:18:34 +1000 Subject: [PATCH 08/63] stash Result with RawUri --- lychee-lib/src/client.rs | 4 ++-- lychee-lib/src/types/response.rs | 12 +++++++++--- lychee-lib/src/types/uri/raw.rs | 4 +++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 63306bb91c..658ab9ccaf 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -494,7 +494,7 @@ impl Client { self.remap(uri)?; if self.is_excluded(uri) { - return Ok(Response::new(uri.clone(), Status::Excluded, source)); + return Ok(Response::new(Ok(uri.clone()), Status::Excluded, source)); } let status = match uri.scheme() { @@ -505,7 +505,7 @@ impl Client { _ => self.check_website(uri, credentials).await?, }; - Ok(Response::new(uri.clone(), status, source)) + Ok(Response::new(Ok(uri.clone()), status, source)) } /// Check a single file using the file checker. 
diff --git a/lychee-lib/src/types/response.rs b/lychee-lib/src/types/response.rs index 96e62c117f..a00358e29c 100644 --- a/lychee-lib/src/types/response.rs +++ b/lychee-lib/src/types/response.rs @@ -1,8 +1,10 @@ use std::fmt::Display; +use std::result::Result; use http::StatusCode; use serde::Serialize; +use crate::types::uri::raw::RawUri; use crate::{ResolvedInputSource, Status, Uri}; /// Response type returned by lychee after checking a URI @@ -20,7 +22,7 @@ impl Response { #[inline] #[must_use] /// Create new response - pub const fn new(uri: Uri, status: Status, source: ResolvedInputSource) -> Self { + pub const fn new(uri: Result, status: Status, source: ResolvedInputSource) -> Self { Response(source, ResponseBody { uri, status }) } @@ -68,7 +70,7 @@ impl Serialize for Response { pub struct ResponseBody { #[serde(flatten)] /// The URI which was checked - pub uri: Uri, + pub uri: Result, /// The status of the check pub status: Status, } @@ -80,7 +82,11 @@ pub struct ResponseBody { impl Display for ResponseBody { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { // Always write the URI - write!(f, "{}", self.uri)?; + + match &self.uri { + Ok(x) => write!(f, "{}", x)?, + Err(x) => write!(f, "{}", x)?, + } // Early return for OK status to avoid verbose output if matches!(self.status, Status::Ok(StatusCode::OK)) { diff --git a/lychee-lib/src/types/uri/raw.rs b/lychee-lib/src/types/uri/raw.rs index 74a4ed910c..19fee81222 100644 --- a/lychee-lib/src/types/uri/raw.rs +++ b/lychee-lib/src/types/uri/raw.rs @@ -1,8 +1,10 @@ use std::fmt::Display; +use serde::Serialize; + /// A raw URI that got extracted from a document with a fuzzy parser. /// Note that this can still be invalid according to stricter URI standards -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)] pub struct RawUri { /// Unparsed URI represented as a `String`. 
There is no guarantee that it /// can be parsed into a URI object From 9461325e8a85b1fc46209f8a1de1183405c70a4e Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 19:18:37 +1000 Subject: [PATCH 09/63] Revert "stash Result with RawUri" This reverts commit 963b27644be7c41a3b1f083a47664a16b8fe2dde. --- lychee-lib/src/client.rs | 4 ++-- lychee-lib/src/types/response.rs | 12 +++--------- lychee-lib/src/types/uri/raw.rs | 4 +--- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 658ab9ccaf..63306bb91c 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -494,7 +494,7 @@ impl Client { self.remap(uri)?; if self.is_excluded(uri) { - return Ok(Response::new(Ok(uri.clone()), Status::Excluded, source)); + return Ok(Response::new(uri.clone(), Status::Excluded, source)); } let status = match uri.scheme() { @@ -505,7 +505,7 @@ impl Client { _ => self.check_website(uri, credentials).await?, }; - Ok(Response::new(Ok(uri.clone()), status, source)) + Ok(Response::new(uri.clone(), status, source)) } /// Check a single file using the file checker. 
diff --git a/lychee-lib/src/types/response.rs b/lychee-lib/src/types/response.rs index a00358e29c..96e62c117f 100644 --- a/lychee-lib/src/types/response.rs +++ b/lychee-lib/src/types/response.rs @@ -1,10 +1,8 @@ use std::fmt::Display; -use std::result::Result; use http::StatusCode; use serde::Serialize; -use crate::types::uri::raw::RawUri; use crate::{ResolvedInputSource, Status, Uri}; /// Response type returned by lychee after checking a URI @@ -22,7 +20,7 @@ impl Response { #[inline] #[must_use] /// Create new response - pub const fn new(uri: Result, status: Status, source: ResolvedInputSource) -> Self { + pub const fn new(uri: Uri, status: Status, source: ResolvedInputSource) -> Self { Response(source, ResponseBody { uri, status }) } @@ -70,7 +68,7 @@ impl Serialize for Response { pub struct ResponseBody { #[serde(flatten)] /// The URI which was checked - pub uri: Result, + pub uri: Uri, /// The status of the check pub status: Status, } @@ -82,11 +80,7 @@ pub struct ResponseBody { impl Display for ResponseBody { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { // Always write the URI - - match &self.uri { - Ok(x) => write!(f, "{}", x)?, - Err(x) => write!(f, "{}", x)?, - } + write!(f, "{}", self.uri)?; // Early return for OK status to avoid verbose output if matches!(self.status, Status::Ok(StatusCode::OK)) { diff --git a/lychee-lib/src/types/uri/raw.rs b/lychee-lib/src/types/uri/raw.rs index 19fee81222..74a4ed910c 100644 --- a/lychee-lib/src/types/uri/raw.rs +++ b/lychee-lib/src/types/uri/raw.rs @@ -1,10 +1,8 @@ use std::fmt::Display; -use serde::Serialize; - /// A raw URI that got extracted from a document with a fuzzy parser. /// Note that this can still be invalid according to stricter URI standards -#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct RawUri { /// Unparsed URI represented as a `String`. 
There is no guarantee that it /// can be parsed into a URI object From c1bd7ca0018f8da29ce22c48823dd91d00be13d2 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 19:44:04 +1000 Subject: [PATCH 10/63] add help --- lychee-bin/src/commands/check.rs | 8 +++++--- lychee-lib/src/types/error.rs | 22 ++++++++++++---------- lychee-lib/src/types/uri/raw.rs | 2 +- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 3cae7427d3..1a7aa9cd46 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -254,9 +254,11 @@ async fn request_channel_task( ) .await } - Err(ErrorKind::CreateRequestItem(uri, src, e)) => { - Response::new(Uri::try_from("https://google.com").unwrap(), Status::Error(*e), src) - } + Err(ErrorKind::CreateRequestItem(uri, src, e)) => Response::new( + Uri::try_from("error://").unwrap(), + Status::Error(ErrorKind::CreateRequestItem(uri, src.clone(), e)), + src, + ), Err(e) => Err(e).expect("cannot read request"), }; diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index ebd2bc62d4..94eafc6d26 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -28,8 +28,8 @@ pub enum ErrorKind { #[error("Error creating request client: {0}")] BuildRequestClient(#[source] reqwest::Error), - /// TODO: a - #[error("Error creating request: {0}")] + /// Cannot create a request item for the given URI in the given source + #[error("Error building URL for {0}")] CreateRequestItem(RawUri, ResolvedInputSource, #[source] Box), /// Network error while using GitHub API @@ -255,9 +255,10 @@ impl ErrorKind { ErrorKind::BuildRequestClient(error) => Some(format!( "Failed to create HTTP client: {error}. 
Check system configuration", )), - ErrorKind::CreateRequestItem(_, _, _) => Some(format!( - "TODO: details creat request utem" - )), + ErrorKind::CreateRequestItem(_, _, error) => match error.details() { + Some(details) => format!("{}: {}", error.to_string(), details), + None => error.to_string(), + }.into(), ErrorKind::RuntimeJoin(join_error) => Some(format!( "Task execution failed: {join_error}. Internal processing error" )), @@ -286,12 +287,13 @@ impl ErrorKind { ErrorKind::InvalidBase(base, reason) => Some(format!( "Invalid base URL or directory: '{base}'. {reason}", )), - ErrorKind::InvalidBaseJoin(text) => Some(format!( - "Cannot join '{text}' with base URL. Check relative path format", - )), - ErrorKind::InvalidPathToUri(path) => Some(format!( - "Cannot convert path to URI: '{path}'. Check path format", + ErrorKind::InvalidBaseJoin(_) => Some(format!( + "Check relative path format", )), + ErrorKind::InvalidPathToUri(path) => match path { + path if path.starts_with('/') => "To resolve relative links in local files, provide a root dir", + _ => "Check path format", + }.to_string().into(), ErrorKind::RootDirMustBeAbsolute(path_buf) => Some(format!( "Root directory must be absolute: '{}'. 
Use full path", path_buf.display() diff --git a/lychee-lib/src/types/uri/raw.rs b/lychee-lib/src/types/uri/raw.rs index 74a4ed910c..be8f4ac570 100644 --- a/lychee-lib/src/types/uri/raw.rs +++ b/lychee-lib/src/types/uri/raw.rs @@ -21,7 +21,7 @@ pub struct RawUri { impl Display for RawUri { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{} (Attribute: {:?})", self.text, self.attribute) + write!(f, "{:?} (Attribute: {:?})", self.text, self.attribute) } } From 65afcdd43da748eb8949f583c6099cfe15779060 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 20:19:32 +1000 Subject: [PATCH 11/63] add CollectResult to be less dubious --- lychee-bin/src/commands/check.rs | 20 +++++++++++--------- lychee-bin/src/commands/dump.rs | 6 +++++- lychee-lib/src/lib.rs | 2 +- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 1a7aa9cd46..132e2da057 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -10,8 +10,9 @@ use reqwest::Url; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; +use lychee_lib::ResolvedInputSource; use lychee_lib::archive::Archive; -use lychee_lib::{Client, ErrorKind, Request, Response, Uri}; +use lychee_lib::{Client, ErrorKind, RawUri, Request, Response, Uri}; use lychee_lib::{InputSource, Result}; use lychee_lib::{ResponseBody, Status}; @@ -25,6 +26,8 @@ use crate::{ExitCode, cache::Cache, stats::ResponseStats}; use super::CommandParams; +type CollectResult = std::result::Result; + pub(crate) async fn check( params: CommandParams, ) -> Result<(ResponseStats, Arc, ExitCode)> @@ -172,7 +175,7 @@ async fn suggest_archived_links( // the show_results_task to finish async fn send_inputs_loop( requests: S, - send_req: mpsc::Sender>, + send_req: mpsc::Sender>, bar: Option, ) -> Result<()> where @@ -181,8 +184,8 @@ where tokio::pin!(requests); while let Some(request) = requests.next().await { let 
request = match request { - x @ Ok(_) => x, - x @ Err(ErrorKind::CreateRequestItem(_, _, _)) => x, + Ok(x) => Ok(x), + Err(ErrorKind::CreateRequestItem(uri, src, err)) => Err((uri, src, *err)), Err(e) => Err(e)?, }; if let Some(pb) = &bar { @@ -231,7 +234,7 @@ fn init_progress_bar(initial_message: &'static str) -> ProgressBar { } async fn request_channel_task( - recv_req: mpsc::Receiver>, + recv_req: mpsc::Receiver>, send_resp: mpsc::Sender, max_concurrency: usize, client: Client, @@ -242,7 +245,7 @@ async fn request_channel_task( StreamExt::for_each_concurrent( ReceiverStream::new(recv_req), max_concurrency, - |request: Result| async { + |request: CollectResult| async { let response = match request { Ok(request) => { handle( @@ -254,12 +257,11 @@ async fn request_channel_task( ) .await } - Err(ErrorKind::CreateRequestItem(uri, src, e)) => Response::new( + Err((uri, src, e)) => Response::new( Uri::try_from("error://").unwrap(), - Status::Error(ErrorKind::CreateRequestItem(uri, src.clone(), e)), + Status::Error(ErrorKind::CreateRequestItem(uri, src.clone(), Box::new(e))), src, ), - Err(e) => Err(e).expect("cannot read request"), }; send_resp diff --git a/lychee-bin/src/commands/dump.rs b/lychee-bin/src/commands/dump.rs index 49847564e3..83c7d14214 100644 --- a/lychee-bin/src/commands/dump.rs +++ b/lychee-bin/src/commands/dump.rs @@ -1,4 +1,5 @@ use log::error; +use log::warn; use lychee_lib::ErrorKind; use lychee_lib::Request; use lychee_lib::Result; @@ -28,7 +29,10 @@ where while let Some(request) = requests.next().await { let mut request = match request { Ok(x) => x, - Err(ErrorKind::CreateRequestItem(_, _, _)) => continue, + Err(e @ ErrorKind::CreateRequestItem(_, _, _)) => { + warn!("{e}"); + continue; + } err @ Err(_) => err?, }; diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index 37dfd62ea9..ef655e19d9 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -96,6 +96,6 @@ pub use crate::{ AcceptRange, AcceptRangeError, Base, 
BasicAuthCredentials, BasicAuthSelector, CacheStatus, CookieJar, ErrorKind, FileExtensions, FileType, Input, InputContent, InputResolver, InputSource, Redirects, Request, ResolvedInputSource, Response, ResponseBody, Result, - Status, StatusCodeExcluder, StatusCodeSelector, uri::valid::Uri, + Status, StatusCodeExcluder, StatusCodeSelector, uri::raw::RawUri, uri::valid::Uri, }, }; From 891255dae9284f989345d8540fa83bac64517b6d Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 20:39:46 +1000 Subject: [PATCH 12/63] fix lints --- clippy.toml | 1 + lychee-lib/src/collector.rs | 2 +- lychee-lib/src/types/error.rs | 9 +++---- lychee-lib/src/utils/request.rs | 46 +++++++++++++++++++++------------ 4 files changed, 35 insertions(+), 23 deletions(-) create mode 100644 clippy.toml diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 0000000000..c9808129d6 --- /dev/null +++ b/clippy.toml @@ -0,0 +1 @@ +ignore-interior-mutability = ["lychee_lib::ErrorKind"] diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 3baa2c4049..78ab74b1af 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -296,7 +296,7 @@ impl Collector { base.as_ref(), basic_auth_extractor.as_ref(), ); - Result::Ok(stream::iter(requests.into_iter())) + Result::Ok(stream::iter(requests)) } }) .try_flatten() diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 94eafc6d26..e4973b542a 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -16,6 +16,7 @@ use crate::{Uri, basic_auth::BasicAuthExtractorError, utils}; /// Note: The error messages can change over time, so don't match on the output #[derive(Error, Debug)] #[non_exhaustive] +#[allow(clippy::too_many_lines)] pub enum ErrorKind { /// Network error while handling request. /// This does not include erroneous status codes, `RejectedStatusCode` will be used in that case. 
@@ -256,7 +257,7 @@ impl ErrorKind { "Failed to create HTTP client: {error}. Check system configuration", )), ErrorKind::CreateRequestItem(_, _, error) => match error.details() { - Some(details) => format!("{}: {}", error.to_string(), details), + Some(details) => format!("{error}: {details}"), None => error.to_string(), }.into(), ErrorKind::RuntimeJoin(join_error) => Some(format!( @@ -287,9 +288,7 @@ impl ErrorKind { ErrorKind::InvalidBase(base, reason) => Some(format!( "Invalid base URL or directory: '{base}'. {reason}", )), - ErrorKind::InvalidBaseJoin(_) => Some(format!( - "Check relative path format", - )), + ErrorKind::InvalidBaseJoin(_) => Some("Check relative path format".to_string()), ErrorKind::InvalidPathToUri(path) => match path { path if path.starts_with('/') => "To resolve relative links in local files, provide a root dir", _ => "Check path format", @@ -447,7 +446,7 @@ impl Hash for ErrorKind { Self::NetworkRequest(e) => e.to_string().hash(state), Self::ReadResponseBody(e) => e.to_string().hash(state), Self::BuildRequestClient(e) => e.to_string().hash(state), - Self::CreateRequestItem(uri, s, e) => (uri, s, e.to_string()).hash(state), + Self::CreateRequestItem(uri, s, _e) => (uri, s).hash(state), /* omit error to avoid mutable fields */ Self::BuildGithubClient(e) => e.to_string().hash(state), Self::GithubRequest(e) => e.to_string().hash(state), Self::InvalidGithubUrl(s) => s.hash(state), diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 0ca2882c27..772765da8b 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -1,4 +1,3 @@ -use log::warn; use percent_encoding::percent_decode_str; use reqwest::Url; use std::{ @@ -190,6 +189,19 @@ mod tests { use super::*; + fn create_ok_only( + uris: Vec, + source: &ResolvedInputSource, + root_dir: Option<&PathBuf>, + base: Option<&Base>, + extractor: Option<&BasicAuthExtractor>, + ) -> HashSet { + create(uris, source, root_dir, base, extractor) + 
.into_iter() + .filter_map(std::result::Result::ok) + .collect() + } + #[test] fn test_is_anchor() { assert!(is_anchor("#anchor")); @@ -209,7 +221,7 @@ mod tests { let source = ResolvedInputSource::String(Cow::Borrowed("")); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &source, None, Some(&base), None); + let requests = create_ok_only(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!( @@ -225,7 +237,7 @@ mod tests { let source = ResolvedInputSource::String(Cow::Borrowed("")); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = create(uris, &source, None, Some(&base), None); + let requests = create_ok_only(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!( @@ -241,7 +253,7 @@ mod tests { let source = ResolvedInputSource::String(Cow::Borrowed("")); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &source, None, Some(&base), None); + let requests = create_ok_only(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!( @@ -257,7 +269,7 @@ mod tests { let source = ResolvedInputSource::String(Cow::Borrowed("")); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &source, None, Some(&base), None); + let requests = create_ok_only(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!( @@ -273,7 +285,7 @@ mod tests { let source = ResolvedInputSource::String(Cow::Borrowed("")); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &source, None, Some(&base), None); + let requests = create_ok_only(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!( @@ -289,7 +301,7 @@ mod tests { let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &source, Some(&root_dir), None, None); + let requests = create_ok_only(uris, 
&source, Some(&root_dir), None, None); assert_eq!(requests.len(), 1); assert!( @@ -305,7 +317,7 @@ mod tests { let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = create(uris, &source, Some(&root_dir), None, None); + let requests = create_ok_only(uris, &source, Some(&root_dir), None, None); assert_eq!(requests.len(), 1); assert!( @@ -321,7 +333,7 @@ mod tests { let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &source, Some(&root_dir), None, None); + let requests = create_ok_only(uris, &source, Some(&root_dir), None, None); assert_eq!(requests.len(), 1); assert!( @@ -337,7 +349,7 @@ mod tests { let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &source, Some(&root_dir), None, None); + let requests = create_ok_only(uris, &source, Some(&root_dir), None, None); assert_eq!(requests.len(), 1); assert!( @@ -353,7 +365,7 @@ mod tests { let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &source, Some(&root_dir), None, None); + let requests = create_ok_only(uris, &source, Some(&root_dir), None, None); assert_eq!(requests.len(), 1); assert!( @@ -370,7 +382,7 @@ mod tests { let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &source, Some(&root_dir), Some(&base), None); + let requests = create_ok_only(uris, &source, Some(&root_dir), Some(&base), None); assert_eq!(requests.len(), 1); assert!( @@ -387,7 +399,7 @@ mod tests { let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = 
create(uris, &source, Some(&root_dir), Some(&base), None); + let requests = create_ok_only(uris, &source, Some(&root_dir), Some(&base), None); assert_eq!(requests.len(), 1); assert!( @@ -404,7 +416,7 @@ mod tests { let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &source, Some(&root_dir), Some(&base), None); + let requests = create_ok_only(uris, &source, Some(&root_dir), Some(&base), None); assert_eq!(requests.len(), 1); assert!( @@ -421,7 +433,7 @@ mod tests { let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &source, Some(&root_dir), Some(&base), None); + let requests = create_ok_only(uris, &source, Some(&root_dir), Some(&base), None); assert_eq!(requests.len(), 1); assert!( @@ -438,7 +450,7 @@ mod tests { let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html")); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &source, Some(&root_dir), Some(&base), None); + let requests = create_ok_only(uris, &source, Some(&root_dir), Some(&base), None); assert_eq!(requests.len(), 1); assert!( @@ -453,7 +465,7 @@ mod tests { let source = ResolvedInputSource::String(Cow::Borrowed("")); let uris = vec![RawUri::from("https://example.com/page")]; - let requests = create(uris, &source, None, None, None); + let requests = create_ok_only(uris, &source, None, None, None); assert_eq!(requests.len(), 1); assert!( From ff8774536025f80376d229cb2fe99523a96ef227 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 4 Oct 2025 20:56:05 +1000 Subject: [PATCH 13/63] touch --- lychee-lib/src/types/error.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index e4973b542a..5561e0ef8b 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -16,7 +16,6 @@ 
use crate::{Uri, basic_auth::BasicAuthExtractorError, utils}; /// Note: The error messages can change over time, so don't match on the output #[derive(Error, Debug)] #[non_exhaustive] -#[allow(clippy::too_many_lines)] pub enum ErrorKind { /// Network error while handling request. /// This does not include erroneous status codes, `RejectedStatusCode` will be used in that case. @@ -290,7 +289,8 @@ impl ErrorKind { )), ErrorKind::InvalidBaseJoin(_) => Some("Check relative path format".to_string()), ErrorKind::InvalidPathToUri(path) => match path { - path if path.starts_with('/') => "To resolve relative links in local files, provide a root dir", + path if path.starts_with('/') => + "To resolve root-relative links in local files, provide a root dir", _ => "Check path format", }.to_string().into(), ErrorKind::RootDirMustBeAbsolute(path_buf) => Some(format!( From 024772a347e831f526192333bb8c845eccca6001 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 5 Oct 2025 15:24:40 +1000 Subject: [PATCH 14/63] introduce separate RequestError type. but... we still need to feed it back to Status and that requires an old ErrorKind. maybe we add a new case to Status.
--- lychee-bin/src/commands/check.rs | 21 ++++++++------------- lychee-bin/src/commands/dump.rs | 7 ++++--- lychee-bin/src/commands/mod.rs | 5 ++++- lychee-lib/src/collector.rs | 15 ++++++++++----- lychee-lib/src/lib.rs | 4 ++-- lychee-lib/src/types/mod.rs | 2 +- lychee-lib/src/types/request.rs | 15 ++++++++++++++- lychee-lib/src/utils/request.rs | 9 ++++----- 8 files changed, 47 insertions(+), 31 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 132e2da057..c0b270ba76 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -10,6 +10,7 @@ use reqwest::Url; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; +use lychee_lib::RequestError; use lychee_lib::ResolvedInputSource; use lychee_lib::archive::Archive; use lychee_lib::{Client, ErrorKind, RawUri, Request, Response, Uri}; @@ -26,13 +27,11 @@ use crate::{ExitCode, cache::Cache, stats::ResponseStats}; use super::CommandParams; -type CollectResult = std::result::Result; - pub(crate) async fn check( params: CommandParams, ) -> Result<(ResponseStats, Arc, ExitCode)> where - S: futures::Stream>, + S: futures::Stream>, { // Setup let (send_req, recv_req) = mpsc::channel(params.cfg.max_concurrency); @@ -175,19 +174,14 @@ async fn suggest_archived_links( // the show_results_task to finish async fn send_inputs_loop( requests: S, - send_req: mpsc::Sender>, + send_req: mpsc::Sender>, bar: Option, ) -> Result<()> where - S: futures::Stream>, + S: futures::Stream>, { tokio::pin!(requests); while let Some(request) = requests.next().await { - let request = match request { - Ok(x) => Ok(x), - Err(ErrorKind::CreateRequestItem(uri, src, err)) => Err((uri, src, *err)), - Err(e) => Err(e)?, - }; if let Some(pb) = &bar { pb.inc_length(1); if let Ok(request) = &request { @@ -234,7 +228,7 @@ fn init_progress_bar(initial_message: &'static str) -> ProgressBar { } async fn request_channel_task( - recv_req: mpsc::Receiver>, + recv_req: 
mpsc::Receiver>, send_resp: mpsc::Sender, max_concurrency: usize, client: Client, @@ -245,7 +239,7 @@ async fn request_channel_task( StreamExt::for_each_concurrent( ReceiverStream::new(recv_req), max_concurrency, - |request: CollectResult| async { + |request: std::result::Result| async { let response = match request { Ok(request) => { handle( @@ -257,11 +251,12 @@ async fn request_channel_task( ) .await } - Err((uri, src, e)) => Response::new( + Err(RequestError::CreateRequestItem(uri, src, e)) => Response::new( Uri::try_from("error://").unwrap(), Status::Error(ErrorKind::CreateRequestItem(uri, src.clone(), Box::new(e))), src, ), + Err(e @ RequestError::GetInputContent(_)) => panic!("{e}"), }; send_resp diff --git a/lychee-bin/src/commands/dump.rs b/lychee-bin/src/commands/dump.rs index 83c7d14214..defba34289 100644 --- a/lychee-bin/src/commands/dump.rs +++ b/lychee-bin/src/commands/dump.rs @@ -2,6 +2,7 @@ use log::error; use log::warn; use lychee_lib::ErrorKind; use lychee_lib::Request; +use lychee_lib::RequestError; use lychee_lib::Result; use std::fs; use std::io::{self, Write}; @@ -15,7 +16,7 @@ use super::CommandParams; /// Dump all detected links to stdout without checking them pub(crate) async fn dump(params: CommandParams) -> Result where - S: futures::Stream>, + S: futures::Stream>, { let requests = params.requests; tokio::pin!(requests); @@ -29,11 +30,11 @@ where while let Some(request) = requests.next().await { let mut request = match request { Ok(x) => x, - Err(e @ ErrorKind::CreateRequestItem(_, _, _)) => { + Err(e @ RequestError::CreateRequestItem(_, _, _)) => { warn!("{e}"); continue; } - err @ Err(_) => err?, + Err(RequestError::GetInputContent(e)) => Err(e)?, }; // Apply URI remappings (if any) diff --git a/lychee-bin/src/commands/mod.rs b/lychee-bin/src/commands/mod.rs index 1f00503f02..47df1bfc79 100644 --- a/lychee-bin/src/commands/mod.rs +++ b/lychee-bin/src/commands/mod.rs @@ -13,11 +13,14 @@ use std::sync::Arc; use crate::cache::Cache; use 
crate::options::Config; +use lychee_lib::RequestError; use lychee_lib::Result; use lychee_lib::{Client, Request}; /// Parameters passed to every command -pub(crate) struct CommandParams>> { +pub(crate) struct CommandParams< + S: futures::Stream>, +> { pub(crate) client: Client, pub(crate) cache: Arc, pub(crate) requests: S, diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 78ab74b1af..1f47872805 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -3,7 +3,7 @@ use crate::InputSource; use crate::filter::PathExcludes; use crate::types::resolver::UrlContentResolver; use crate::{ - Base, Input, InputResolver, Request, Result, basic_auth::BasicAuthExtractor, + Base, Input, InputResolver, Request, RequestError, Result, basic_auth::BasicAuthExtractor, extract::Extractor, types::FileExtensions, types::uri::raw::RawUri, utils::request, }; use dashmap::DashSet; @@ -223,7 +223,10 @@ impl Collector { /// Convenience method to fetch all unique links from inputs /// with the default extensions. 
- pub fn collect_links(self, inputs: HashSet) -> impl Stream> { + pub fn collect_links( + self, + inputs: HashSet, + ) -> impl Stream> { self.collect_links_from_file_types(inputs, crate::types::FileType::default_extensions()) } @@ -238,7 +241,7 @@ impl Collector { self, inputs: HashSet, extensions: FileExtensions, - ) -> impl Stream> { + ) -> impl Stream> { let skip_missing_inputs = self.skip_missing_inputs; let skip_hidden = self.skip_hidden; let skip_ignored = self.skip_ignored; @@ -279,7 +282,9 @@ impl Collector { resolver, excluded_paths, ) - .map(move |content| (content, base.clone())) + .map(move |content| { + (content.map_err(RequestError::GetInputContent), base.clone()) + }) } }) .flatten() @@ -296,7 +301,7 @@ impl Collector { base.as_ref(), basic_auth_extractor.as_ref(), ); - Result::Ok(stream::iter(requests)) + std::result::Result::Ok(stream::iter(requests)) } }) .try_flatten() diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index ef655e19d9..0a84fcab64 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -95,7 +95,7 @@ pub use crate::{ types::{ AcceptRange, AcceptRangeError, Base, BasicAuthCredentials, BasicAuthSelector, CacheStatus, CookieJar, ErrorKind, FileExtensions, FileType, Input, InputContent, InputResolver, - InputSource, Redirects, Request, ResolvedInputSource, Response, ResponseBody, Result, - Status, StatusCodeExcluder, StatusCodeSelector, uri::raw::RawUri, uri::valid::Uri, + InputSource, Redirects, Request, RequestError, ResolvedInputSource, Response, ResponseBody, + Result, Status, StatusCodeExcluder, StatusCodeSelector, uri::raw::RawUri, uri::valid::Uri, }, }; diff --git a/lychee-lib/src/types/mod.rs b/lychee-lib/src/types/mod.rs index 679b26f972..87ba9957d3 100644 --- a/lychee-lib/src/types/mod.rs +++ b/lychee-lib/src/types/mod.rs @@ -26,7 +26,7 @@ pub use error::ErrorKind; pub use file::{FileExtensions, FileType}; pub use input::{Input, InputContent, InputResolver, InputSource, ResolvedInputSource}; pub use 
redirect_history::Redirects; -pub use request::Request; +pub use request::{Request, RequestError}; pub use response::{Response, ResponseBody}; pub use status::Status; pub use status_code::*; diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index da6844398a..6ff3cbcd7f 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -1,9 +1,22 @@ use std::{borrow::Cow, convert::TryFrom, fmt::Display}; +use thiserror::Error; -use crate::{BasicAuthCredentials, ErrorKind, Uri}; +use crate::{BasicAuthCredentials, ErrorKind, RawUri, Uri}; +use super::InputSource; use super::ResolvedInputSource; +/// a +#[derive(Error, Debug, PartialEq, Eq, Hash)] +pub enum RequestError { + /// a + #[error("Network error")] + CreateRequestItem(RawUri, ResolvedInputSource, #[source] ErrorKind), + /// a + #[error("Network error")] + GetInputContent(#[source] ErrorKind), +} + /// A request type that can be handle by lychee #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct Request { diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 772765da8b..c293f03470 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -6,7 +6,7 @@ use std::{ }; use crate::{ - Base, BasicAuthCredentials, ErrorKind, Request, Result, Uri, + Base, BasicAuthCredentials, ErrorKind, Request, RequestError, Result, Uri, basic_auth::BasicAuthExtractor, types::{ResolvedInputSource, uri::raw::RawUri}, utils::{path, url}, @@ -122,14 +122,13 @@ pub(crate) fn create( root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, -) -> HashSet> { +) -> HashSet> { let base = base.cloned().or_else(|| Base::from_source(source)); uris.into_iter() .map(|raw_uri| { - create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| { - ErrorKind::CreateRequestItem(raw_uri.clone(), source.clone(), Box::new(e)) - }) + create_request(&raw_uri, source, root_dir, base.as_ref(), 
extractor) + .map_err(|e| RequestError::CreateRequestItem(raw_uri.clone(), source.clone(), e)) }) .collect() } From 3ee280eb1495523c192022a1dc2797812e15c887 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 5 Oct 2025 15:37:05 +1000 Subject: [PATCH 15/63] add Status::RequestError --- lychee-bin/src/commands/check.rs | 2 +- lychee-bin/src/formatters/response/color.rs | 4 +++- lychee-bin/src/formatters/response/emoji.rs | 4 +++- lychee-bin/src/stats.rs | 2 +- lychee-lib/src/retry.rs | 1 + lychee-lib/src/types/cache.rs | 1 + lychee-lib/src/types/request.rs | 10 ++++++++++ lychee-lib/src/types/status.rs | 9 ++++++++- 8 files changed, 28 insertions(+), 5 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index c0b270ba76..c0dd6b6d36 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -253,7 +253,7 @@ async fn request_channel_task( } Err(RequestError::CreateRequestItem(uri, src, e)) => Response::new( Uri::try_from("error://").unwrap(), - Status::Error(ErrorKind::CreateRequestItem(uri, src.clone(), Box::new(e))), + Status::RequestError(RequestError::CreateRequestItem(uri, src.clone(), e)), src, ), Err(e @ RequestError::GetInputContent(_)) => panic!("{e}"), diff --git a/lychee-bin/src/formatters/response/color.rs b/lychee-bin/src/formatters/response/color.rs index fd99c24ba9..2ea7388916 100644 --- a/lychee-bin/src/formatters/response/color.rs +++ b/lychee-bin/src/formatters/response/color.rs @@ -20,7 +20,9 @@ impl ColorFormatter { | Status::Unsupported(_) | Status::Cached(CacheStatus::Excluded | CacheStatus::Unsupported) => &DIM, Status::UnknownStatusCode(_) | Status::Timeout(_) => &YELLOW, - Status::Error(_) | Status::Cached(CacheStatus::Error(_)) => &PINK, + Status::Error(_) | Status::RequestError(_) | Status::Cached(CacheStatus::Error(_)) => { + &PINK + } } } diff --git a/lychee-bin/src/formatters/response/emoji.rs b/lychee-bin/src/formatters/response/emoji.rs index 06179920fd..7d31f35c75 100644 
--- a/lychee-bin/src/formatters/response/emoji.rs +++ b/lychee-bin/src/formatters/response/emoji.rs @@ -19,7 +19,9 @@ impl EmojiFormatter { | Status::Cached(CacheStatus::Excluded | CacheStatus::Unsupported) => "🚫", Status::Redirected(_, _) => "↪️", Status::UnknownStatusCode(_) | Status::Timeout(_) => "⚠️", - Status::Error(_) | Status::Cached(CacheStatus::Error(_)) => "❌", + Status::Error(_) | Status::RequestError(_) | Status::Cached(CacheStatus::Error(_)) => { + "❌" + } } } } diff --git a/lychee-bin/src/stats.rs b/lychee-bin/src/stats.rs index 7d97013ead..b39c56d0d1 100644 --- a/lychee-bin/src/stats.rs +++ b/lychee-bin/src/stats.rs @@ -71,7 +71,7 @@ impl ResponseStats { pub(crate) const fn increment_status_counters(&mut self, status: &Status) { match status { Status::Ok(_) => self.successful += 1, - Status::Error(_) => self.errors += 1, + Status::Error(_) | Status::RequestError(_) => self.errors += 1, Status::UnknownStatusCode(_) => self.unknown += 1, Status::Timeout(_) => self.timeouts += 1, Status::Redirected(_, _) => self.redirects += 1, diff --git a/lychee-lib/src/retry.rs b/lychee-lib/src/retry.rs index a4217a0de4..8bf0ec7d10 100644 --- a/lychee-lib/src/retry.rs +++ b/lychee-lib/src/retry.rs @@ -120,6 +120,7 @@ impl RetryExt for Status { match self { Status::Ok(_) => false, Status::Error(err) => err.should_retry(), + Status::RequestError(_) => false, Status::Timeout(_) => true, Status::Redirected(_, _) => false, Status::UnknownStatusCode(_) => false, diff --git a/lychee-lib/src/types/cache.rs b/lychee-lib/src/types/cache.rs index 28deda165e..017d0770e1 100644 --- a/lychee-lib/src/types/cache.rs +++ b/lychee-lib/src/types/cache.rs @@ -85,6 +85,7 @@ impl From<&Status> for CacheStatus { } _ => Self::Error(None), }, + Status::RequestError(_) => Self::Error(None), } } } diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index 6ff3cbcd7f..d7cf432e8c 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ 
-17,6 +17,16 @@ pub enum RequestError { GetInputContent(#[source] ErrorKind), } +impl RequestError { + /// a + pub fn source(&self) -> &ErrorKind { + match self { + Self::CreateRequestItem(_, _, e) => e, + Self::GetInputContent(e) => e, + } + } +} + /// A request type that can be handle by lychee #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct Request { diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index 28e066993e..a088b74e16 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -1,8 +1,9 @@ -use std::{collections::HashSet, fmt::Display}; +use std::{collections::HashSet, error::Error, fmt::Display}; use super::CacheStatus; use super::redirect_history::Redirects; use crate::ErrorKind; +use crate::RequestError; use http::StatusCode; use reqwest::Response; use serde::ser::SerializeStruct; @@ -25,6 +26,8 @@ pub enum Status { Ok(StatusCode), /// Failed request Error(ErrorKind), + /// Request could not be built + RequestError(RequestError), /// Request timed out Timeout(Option), /// Got redirected to different resource @@ -51,6 +54,7 @@ impl Display for Status { Status::Timeout(None) => f.write_str("Timeout"), Status::Unsupported(e) => write!(f, "Unsupported: {e}"), Status::Error(e) => write!(f, "{e}"), + Status::RequestError(e) => write!(f, "{e}"), Status::Cached(status) => write!(f, "{status}"), Status::Excluded => Ok(()), } @@ -153,6 +157,7 @@ impl Status { )) } Status::Error(e) => e.details(), + Status::RequestError(e) => e.source().details(), Status::Timeout(_) => None, Status::UnknownStatusCode(_) => None, Status::Unsupported(_) => None, @@ -214,6 +219,7 @@ impl Status { Status::UnknownStatusCode(_) => ICON_UNKNOWN, Status::Excluded => ICON_EXCLUDED, Status::Error(_) => ICON_ERROR, + Status::RequestError(_) => ICON_ERROR, Status::Timeout(_) => ICON_TIMEOUT, Status::Unsupported(_) => ICON_UNSUPPORTED, Status::Cached(_) => ICON_CACHED, @@ -260,6 +266,7 @@ impl Status { } _ => "ERROR".to_string(), 
}, + Status::RequestError(_) => "ERROR".to_string(), Status::Timeout(code) => match code { Some(code) => code.as_str().to_string(), None => "TIMEOUT".to_string(), From 1ad2020d736959780102e607f1a30a835c93a682 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 5 Oct 2025 15:43:13 +1000 Subject: [PATCH 16/63] mark as error --- lychee-lib/src/types/request.rs | 4 ++-- lychee-lib/src/types/status.rs | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index d7cf432e8c..87a5335af0 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -10,10 +10,10 @@ use super::ResolvedInputSource; #[derive(Error, Debug, PartialEq, Eq, Hash)] pub enum RequestError { /// a - #[error("Network error")] + #[error("Error building URL for {0}: {2}")] CreateRequestItem(RawUri, ResolvedInputSource, #[source] ErrorKind), /// a - #[error("Network error")] + #[error("Error getting input content: {0}")] GetInputContent(#[source] ErrorKind), } diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index a088b74e16..35ba4dc3f7 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -179,7 +179,10 @@ impl Status { pub const fn is_error(&self) -> bool { matches!( self, - Status::Error(_) | Status::Cached(CacheStatus::Error(_)) | Status::Timeout(_) + Status::Error(_) + | Status::RequestError(_) + | Status::Cached(CacheStatus::Error(_)) + | Status::Timeout(_) ) } From 25d32cef0da2930a5bfff5fa3aaf252b059af502 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 5 Oct 2025 15:44:46 +1000 Subject: [PATCH 17/63] remove ErrorKind::CreateRequestItem case --- lychee-lib/src/types/error.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 5561e0ef8b..76f99649eb 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -28,10 +28,6 @@ pub enum ErrorKind { 
#[error("Error creating request client: {0}")] BuildRequestClient(#[source] reqwest::Error), - /// Cannot create a request item for the given URI in the given source - #[error("Error building URL for {0}")] - CreateRequestItem(RawUri, ResolvedInputSource, #[source] Box), - /// Network error while using GitHub API #[error("Network error (GitHub client)")] GithubRequest(#[from] Box), @@ -255,10 +251,6 @@ impl ErrorKind { ErrorKind::BuildRequestClient(error) => Some(format!( "Failed to create HTTP client: {error}. Check system configuration", )), - ErrorKind::CreateRequestItem(_, _, error) => match error.details() { - Some(details) => format!("{error}: {details}"), - None => error.to_string(), - }.into(), ErrorKind::RuntimeJoin(join_error) => Some(format!( "Task execution failed: {join_error}. Internal processing error" )), @@ -384,9 +376,6 @@ impl PartialEq for ErrorKind { (Self::BuildRequestClient(e1), Self::BuildRequestClient(e2)) => { e1.to_string() == e2.to_string() } - (Self::CreateRequestItem(uri1, s1, e1), Self::CreateRequestItem(uri2, s2, e2)) => { - uri1 == uri2 && s1 == s2 && e1.to_string() == e2.to_string() - } (Self::RuntimeJoin(e1), Self::RuntimeJoin(e2)) => e1.to_string() == e2.to_string(), (Self::ReadFileInput(e1, s1), Self::ReadFileInput(e2, s2)) => { e1.kind() == e2.kind() && s1 == s2 @@ -446,7 +435,6 @@ impl Hash for ErrorKind { Self::NetworkRequest(e) => e.to_string().hash(state), Self::ReadResponseBody(e) => e.to_string().hash(state), Self::BuildRequestClient(e) => e.to_string().hash(state), - Self::CreateRequestItem(uri, s, _e) => (uri, s).hash(state), /* omit error to avoid mutable fields */ Self::BuildGithubClient(e) => e.to_string().hash(state), Self::GithubRequest(e) => e.to_string().hash(state), Self::InvalidGithubUrl(s) => s.hash(state), From c7536d1eef45a55f2134b60c073770da543f473f Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 5 Oct 2025 15:57:49 +1000 Subject: [PATCH 18/63] blah --- lychee-bin/src/commands/check.rs | 15 +++++++++------ 
lychee-lib/src/types/request.rs | 10 +++++++++- lychee-lib/src/types/status.rs | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index c0dd6b6d36..b945159a12 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -251,12 +251,15 @@ async fn request_channel_task( ) .await } - Err(RequestError::CreateRequestItem(uri, src, e)) => Response::new( - Uri::try_from("error://").unwrap(), - Status::RequestError(RequestError::CreateRequestItem(uri, src.clone(), e)), - src, - ), - Err(e @ RequestError::GetInputContent(_)) => panic!("{e}"), + // Err(RequestError::CreateRequestItem(uri, src, e)) => Response::new( + // Uri::try_from("error://").unwrap(), + // Status::RequestError(RequestError::CreateRequestItem(uri, src.clone(), e)), + // src, + // ), + err @ Err(_) => { + err.expect("ads"); + panic!() + } }; send_resp diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index 87a5335af0..21b700d48e 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -19,7 +19,15 @@ pub enum RequestError { impl RequestError { /// a - pub fn source(&self) -> &ErrorKind { + pub fn error_kind(&self) -> &ErrorKind { + match self { + Self::CreateRequestItem(_, _, e) => e, + Self::GetInputContent(e) => e, + } + } + + /// b + pub fn into_source(self) -> ErrorKind { match self { Self::CreateRequestItem(_, _, e) => e, Self::GetInputContent(e) => e, diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index 35ba4dc3f7..0680f2007e 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -157,7 +157,7 @@ impl Status { )) } Status::Error(e) => e.details(), - Status::RequestError(e) => e.source().details(), + Status::RequestError(e) => e.error_kind().details(), Status::Timeout(_) => None, Status::UnknownStatusCode(_) => None, Status::Unsupported(_) => None, From 
d10190afcc73c00658e2ff1fcde0f49fff87d3e7 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 5 Oct 2025 16:11:49 +1000 Subject: [PATCH 19/63] uncomment --- lychee-bin/src/commands/check.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index b945159a12..eac3f560bf 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -251,11 +251,11 @@ async fn request_channel_task( ) .await } - // Err(RequestError::CreateRequestItem(uri, src, e)) => Response::new( - // Uri::try_from("error://").unwrap(), - // Status::RequestError(RequestError::CreateRequestItem(uri, src.clone(), e)), - // src, - // ), + Err(RequestError::CreateRequestItem(uri, src, e)) => Response::new( + Uri::try_from("error://").unwrap(), + Status::RequestError(RequestError::CreateRequestItem(uri, src.clone(), e)), + src, + ), err @ Err(_) => { err.expect("ads"); panic!() From 4f74a77c2f7f635d57c722b7242bc6e8e76dda7f Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 5 Oct 2025 16:23:19 +1000 Subject: [PATCH 20/63] restore old panic behaviour for input-source errors --- lychee-bin/src/commands/check.rs | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index eac3f560bf..9178b19961 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -195,13 +195,14 @@ where /// Reads from the request channel and updates the progress bar status async fn progress_bar_task( - mut recv_resp: mpsc::Receiver, + mut recv_resp: mpsc::Receiver>, verbose: Verbosity, pb: Option, formatter: Box, mut stats: ResponseStats, ) -> Result<(Option, ResponseStats)> { while let Some(response) = recv_resp.recv().await { + let response = response?; show_progress( &mut io::stderr(), pb.as_ref(), @@ -229,7 +230,7 @@ fn init_progress_bar(initial_message: &'static str) -> ProgressBar 
{ async fn request_channel_task( recv_req: mpsc::Receiver>, - send_resp: mpsc::Sender, + send_resp: mpsc::Sender>, max_concurrency: usize, client: Client, cache: Arc, @@ -241,25 +242,20 @@ async fn request_channel_task( max_concurrency, |request: std::result::Result| async { let response = match request { - Ok(request) => { - handle( - &client, - cache.clone(), - cache_exclude_status.clone(), - request, - accept.clone(), - ) - .await - } - Err(RequestError::CreateRequestItem(uri, src, e)) => Response::new( + Ok(request) => Ok(handle( + &client, + cache.clone(), + cache_exclude_status.clone(), + request, + accept.clone(), + ) + .await), + Err(RequestError::CreateRequestItem(uri, src, e)) => Ok(Response::new( Uri::try_from("error://").unwrap(), Status::RequestError(RequestError::CreateRequestItem(uri, src.clone(), e)), src, - ), - err @ Err(_) => { - err.expect("ads"); - panic!() - } + )), + Err(e) => Err(e.into_source()), }; send_resp From 3bca6c8daa864838e0c9de9914fcc6b0cbe77b19 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 5 Oct 2025 16:29:52 +1000 Subject: [PATCH 21/63] docs --- lychee-lib/src/types/request.rs | 15 +++++++++------ lychee-lib/src/types/status.rs | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index 21b700d48e..9b7d423664 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -6,27 +6,30 @@ use crate::{BasicAuthCredentials, ErrorKind, RawUri, Uri}; use super::InputSource; use super::ResolvedInputSource; -/// a +/// An error which occurs while trying to construct a [`Request`] object. +/// That is, an error which happens while trying to load links from an input +/// source. #[derive(Error, Debug, PartialEq, Eq, Hash)] pub enum RequestError { - /// a + /// Unable to construct a URL for a link appearing within the given source. 
#[error("Error building URL for {0}: {2}")] CreateRequestItem(RawUri, ResolvedInputSource, #[source] ErrorKind), - /// a + + /// Unable to load the content of an input source. #[error("Error getting input content: {0}")] GetInputContent(#[source] ErrorKind), } impl RequestError { - /// a - pub fn error_kind(&self) -> &ErrorKind { + /// Get the underlying cause of this [`RequestError`]. + pub fn error(&self) -> &ErrorKind { match self { Self::CreateRequestItem(_, _, e) => e, Self::GetInputContent(e) => e, } } - /// b + /// Convert this [`RequestError`] into its source error. pub fn into_source(self) -> ErrorKind { match self { Self::CreateRequestItem(_, _, e) => e, diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index 0680f2007e..be83eb554b 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -157,7 +157,7 @@ impl Status { )) } Status::Error(e) => e.details(), - Status::RequestError(e) => e.error_kind().details(), + Status::RequestError(e) => e.error().details(), Status::Timeout(_) => None, Status::UnknownStatusCode(_) => None, Status::Unsupported(_) => None, From 49c550e33ab8201cb497ba09b81491e51cf43458 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 5 Oct 2025 16:40:52 +1000 Subject: [PATCH 22/63] remove unused imports --- lychee-bin/src/commands/check.rs | 49 +++++++++++++++++--------------- lychee-bin/src/commands/dump.rs | 1 - lychee-lib/src/types/error.rs | 2 -- lychee-lib/src/types/request.rs | 1 - lychee-lib/src/types/status.rs | 2 +- 5 files changed, 27 insertions(+), 28 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 9178b19961..7b37a28bbc 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -11,9 +11,8 @@ use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; use lychee_lib::RequestError; -use lychee_lib::ResolvedInputSource; use lychee_lib::archive::Archive; -use lychee_lib::{Client, ErrorKind, 
RawUri, Request, Response, Uri}; +use lychee_lib::{Client, ErrorKind, Request, Response, Uri}; use lychee_lib::{InputSource, Result}; use lychee_lib::{ResponseBody, Status}; @@ -241,22 +240,14 @@ async fn request_channel_task( ReceiverStream::new(recv_req), max_concurrency, |request: std::result::Result| async { - let response = match request { - Ok(request) => Ok(handle( - &client, - cache.clone(), - cache_exclude_status.clone(), - request, - accept.clone(), - ) - .await), - Err(RequestError::CreateRequestItem(uri, src, e)) => Ok(Response::new( - Uri::try_from("error://").unwrap(), - Status::RequestError(RequestError::CreateRequestItem(uri, src.clone(), e)), - src, - )), - Err(e) => Err(e.into_source()), - }; + let response = handle( + &client, + cache.clone(), + cache_exclude_status.clone(), + request, + accept.clone(), + ) + .await; send_resp .send(response) @@ -291,9 +282,21 @@ async fn handle( client: &Client, cache: Arc, cache_exclude_status: HashSet, - request: Request, + request: std::result::Result, accept: HashSet, -) -> Response { +) -> Result { + let request = match request { + Ok(x) => x, + Err(RequestError::CreateRequestItem(uri, src, e)) => { + return Ok(Response::new( + Uri::try_from("error://").unwrap(), + Status::RequestError(RequestError::CreateRequestItem(uri, src.clone(), e)), + src, + )); + } + Err(e) => return Err(e.into_source()), + }; + let uri = request.uri.clone(); if let Some(v) = cache.get(&uri) { // Found a cached request @@ -308,7 +311,7 @@ async fn handle( // code. Status::from_cache_status(v.value().status, &accept) }; - return Response::new(uri.clone(), status, request.source); + return Ok(Response::new(uri.clone(), status, request.source)); } // Request was not cached; run a normal check @@ -322,11 +325,11 @@ async fn handle( // - Skip caching links for which the status code has been explicitly excluded from the cache. 
let status = response.status(); if ignore_cache(&uri, status, &cache_exclude_status) { - return response; + return Ok(response); } cache.insert(uri, status.into()); - response + Ok(response) } /// Returns `true` if the response should be ignored in the cache. diff --git a/lychee-bin/src/commands/dump.rs b/lychee-bin/src/commands/dump.rs index defba34289..112ef98e2e 100644 --- a/lychee-bin/src/commands/dump.rs +++ b/lychee-bin/src/commands/dump.rs @@ -1,6 +1,5 @@ use log::error; use log::warn; -use lychee_lib::ErrorKind; use lychee_lib::Request; use lychee_lib::RequestError; use lychee_lib::Result; diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 76f99649eb..2ec5b5183a 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -7,9 +7,7 @@ use thiserror::Error; use tokio::task::JoinError; use super::InputContent; -use crate::ResolvedInputSource; use crate::types::StatusCodeSelectorError; -use crate::types::uri::raw::RawUri; use crate::{Uri, basic_auth::BasicAuthExtractorError, utils}; /// Kinds of status errors diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index 9b7d423664..71a375d8b0 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -3,7 +3,6 @@ use thiserror::Error; use crate::{BasicAuthCredentials, ErrorKind, RawUri, Uri}; -use super::InputSource; use super::ResolvedInputSource; /// An error which occurs while trying to construct a [`Request`] object. 
diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index be83eb554b..0bceb59a40 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -1,4 +1,4 @@ -use std::{collections::HashSet, error::Error, fmt::Display}; +use std::{collections::HashSet, fmt::Display}; use super::CacheStatus; use super::redirect_history::Redirects; From 6a007c287ec3cccbd73661d06fab8c10713c8dac Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 7 Oct 2025 15:57:49 +1000 Subject: [PATCH 23/63] fix example --- examples/collect_links/collect_links.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs index c73dc2aef3..7671be4fcf 100644 --- a/examples/collect_links/collect_links.rs +++ b/examples/collect_links/collect_links.rs @@ -1,10 +1,10 @@ -use lychee_lib::{Collector, Input, InputSource, Result}; +use lychee_lib::{Collector, Input, InputSource, RequestError}; use reqwest::Url; -use std::{collections::HashSet, path::PathBuf}; +use std::{collections::HashSet, path::PathBuf, result::Result}; use tokio_stream::StreamExt; #[tokio::main] -async fn main() -> Result<()> { +async fn main() -> Result<(), RequestError> { // Collect all links from the following inputs let inputs = HashSet::from_iter([ Input::from_input_source(InputSource::RemoteUrl(Box::new( @@ -19,7 +19,7 @@ async fn main() -> Result<()> { .skip_ignored(false) // skip files that are ignored by git? (default=true) .use_html5ever(false) // use html5ever for parsing? (default=false) .collect_links(inputs) // base url or directory - .collect::>>() + .collect::, _>>() .await?; dbg!(links); From d7ebbc0321ac133cd1e4f44f50e4fbab38763e7c Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 7 Oct 2025 16:05:50 +1000 Subject: [PATCH 24/63] clippy. 
includes boxing errorkind because it's big --- lychee-bin/src/commands/check.rs | 2 +- lychee-bin/src/commands/dump.rs | 2 +- lychee-lib/src/collector.rs | 7 ++++++- lychee-lib/src/types/request.rs | 16 ++++++++-------- lychee-lib/src/types/status.rs | 3 +-- lychee-lib/src/utils/request.rs | 5 +++-- 6 files changed, 20 insertions(+), 15 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 7b37a28bbc..e5ac30f680 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -294,7 +294,7 @@ async fn handle( src, )); } - Err(e) => return Err(e.into_source()), + Err(e) => return Err(e.into_error()), }; let uri = request.uri.clone(); diff --git a/lychee-bin/src/commands/dump.rs b/lychee-bin/src/commands/dump.rs index 112ef98e2e..1030b3d91e 100644 --- a/lychee-bin/src/commands/dump.rs +++ b/lychee-bin/src/commands/dump.rs @@ -33,7 +33,7 @@ where warn!("{e}"); continue; } - Err(RequestError::GetInputContent(e)) => Err(e)?, + Err(RequestError::GetInputContent(e)) => Err(*e)?, }; // Apply URI remappings (if any) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 1f47872805..d5fa638df8 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -283,7 +283,12 @@ impl Collector { excluded_paths, ) .map(move |content| { - (content.map_err(RequestError::GetInputContent), base.clone()) + ( + content + .map_err(Box::new) + .map_err(RequestError::GetInputContent), + base.clone(), + ) }) } }) diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index 71a375d8b0..243f4400a7 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -12,27 +12,27 @@ use super::ResolvedInputSource; pub enum RequestError { /// Unable to construct a URL for a link appearing within the given source. 
#[error("Error building URL for {0}: {2}")] - CreateRequestItem(RawUri, ResolvedInputSource, #[source] ErrorKind), + CreateRequestItem(RawUri, ResolvedInputSource, #[source] Box), /// Unable to load the content of an input source. #[error("Error getting input content: {0}")] - GetInputContent(#[source] ErrorKind), + GetInputContent(#[source] Box), } impl RequestError { /// Get the underlying cause of this [`RequestError`]. - pub fn error(&self) -> &ErrorKind { + #[must_use] + pub const fn error(&self) -> &ErrorKind { match self { - Self::CreateRequestItem(_, _, e) => e, - Self::GetInputContent(e) => e, + Self::CreateRequestItem(_, _, e) | Self::GetInputContent(e) => e, } } /// Convert this [`RequestError`] into its source error. - pub fn into_source(self) -> ErrorKind { + #[must_use] + pub fn into_error(self) -> ErrorKind { match self { - Self::CreateRequestItem(_, _, e) => e, - Self::GetInputContent(e) => e, + Self::CreateRequestItem(_, _, e) | Self::GetInputContent(e) => *e, } } } diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index 0bceb59a40..96840654e6 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -221,8 +221,7 @@ impl Status { Status::Redirected(_, _) => ICON_REDIRECTED, Status::UnknownStatusCode(_) => ICON_UNKNOWN, Status::Excluded => ICON_EXCLUDED, - Status::Error(_) => ICON_ERROR, - Status::RequestError(_) => ICON_ERROR, + Status::Error(_) | Status::RequestError(_) => ICON_ERROR, Status::Timeout(_) => ICON_TIMEOUT, Status::Unsupported(_) => ICON_UNSUPPORTED, Status::Cached(_) => ICON_CACHED, diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index c293f03470..336fa5079a 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -127,8 +127,9 @@ pub(crate) fn create( uris.into_iter() .map(|raw_uri| { - create_request(&raw_uri, source, root_dir, base.as_ref(), extractor) - .map_err(|e| RequestError::CreateRequestItem(raw_uri.clone(), 
source.clone(), e)) + create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| { + RequestError::CreateRequestItem(raw_uri.clone(), source.clone(), Box::new(e)) + }) }) .collect() } From 17fad2c8e2d64bc75a71094f5c0484e2bba37971 Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 7 Oct 2025 16:32:31 +1000 Subject: [PATCH 25/63] propagate input loading errors too this slightly winds back the ResolvedInputSource usage and changes Response back to an ordinary InputSource --- lychee-bin/src/commands/check.rs | 20 ++++++++++++-------- lychee-bin/src/commands/dump.rs | 3 +-- lychee-bin/src/formatters/stats/markdown.rs | 9 +++------ lychee-bin/src/formatters/stats/mod.rs | 20 +++++++------------- lychee-bin/src/stats.rs | 19 +++++++------------ lychee-lib/src/client.rs | 4 ++-- lychee-lib/src/collector.rs | 9 +-------- lychee-lib/src/types/input/input.rs | 8 ++++++-- lychee-lib/src/types/request.rs | 11 +++++------ lychee-lib/src/types/response.rs | 8 ++++---- 10 files changed, 48 insertions(+), 63 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index e5ac30f680..e7bc569390 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -272,7 +272,7 @@ async fn check_url(client: &Client, request: Request) -> Response { Response::new( uri.clone(), Status::Error(ErrorKind::InvalidURI(uri.clone())), - source, + source.into(), ) }) } @@ -287,14 +287,18 @@ async fn handle( ) -> Result { let request = match request { Ok(x) => x, - Err(RequestError::CreateRequestItem(uri, src, e)) => { + Err(e) => { + let src = match e { + RequestError::CreateRequestItem(_, ref src, _) => src.clone().into(), + RequestError::GetInputContent(ref src, _) => src.clone(), + }; + return Ok(Response::new( Uri::try_from("error://").unwrap(), - Status::RequestError(RequestError::CreateRequestItem(uri, src.clone(), e)), + Status::RequestError(e), src, )); } - Err(e) => return Err(e.into_error()), }; let uri = 
request.uri.clone(); @@ -311,7 +315,7 @@ async fn handle( // code. Status::from_cache_status(v.value().status, &accept) }; - return Ok(Response::new(uri.clone(), status, request.source)); + return Ok(Response::new(uri.clone(), status, request.source.into())); } // Request was not cached; run a normal check @@ -409,7 +413,7 @@ mod tests { use crate::{formatters::get_response_formatter, options}; use http::StatusCode; use log::info; - use lychee_lib::{CacheStatus, ClientBuilder, ErrorKind, ResolvedInputSource, Uri}; + use lychee_lib::{CacheStatus, ClientBuilder, ErrorKind, Uri}; use super::*; @@ -419,7 +423,7 @@ mod tests { let response = Response::new( Uri::try_from("http://127.0.0.1").unwrap(), Status::Cached(CacheStatus::Ok(200)), - ResolvedInputSource::Stdin, + InputSource::Stdin, ); let formatter = get_response_formatter(&options::OutputMode::Plain); show_progress( @@ -441,7 +445,7 @@ mod tests { let response = Response::new( Uri::try_from("http://127.0.0.1").unwrap(), Status::Cached(CacheStatus::Ok(200)), - ResolvedInputSource::Stdin, + InputSource::Stdin, ); let formatter = get_response_formatter(&options::OutputMode::Plain); show_progress( diff --git a/lychee-bin/src/commands/dump.rs b/lychee-bin/src/commands/dump.rs index 1030b3d91e..d2cc97c2e6 100644 --- a/lychee-bin/src/commands/dump.rs +++ b/lychee-bin/src/commands/dump.rs @@ -29,11 +29,10 @@ where while let Some(request) = requests.next().await { let mut request = match request { Ok(x) => x, - Err(e @ RequestError::CreateRequestItem(_, _, _)) => { + Err(e) => { warn!("{e}"); continue; } - Err(RequestError::GetInputContent(e)) => Err(*e)?, }; // Apply URI remappings (if any) diff --git a/lychee-bin/src/formatters/stats/markdown.rs b/lychee-bin/src/formatters/stats/markdown.rs index 31b636480a..256add3977 100644 --- a/lychee-bin/src/formatters/stats/markdown.rs +++ b/lychee-bin/src/formatters/stats/markdown.rs @@ -165,10 +165,7 @@ impl StatsFormatter for Markdown { #[cfg(test)] mod tests { use 
http::StatusCode; - use lychee_lib::{ - CacheStatus, InputSource, Redirects, ResolvedInputSource, Response, ResponseBody, Status, - Uri, - }; + use lychee_lib::{CacheStatus, InputSource, Redirects, Response, ResponseBody, Status, Uri}; use reqwest::Url; use crate::formatters::suggestion::Suggestion; @@ -230,7 +227,7 @@ mod tests { stats.add(Response::new( Uri::try_from("http://127.0.0.1").unwrap(), Status::Cached(CacheStatus::Error(Some(404))), - ResolvedInputSource::Stdin, + InputSource::Stdin, )); // Add suggestion @@ -254,7 +251,7 @@ mod tests { Url::parse("http://redirected.dev").unwrap(), ]), ), - ResolvedInputSource::Stdin, + InputSource::Stdin, )); let summary = MarkdownResponseStats(stats); diff --git a/lychee-bin/src/formatters/stats/mod.rs b/lychee-bin/src/formatters/stats/mod.rs index 0af5600153..a9cd0c002c 100644 --- a/lychee-bin/src/formatters/stats/mod.rs +++ b/lychee-bin/src/formatters/stats/mod.rs @@ -55,14 +55,14 @@ where mod tests { use super::*; - use lychee_lib::{ErrorKind, ResolvedInputSource, Response, Status, Uri}; + use lychee_lib::{ErrorKind, Response, Status, Uri}; use url::Url; fn make_test_url(url: &str) -> Url { Url::parse(url).expect("Expected valid Website URI") } - fn make_test_response(url_str: &str, source: ResolvedInputSource) -> Response { + fn make_test_response(url_str: &str, source: InputSource) -> Response { let uri = Uri::from(make_test_url(url_str)); Response::new(uri, Status::Error(ErrorKind::TestError), source) @@ -74,18 +74,12 @@ mod tests { // Sorted list of test sources let test_sources = vec![ - ResolvedInputSource::RemoteUrl(Box::new(make_test_url("https://example.com/404"))), - ResolvedInputSource::RemoteUrl(Box::new(make_test_url("https://example.com/home"))), - ResolvedInputSource::RemoteUrl(Box::new(make_test_url("https://example.com/page/1"))), - ResolvedInputSource::RemoteUrl(Box::new(make_test_url("https://example.com/page/10"))), + InputSource::RemoteUrl(Box::new(make_test_url("https://example.com/404"))), + 
InputSource::RemoteUrl(Box::new(make_test_url("https://example.com/home"))), + InputSource::RemoteUrl(Box::new(make_test_url("https://example.com/page/1"))), + InputSource::RemoteUrl(Box::new(make_test_url("https://example.com/page/10"))), ]; - let unresolved_test_sources: Vec = test_sources - .iter() - .map(Clone::clone) - .map(Into::::into) - .collect(); - // Sorted list of test responses let test_response_urls = vec![ "https://example.com/", @@ -110,7 +104,7 @@ mod tests { .collect(); // Check that the input sources are sorted - assert_eq!(unresolved_test_sources, sorted_sources); + assert_eq!(test_sources, sorted_sources); // Check that the responses are sorted for (_, response_bodies) in sorted_errors { diff --git a/lychee-bin/src/stats.rs b/lychee-bin/src/stats.rs index b39c56d0d1..e7614affb1 100644 --- a/lychee-bin/src/stats.rs +++ b/lychee-bin/src/stats.rs @@ -92,7 +92,7 @@ impl ResponseStats { /// Add a response status to the appropriate map (success, fail, excluded) fn add_response_status(&mut self, response: Response) { let status = response.status(); - let source: InputSource = response.source().clone().into(); + let source: InputSource = response.source().clone(); let status_map_entry = match status { _ if status.is_error() => self.error_map.entry(source).or_default(), Status::Ok(_) if self.detailed_stats => self.success_map.entry(source).or_default(), @@ -129,9 +129,7 @@ mod tests { use std::collections::{HashMap, HashSet}; use http::StatusCode; - use lychee_lib::{ - ErrorKind, InputSource, ResolvedInputSource, Response, ResponseBody, Status, Uri, - }; + use lychee_lib::{ErrorKind, InputSource, Response, ResponseBody, Status, Uri}; use reqwest::Url; use super::ResponseStats; @@ -145,7 +143,7 @@ mod tests { // and it's a lot faster to just generate a fake response fn mock_response(status: Status) -> Response { let uri = website("https://some-url.com/ok"); - Response::new(uri, status, ResolvedInputSource::Stdin) + Response::new(uri, status, 
InputSource::Stdin) } fn dummy_ok() -> Response { @@ -181,10 +179,7 @@ mod tests { let response = dummy_error(); let expected_error_map: HashMap> = - HashMap::from_iter([( - response.source().clone().into(), - HashSet::from_iter([response.1]), - )]); + HashMap::from_iter([(response.source().clone(), HashSet::from_iter([response.1]))]); assert_eq!(stats.error_map, expected_error_map); assert!(stats.success_map.is_empty()); @@ -204,7 +199,7 @@ mod tests { let mut expected_error_map: HashMap> = HashMap::new(); let response = dummy_error(); let entry = expected_error_map - .entry(response.source().clone().into()) + .entry(response.source().clone()) .or_default(); entry.insert(response.1); assert_eq!(stats.error_map, expected_error_map); @@ -212,7 +207,7 @@ mod tests { let mut expected_success_map: HashMap> = HashMap::new(); let response = dummy_ok(); let entry = expected_success_map - .entry(response.source().clone().into()) + .entry(response.source().clone()) .or_default(); entry.insert(response.1); assert_eq!(stats.success_map, expected_success_map); @@ -220,7 +215,7 @@ mod tests { let mut expected_excluded_map: HashMap> = HashMap::new(); let response = dummy_excluded(); let entry = expected_excluded_map - .entry(response.source().clone().into()) + .entry(response.source().clone()) .or_default(); entry.insert(response.1); assert_eq!(stats.excluded_map, expected_excluded_map); diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 63306bb91c..a6efa70dc9 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -494,7 +494,7 @@ impl Client { self.remap(uri)?; if self.is_excluded(uri) { - return Ok(Response::new(uri.clone(), Status::Excluded, source)); + return Ok(Response::new(uri.clone(), Status::Excluded, source.into())); } let status = match uri.scheme() { @@ -505,7 +505,7 @@ impl Client { _ => self.check_website(uri, credentials).await?, }; - Ok(Response::new(uri.clone(), status, source)) + Ok(Response::new(uri.clone(), status, 
source.into())) } /// Check a single file using the file checker. diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index d5fa638df8..999152377e 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -282,14 +282,7 @@ impl Collector { resolver, excluded_paths, ) - .map(move |content| { - ( - content - .map_err(Box::new) - .map_err(RequestError::GetInputContent), - base.clone(), - ) - }) + .map(move |content| (content, base.clone())) } }) .flatten() diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index 1df0297f1d..23bd1ad2bc 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -8,9 +8,9 @@ use super::content::InputContent; use super::source::InputSource; use super::source::ResolvedInputSource; use crate::filter::PathExcludes; -use crate::types::FileType; use crate::types::file::FileExtensions; use crate::types::resolver::UrlContentResolver; +use crate::types::{FileType, RequestError}; use crate::{ErrorKind, Result}; use async_stream::try_stream; use futures::stream::{Stream, StreamExt}; @@ -170,7 +170,8 @@ impl Input { file_extensions: FileExtensions, resolver: UrlContentResolver, excluded_paths: PathExcludes, - ) -> impl Stream> { + ) -> impl Stream> { + let source = self.source.clone(); try_stream! 
{ // Handle simple cases that don't need resolution match self.source { @@ -236,6 +237,9 @@ impl Input { } } } + .map(move |result| + result.map_err(|e| RequestError::GetInputContent(source.clone(), Box::new(e))) + ) } /// Create a `WalkBuilder` for directory traversal diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index 243f4400a7..2cb1ec893f 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -2,8 +2,7 @@ use std::{borrow::Cow, convert::TryFrom, fmt::Display}; use thiserror::Error; use crate::{BasicAuthCredentials, ErrorKind, RawUri, Uri}; - -use super::ResolvedInputSource; +use crate::{InputSource, ResolvedInputSource}; /// An error which occurs while trying to construct a [`Request`] object. /// That is, an error which happens while trying to load links from an input @@ -15,8 +14,8 @@ pub enum RequestError { CreateRequestItem(RawUri, ResolvedInputSource, #[source] Box), /// Unable to load the content of an input source. 
- #[error("Error getting input content: {0}")] - GetInputContent(#[source] Box), + #[error("Error reading input '{0}': {1}")] + GetInputContent(InputSource, #[source] Box), } impl RequestError { @@ -24,7 +23,7 @@ impl RequestError { #[must_use] pub const fn error(&self) -> &ErrorKind { match self { - Self::CreateRequestItem(_, _, e) | Self::GetInputContent(e) => e, + Self::CreateRequestItem(_, _, e) | Self::GetInputContent(_, e) => e, } } @@ -32,7 +31,7 @@ impl RequestError { #[must_use] pub fn into_error(self) -> ErrorKind { match self { - Self::CreateRequestItem(_, _, e) | Self::GetInputContent(e) => *e, + Self::CreateRequestItem(_, _, e) | Self::GetInputContent(_, e) => *e, } } } diff --git a/lychee-lib/src/types/response.rs b/lychee-lib/src/types/response.rs index 96e62c117f..d194d686ae 100644 --- a/lychee-lib/src/types/response.rs +++ b/lychee-lib/src/types/response.rs @@ -3,7 +3,7 @@ use std::fmt::Display; use http::StatusCode; use serde::Serialize; -use crate::{ResolvedInputSource, Status, Uri}; +use crate::{InputSource, Status, Uri}; /// Response type returned by lychee after checking a URI // @@ -14,13 +14,13 @@ use crate::{ResolvedInputSource, Status, Uri}; // `pub(crate)` is insufficient, because the `stats` module is in the `bin` // crate crate. #[derive(Debug)] -pub struct Response(ResolvedInputSource, pub ResponseBody); +pub struct Response(InputSource, pub ResponseBody); impl Response { #[inline] #[must_use] /// Create new response - pub const fn new(uri: Uri, status: Status, source: ResolvedInputSource) -> Self { + pub const fn new(uri: Uri, status: Status, source: InputSource) -> Self { Response(source, ResponseBody { uri, status }) } @@ -35,7 +35,7 @@ impl Response { #[must_use] /// Retrieve the underlying source of the response /// (e.g. 
the input file or the URL) - pub const fn source(&self) -> &ResolvedInputSource { + pub const fn source(&self) -> &InputSource { &self.0 } From 976904af3fd0c0d985b0db649b908ad94e1cd228 Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 7 Oct 2025 16:53:45 +1000 Subject: [PATCH 26/63] handle is no longer fallible. add helper function --- lychee-bin/src/commands/check.rs | 23 ++++++++++------------- lychee-lib/src/types/request.rs | 9 +++++++++ 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index e7bc569390..91fcf72b68 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -194,14 +194,13 @@ where /// Reads from the request channel and updates the progress bar status async fn progress_bar_task( - mut recv_resp: mpsc::Receiver>, + mut recv_resp: mpsc::Receiver, verbose: Verbosity, pb: Option, formatter: Box, mut stats: ResponseStats, ) -> Result<(Option, ResponseStats)> { while let Some(response) = recv_resp.recv().await { - let response = response?; show_progress( &mut io::stderr(), pb.as_ref(), @@ -229,7 +228,7 @@ fn init_progress_bar(initial_message: &'static str) -> ProgressBar { async fn request_channel_task( recv_req: mpsc::Receiver>, - send_resp: mpsc::Sender>, + send_resp: mpsc::Sender, max_concurrency: usize, client: Client, cache: Arc, @@ -284,20 +283,18 @@ async fn handle( cache_exclude_status: HashSet, request: std::result::Result, accept: HashSet, -) -> Result { +) -> Response { + // Note that the RequestError cases bypass the cache. 
let request = match request { Ok(x) => x, Err(e) => { - let src = match e { - RequestError::CreateRequestItem(_, ref src, _) => src.clone().into(), - RequestError::GetInputContent(ref src, _) => src.clone(), - }; + let src = e.input_source(); - return Ok(Response::new( + return Response::new( Uri::try_from("error://").unwrap(), Status::RequestError(e), src, - )); + ); } }; @@ -315,7 +312,7 @@ async fn handle( // code. Status::from_cache_status(v.value().status, &accept) }; - return Ok(Response::new(uri.clone(), status, request.source.into())); + return Response::new(uri.clone(), status, request.source.into()); } // Request was not cached; run a normal check @@ -329,11 +326,11 @@ async fn handle( // - Skip caching links for which the status code has been explicitly excluded from the cache. let status = response.status(); if ignore_cache(&uri, status, &cache_exclude_status) { - return Ok(response); + return response; } cache.insert(uri, status.into()); - Ok(response) + response } /// Returns `true` if the response should be ignored in the cache. diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index 2cb1ec893f..f90dda2e8f 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -34,6 +34,15 @@ impl RequestError { Self::CreateRequestItem(_, _, e) | Self::GetInputContent(_, e) => *e, } } + + /// Get (a clone of) the input source within which the error happened. + #[must_use] + pub fn input_source(&self) -> InputSource { + match self { + Self::CreateRequestItem(_, src, _) => src.clone().into(), + Self::GetInputContent(src, _) => src.clone(), + } + } } /// A request type that can be handle by lychee From f0eda830aa16c5074583dd5ab9a1664bdd70b1e7 Mon Sep 17 00:00:00 2001 From: katrinafyi <39479354+katrinafyi@users.noreply.github.com> Date: Thu, 9 Oct 2025 17:30:50 +1000 Subject: [PATCH 27/63] Delete clippy.toml this shouldn't be needed anymore since we no longer use a recursive case in ErrorKind. 
instead, we use the new RequestError type. --- clippy.toml | 1 - 1 file changed, 1 deletion(-) delete mode 100644 clippy.toml diff --git a/clippy.toml b/clippy.toml deleted file mode 100644 index c9808129d6..0000000000 --- a/clippy.toml +++ /dev/null @@ -1 +0,0 @@ -ignore-interior-mutability = ["lychee_lib::ErrorKind"] From 704ad9869d209054339ab75ed20b6f2bdfeb762c Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 9 Oct 2025 17:32:33 +1000 Subject: [PATCH 28/63] Revert "Delete clippy.toml" This reverts commit f0eda830aa16c5074583dd5ab9a1664bdd70b1e7. --- clippy.toml | 1 + 1 file changed, 1 insertion(+) create mode 100644 clippy.toml diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 0000000000..c9808129d6 --- /dev/null +++ b/clippy.toml @@ -0,0 +1 @@ +ignore-interior-mutability = ["lychee_lib::ErrorKind"] From e5152ccd96ba0afe4a27cf4a27032f15010d8b94 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 9 Oct 2025 17:36:07 +1000 Subject: [PATCH 29/63] explain ignore-interior-mutability --- clippy.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clippy.toml b/clippy.toml index c9808129d6..ebb0a1f9d5 100644 --- a/clippy.toml +++ b/clippy.toml @@ -1 +1,8 @@ +# This is needed when using ErrorKind within a hashed collection. This is because +# ErrorKind contains some errors which have a backtrace and those backtraces +# make use of mutable cells. We expect this is safe, since the mutability is +# contained within Once and so probably only mutated during initialisation. +# +# Therefore, the hash value *should* remain stable while it's within the hashed +# collection. 
ignore-interior-mutability = ["lychee_lib::ErrorKind"] From eef8b78304720199a1d0bc6e3c8286790101853a Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 23 Oct 2025 23:19:41 +1000 Subject: [PATCH 30/63] fix compilation --- lychee-lib/src/types/uri/raw.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lychee-lib/src/types/uri/raw.rs b/lychee-lib/src/types/uri/raw.rs index f89f1a4ceb..026ee75821 100644 --- a/lychee-lib/src/types/uri/raw.rs +++ b/lychee-lib/src/types/uri/raw.rs @@ -42,7 +42,7 @@ impl From<(&str, RawUriSpan)> for RawUri { /// A span of a [`RawUri`] in the document. /// /// The span can be used to give more precise error messages. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct RawUriSpan { /// The line of the URI. /// From 1a1b5df9f179389bab3ab56b6df041f3f4f3fe5e Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 24 Oct 2025 01:28:28 +1000 Subject: [PATCH 31/63] review: add new error case for user-provided input failures, but should this just be a boolean in GetInputContent case? --- lychee-bin/src/commands/check.rs | 4 ++++ lychee-bin/src/commands/dump.rs | 4 ++++ lychee-lib/src/types/input/input.rs | 17 +++++++++-------- lychee-lib/src/types/request.rs | 13 +++++++++++-- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 40dab0fddc..0e5193060b 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -185,6 +185,10 @@ where { tokio::pin!(requests); while let Some(request) = requests.next().await { + if let Err(e @ RequestError::UserInputContent { .. 
}) = request { + return Err(e.into_error()); + } + if let Some(pb) = &bar { pb.inc_length(1); if let Ok(request) = &request { diff --git a/lychee-bin/src/commands/dump.rs b/lychee-bin/src/commands/dump.rs index d2cc97c2e6..93cc8b215d 100644 --- a/lychee-bin/src/commands/dump.rs +++ b/lychee-bin/src/commands/dump.rs @@ -27,6 +27,10 @@ where let mut writer = super::create_writer(params.cfg.output)?; while let Some(request) = requests.next().await { + if let Err(e @ RequestError::UserInputContent { .. }) = request { + return Err(e.into_error()); + } + let mut request = match request { Ok(x) => x, Err(e) => { diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index a788f26f5e..de73451ee4 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -171,20 +171,24 @@ impl Input { resolver: UrlContentResolver, excluded_paths: PathExcludes, ) -> impl Stream> { - let source = self.source.clone(); try_stream! { + let source = self.source.clone(); + + let user_input_error = move |e: ErrorKind| RequestError::UserInputContent(source.clone(), Box::new(e)); + let discovered_input_error = |e: ErrorKind| RequestError::GetInputContent(self.source.clone(), Box::new(e)); + // Handle simple cases that don't need resolution match self.source { InputSource::RemoteUrl(url) => { match resolver.url_contents(*url).await { Err(_) if skip_missing => (), - Err(e) => Err(e)?, + Err(e) => Err(e).map_err(user_input_error)?, Ok(content) => yield content, } return; } InputSource::Stdin => { - yield Self::stdin_content(self.file_type_hint).await?; + yield Self::stdin_content(self.file_type_hint).await.map_err(user_input_error)?; return; } InputSource::String(ref s) => { @@ -231,14 +235,14 @@ impl Input { log::warn!("Skipping file with invalid UTF-8 content: {}", path.display()); } }, - Err(e) => Err(e)?, + Err(e) => Err(e).map_err(discovered_input_error)?, Ok(content) => { sources_empty = false; yield content } } }, - Err(e) => 
Err(e)?, + Err(e) => Err(e).map_err(discovered_input_error)?, } } @@ -246,9 +250,6 @@ impl Input { log::warn!("{}: No files found for this input source", self.source); } } - .map(move |result| - result.map_err(|e| RequestError::GetInputContent(source.clone(), Box::new(e))) - ) } /// Create a `WalkBuilder` for directory traversal diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index f90dda2e8f..a911296ff5 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -16,6 +16,10 @@ pub enum RequestError { /// Unable to load the content of an input source. #[error("Error reading input '{0}': {1}")] GetInputContent(InputSource, #[source] Box), + + /// Unable to load an input source directly provided by the user. + #[error("Error reading user input '{0}': {1}")] + UserInputContent(InputSource, #[source] Box), } impl RequestError { @@ -23,7 +27,9 @@ impl RequestError { #[must_use] pub const fn error(&self) -> &ErrorKind { match self { - Self::CreateRequestItem(_, _, e) | Self::GetInputContent(_, e) => e, + Self::CreateRequestItem(_, _, e) + | Self::GetInputContent(_, e) + | Self::UserInputContent(_, e) => e, } } @@ -31,7 +37,9 @@ impl RequestError { #[must_use] pub fn into_error(self) -> ErrorKind { match self { - Self::CreateRequestItem(_, _, e) | Self::GetInputContent(_, e) => *e, + Self::CreateRequestItem(_, _, e) + | Self::GetInputContent(_, e) + | Self::UserInputContent(_, e) => *e, } } @@ -41,6 +49,7 @@ impl RequestError { match self { Self::CreateRequestItem(_, src, _) => src.clone().into(), Self::GetInputContent(src, _) => src.clone(), + Self::UserInputContent(src, _) => src.clone(), } } } From f331dbef871f3c2b5280c6d24014a3fdb521d006 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 24 Oct 2025 01:38:15 +1000 Subject: [PATCH 32/63] lint --- lychee-lib/src/types/input/input.rs | 6 +++--- lychee-lib/src/types/request.rs | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git 
a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index de73451ee4..b0bdac4249 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -182,7 +182,7 @@ impl Input { InputSource::RemoteUrl(url) => { match resolver.url_contents(*url).await { Err(_) if skip_missing => (), - Err(e) => Err(e).map_err(user_input_error)?, + Err(e) => Err(user_input_error(e))?, Ok(content) => yield content, } return; @@ -235,14 +235,14 @@ impl Input { log::warn!("Skipping file with invalid UTF-8 content: {}", path.display()); } }, - Err(e) => Err(e).map_err(discovered_input_error)?, + Err(e) => Err(discovered_input_error(e))?, Ok(content) => { sources_empty = false; yield content } } }, - Err(e) => Err(e).map_err(discovered_input_error)?, + Err(e) => Err(discovered_input_error(e))?, } } diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index a911296ff5..ab792fd11e 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -48,8 +48,7 @@ impl RequestError { pub fn input_source(&self) -> InputSource { match self { Self::CreateRequestItem(_, src, _) => src.clone().into(), - Self::GetInputContent(src, _) => src.clone(), - Self::UserInputContent(src, _) => src.clone(), + Self::GetInputContent(src, _) | Self::UserInputContent(src, _) => src.clone(), } } } From 5716ee97911492a978aada7393497dba84280915 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 24 Oct 2025 14:11:55 +1000 Subject: [PATCH 33/63] move UserInputContent case into fn handle --- lychee-bin/src/commands/check.rs | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 0e5193060b..4e5d4cffed 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -185,10 +185,6 @@ where { tokio::pin!(requests); while let Some(request) = requests.next().await { - if let Err(e @ 
RequestError::UserInputContent { .. }) = request { - return Err(e.into_error()); - } - if let Some(pb) = &bar { pb.inc_length(1); if let Ok(request) = &request { @@ -202,13 +198,14 @@ where /// Reads from the request channel and updates the progress bar status async fn progress_bar_task( - mut recv_resp: mpsc::Receiver, + mut recv_resp: mpsc::Receiver>, verbose: Verbosity, pb: Option, formatter: Box, mut stats: ResponseStats, ) -> Result<(Option, ResponseStats)> { while let Some(response) = recv_resp.recv().await { + let response = response?; show_progress( &mut io::stderr(), pb.as_ref(), @@ -236,7 +233,7 @@ fn init_progress_bar(initial_message: &'static str) -> ProgressBar { async fn request_channel_task( recv_req: mpsc::Receiver>, - send_resp: mpsc::Sender, + send_resp: mpsc::Sender>, max_concurrency: usize, client: Client, cache: Arc, @@ -285,24 +282,34 @@ async fn check_url(client: &Client, request: Request) -> Response { } /// Handle a single request +/// +/// # Errors +/// +/// An Err is returned if and only if there was an error while loading +/// a *user-provided* input argument. Other errors, including errors in +/// link resolution and in resolved inputs, will be returned as Ok with +/// a failed response. async fn handle( client: &Client, cache: Arc, cache_exclude_status: HashSet, request: std::result::Result, accept: HashSet, -) -> Response { +) -> Result { // Note that the RequestError cases bypass the cache. let request = match request { Ok(x) => x, + Err(e @ RequestError::UserInputContent { .. }) => { + return Err(e.into_error()); + } Err(e) => { let src = e.input_source(); - return Response::new( + return Ok(Response::new( Uri::try_from("error://").unwrap(), Status::RequestError(e), src, - ); + )); } }; @@ -320,7 +327,7 @@ async fn handle( // code. 
Status::from_cache_status(v.value().status, &accept) }; - return Response::new(uri.clone(), status, request.source.into()); + return Ok(Response::new(uri.clone(), status, request.source.into())); } // Request was not cached; run a normal check @@ -334,11 +341,11 @@ async fn handle( // - Skip caching links for which the status code has been explicitly excluded from the cache. let status = response.status(); if ignore_cache(&uri, status, &cache_exclude_status) { - return response; + return Ok(response); } cache.insert(uri, status.into()); - response + Ok(response) } /// Returns `true` if the response should be ignored in the cache. From 300e3685912985105592ff41be6fd205687a1475 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 24 Oct 2025 14:25:35 +1000 Subject: [PATCH 34/63] try fix lint. help welcome. i can't compile this for some reason `make lint` fails error[E0658]: `let` expressions in this position are unstable --> /home/x/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/wiremock-0.6.5/src/matchers.rs:214:12 | 214 | if let Ok(url) = Url::parse(&path) | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | --- examples/collect_links/collect_links.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs index 7671be4fcf..74e95b0f03 100644 --- a/examples/collect_links/collect_links.rs +++ b/examples/collect_links/collect_links.rs @@ -4,7 +4,7 @@ use std::{collections::HashSet, path::PathBuf, result::Result}; use tokio_stream::StreamExt; #[tokio::main] -async fn main() -> Result<(), RequestError> { +async fn main() -> Result<(), Box> { // Collect all links from the following inputs let inputs = HashSet::from_iter([ Input::from_input_source(InputSource::RemoteUrl(Box::new( From e22f13c65277e100ce088c425c5ecd96e0f80bb8 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 24 Oct 2025 15:15:34 +1000 Subject: [PATCH 35/63] =?UTF-8?q?add=20early=20checking=20for=20file=20and?= 
=?UTF-8?q?=20dir=20permissions=20(and=20fmt=20=F0=9F=99=8A)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lychee-lib/src/types/input/input.rs | 52 +++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index b0bdac4249..4ab8ac54ca 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -174,10 +174,13 @@ impl Input { try_stream! { let source = self.source.clone(); - let user_input_error = move |e: ErrorKind| RequestError::UserInputContent(source.clone(), Box::new(e)); - let discovered_input_error = |e: ErrorKind| RequestError::GetInputContent(self.source.clone(), Box::new(e)); + let user_input_error = + move |e: ErrorKind| RequestError::UserInputContent(source.clone(), Box::new(e)); + let discovered_input_error = + |e: ErrorKind| RequestError::GetInputContent(self.source.clone(), Box::new(e)); - // Handle simple cases that don't need resolution + // Handle simple cases that don't need resolution, and perform simple + // checks for more complex cases. match self.source { InputSource::RemoteUrl(url) => { match resolver.url_contents(*url).await { @@ -187,8 +190,27 @@ impl Input { } return; } + InputSource::FsPath(ref path) => { + let is_readable = if path.is_dir() { + path.read_dir().map(|_| ()) + } else { + // this checks existence without requiring an open. opening here, + // then re-opening later, might cause problems with pipes. 
+ path.metadata().map(|_| ()) + }; + + match is_readable { + Ok(_) => (), + Err(e) => Err(user_input_error(ErrorKind::ReadFileInput( + e, + path.to_path_buf(), + )))?, + } + } InputSource::Stdin => { - yield Self::stdin_content(self.file_type_hint).await.map_err(user_input_error)?; + yield Self::stdin_content(self.file_type_hint) + .await + .map_err(user_input_error)?; return; } InputSource::String(ref s) => { @@ -213,35 +235,37 @@ impl Input { match source_result { Ok(source) => { let content_result = match source { - ResolvedInputSource::FsPath(path) => { - Self::path_content(&path).await - }, + ResolvedInputSource::FsPath(path) => Self::path_content(&path).await, ResolvedInputSource::RemoteUrl(url) => { resolver.url_contents(*url).await - }, + } ResolvedInputSource::Stdin => { Self::stdin_content(self.file_type_hint).await - }, + } ResolvedInputSource::String(s) => { Ok(Self::string_content(&s, self.file_type_hint)) - }, + } }; match content_result { Err(_) if skip_missing => (), - Err(e) if matches!(&e, ErrorKind::ReadFileInput(io_err, _) if io_err.kind() == std::io::ErrorKind::InvalidData) => { + Err(e) if matches!(&e, ErrorKind::ReadFileInput(io_err, _) if io_err.kind() == std::io::ErrorKind::InvalidData) => + { // If the file contains invalid UTF-8 (e.g. 
binary), we skip it if let ErrorKind::ReadFileInput(_, path) = &e { - log::warn!("Skipping file with invalid UTF-8 content: {}", path.display()); + log::warn!( + "Skipping file with invalid UTF-8 content: {}", + path.display() + ); } - }, + } Err(e) => Err(discovered_input_error(e))?, Ok(content) => { sources_empty = false; yield content } } - }, + } Err(e) => Err(discovered_input_error(e))?, } } From 5cc0c1bc2eececcb2226dbb260239b9e4e61e609 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 24 Oct 2025 15:25:28 +1000 Subject: [PATCH 36/63] lint --- lychee-lib/src/types/input/input.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index 4ab8ac54ca..cedd269e48 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -200,10 +200,10 @@ impl Input { }; match is_readable { - Ok(_) => (), + Ok(()) => (), Err(e) => Err(user_input_error(ErrorKind::ReadFileInput( e, - path.to_path_buf(), + path.clone(), )))?, } } From 45906e8b8bbd9c81dfb3e54220f13b261d8a1927 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 24 Oct 2025 15:34:17 +1000 Subject: [PATCH 37/63] use DirTraversal error for dir failures --- lychee-lib/src/types/input/input.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index cedd269e48..6e027f2475 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -192,20 +192,19 @@ impl Input { } InputSource::FsPath(ref path) => { let is_readable = if path.is_dir() { - path.read_dir().map(|_| ()) + path.read_dir() + .map(|_| ()) + .map_err(|e| ErrorKind::DirTraversal(ignore::Error::Io(e))) } else { // this checks existence without requiring an open. opening here, - // then re-opening later, might cause problems with pipes. 
- path.metadata().map(|_| ()) + // then re-opening later, might cause problems with pipes. this + // does not validate permissions. + path.metadata() + .map(|_| ()) + .map_err(|e| ErrorKind::ReadFileInput(e, path.clone())) }; - match is_readable { - Ok(()) => (), - Err(e) => Err(user_input_error(ErrorKind::ReadFileInput( - e, - path.clone(), - )))?, - } + is_readable.map_err(user_input_error)?; } InputSource::Stdin => { yield Self::stdin_content(self.file_type_hint) From 2397577f3f78c7dd747e2a24e189cc0c0b45b7a2 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 24 Oct 2025 20:46:49 +1000 Subject: [PATCH 38/63] update tests, including adjusting existing adjusted existing test case `test_ignore_absolute_local_links_without_base` --- lychee-bin/tests/cli.rs | 34 ++++++++++++++++++++++++----- lychee-lib/src/types/input/input.rs | 7 ++++-- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 216f36eb59..9b46e7623e 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1439,11 +1439,10 @@ mod cli { Ok(()) } - /// If `base-dir` is not set, don't throw an error in case we encounter + /// If `base-dir` is not set, an error should be thrown if we encounter /// an absolute local link (e.g. `/about`) within a file. - /// Instead, simply ignore the link. #[test] - fn test_ignore_absolute_local_links_without_base() -> Result<()> { + fn test_absolute_local_links_without_base() -> Result<()> { let mut cmd = main_command!(); let offline_dir = fixtures_path!().join("offline"); @@ -1452,8 +1451,9 @@ mod cli { .arg(offline_dir.join("index.html")) .env_clear() .assert() - .success() - .stdout(contains("0 Total")); + .failure() + .stdout(contains("5 Error")) + .stdout(contains("Error building URL").count(5)); Ok(()) } @@ -2965,4 +2965,28 @@ mod cli { Ok(()) } + + // An input which is invalid (no permission directory or invalid glob) + // should fail as a CLI error, not a link checking error. 
+ #[test] + fn test_invalid_user_input_source() -> Result<()> { + main_command!() + .arg("http://website.invalid") + .assert() + .failure() + .code(1); + + // maybe test with a directory with no write permissions? but there + // doesn't seem to be an equivalent to chmod on the windows API: + // https://doc.rust-lang.org/std/fs/struct.Permissions.html + + main_command!() + .arg("invalid-glob[") + .assert() + .failure() + .code(2); + // TODO: change above exit code to 1 after https://github.com/lycheeverse/lychee/pull/1869 + + Ok(()) + } } diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index 6e027f2475..520b2fbc80 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -179,8 +179,11 @@ impl Input { let discovered_input_error = |e: ErrorKind| RequestError::GetInputContent(self.source.clone(), Box::new(e)); - // Handle simple cases that don't need resolution, and perform simple - // checks for more complex cases. + // Handle simple cases that don't need resolution. Also, perform + // simple *stateful* checks for more complex input sources. + // + // However, stateless well-formedness checks (e.g., checking glob + // syntax) should be done in Input::new. match self.source { InputSource::RemoteUrl(url) => { match resolver.url_contents(*url).await { From 802427b290d102ceb9806c51558b768a3355f04f Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 24 Oct 2025 20:49:22 +1000 Subject: [PATCH 39/63] remove "Skip relative URLs" from readme feature table should it be replaced with something else instead? --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 0af194522e..77b9574523 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,6 @@ outdated information. 
| Custom user agent | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] | ![no] | ![no] | | Relative URLs | ![yes] | ![yes] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | | Anchors/Fragments | ![yes] | ![no] | ![no] | ![no] | ![no] | ![yes] | ![yes] | ![no] | -| Skip relative URLs | ![yes] | ![no] | ![no] | ![maybe] | ![no] | ![no] | ![no] | ![no] | | Include patterns | ![yes]️ | ![yes] | ![no] | ![yes] | ![no] | ![no] | ![no] | ![no] | | Exclude patterns | ![yes] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | | Handle redirects | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | From c594f6b584a370ada91c72d5dce34edd80b3fa70 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 13 Nov 2025 09:46:30 +1000 Subject: [PATCH 40/63] lint; --- lychee-lib/src/collector.rs | 6 ++---- lychee-lib/src/lib.rs | 7 +++---- lychee-lib/src/types/input/input.rs | 5 +---- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 69ec975eb8..48d53fb3be 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -5,10 +5,8 @@ use crate::filter::PathExcludes; use crate::types::resolver::UrlContentResolver; use crate::{ - Base, Base, Input, Input, InputResolver, Request, Request, RequestError, Result, Result, - basic_auth::BasicAuthExtractor, basic_auth::BasicAuthExtractor, extract::Extractor, - extract::Extractor, types::FileExtensions, types::FileExtensions, types::uri::raw::RawUri, - types::uri::raw::RawUri, utils::request, utils::request, + Base, Input, Request, RequestError, Result, basic_auth::BasicAuthExtractor, extract::Extractor, + types::FileExtensions, types::uri::raw::RawUri, utils::request, }; use futures::TryStreamExt; use futures::{ diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index 4416b570f1..04abc805d6 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -95,9 +95,8 @@ pub use crate::{ types::{ AcceptRange, 
AcceptRangeError, Base, BasicAuthCredentials, BasicAuthSelector, CacheStatus, CookieJar, ErrorKind, FileExtensions, FileType, Input, InputContent, InputResolver, - InputSource, InputSource, Preprocessor, Redirects, Redirects, Request, Request, - RequestError, ResolvedInputSource, ResolvedInputSource, Response, Response, ResponseBody, - ResponseBody, Result, Result, Status, Status, StatusCodeExcluder, StatusCodeExcluder, - StatusCodeSelector, StatusCodeSelector, uri::raw::RawUri, uri::valid::Uri, uri::valid::Uri, + InputSource, Preprocessor, Redirects, Request, RequestError, ResolvedInputSource, Response, + ResponseBody, Result, Status, StatusCodeExcluder, StatusCodeSelector, uri::raw::RawUri, + uri::valid::Uri, }, }; diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index 873c6f6cfc..ba787777bb 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -8,10 +8,7 @@ use super::content::InputContent; use super::source::{InputSource, ResolvedInputSource}; use crate::Preprocessor; use crate::filter::PathExcludes; -use crate::types::file::FileExtensions; -use crate::types::resolver::UrlContentResolver; -use crate::types::{FileType, RequestError}; -use crate::types::{FileType, file::FileExtensions, resolver::UrlContentResolver}; +use crate::types::{FileType, RequestError, file::FileExtensions, resolver::UrlContentResolver}; use crate::{ErrorKind, Result}; use async_stream::try_stream; use futures::stream::{Stream, StreamExt}; From 048edd6aebdab1a09f947578b1ec993943b152f9 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 13 Nov 2025 09:52:28 +1000 Subject: [PATCH 41/63] fix invalid glob test --- lychee-bin/tests/cli.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 8ac162f8d2..e443259176 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -3019,8 +3019,7 @@ The config file should contain every possible key for 
documentation purposes." .arg("invalid-glob[") .assert() .failure() - .code(2); - // TODO: change above exit code to 1 after https://github.com/lycheeverse/lychee/pull/1869 + .code(1); Ok(()) } From df91d1eb3d554e2ef07cd5c2a333d1add0964171 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 13 Nov 2025 10:14:16 +1000 Subject: [PATCH 42/63] Preprocessor PathBuf --- lychee-bin/src/options.rs | 1 + lychee-lib/src/types/error.rs | 4 ++-- lychee-lib/src/types/preprocessor/mod.rs | 21 +++++++++++++++------ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index f0891b7663..dc026b5b81 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -861,6 +861,7 @@ and existing cookies will be updated." short, long, value_name = "COMMAND", + value_parser = Preprocessor::new, long_help = r#"Preprocess input files. For each file input, this flag causes lychee to execute `COMMAND PATH` and process its standard output instead of the original contents of PATH. This allows you to diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index a2db4b2746..7a5947bbfa 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -174,7 +174,7 @@ pub enum ErrorKind { #[error("Preprocessor command '{command}' failed: {reason}")] PreprocessorError { /// The command which did not execute successfully - command: String, + command: PathBuf, /// The reason the command failed reason: String, }, @@ -336,7 +336,7 @@ impl ErrorKind { [name] => format!("An index file ({name}) is required"), [init @ .., tail] => format!("An index file ({}, or {}) is required", init.join(", "), tail), }.into(), - ErrorKind::PreprocessorError{command, reason} => Some(format!("Command '{command}' failed {reason}. Check value of the pre option")) + ErrorKind::PreprocessorError{command, reason} => Some(format!("Command '{}' failed {}. 
Check value of the pre option", command.to_string_lossy(), reason)) } } diff --git a/lychee-lib/src/types/preprocessor/mod.rs b/lychee-lib/src/types/preprocessor/mod.rs index 0999b15332..85c939ca58 100644 --- a/lychee-lib/src/types/preprocessor/mod.rs +++ b/lychee-lib/src/types/preprocessor/mod.rs @@ -1,4 +1,4 @@ -use std::{path::PathBuf, process::Command}; +use std::{path::Path, path::PathBuf, process::Command}; use serde::Deserialize; @@ -14,19 +14,28 @@ use super::{ErrorKind, Result}; /// create a shell script to specify it as preprocessor command. #[derive(Debug, Clone, Deserialize, PartialEq, Eq)] pub struct Preprocessor { - command: String, + command: PathBuf, } -impl From for Preprocessor { - fn from(command: String) -> Self { - Self { command } +impl Preprocessor { + /// Constructs a new [`Preprocessor`] from the given command, while + /// validating that the command exists. + pub fn new(command: &str) -> Result { + let command = PathBuf::from(command); + match command.metadata() { + Err(e) => Err(ErrorKind::PreprocessorError { + command, + reason: format!("command not found: {e}"), + }), + Ok(_) => Ok(Self { command }), + } } } impl Preprocessor { /// Try to invoke the preprocessor command with `path` as single argument /// and return the resulting stdout. - pub(crate) fn process(&self, path: &PathBuf) -> Result { + pub(crate) fn process(&self, path: &Path) -> Result { let output = Command::new(&self.command) .arg(path) .output() From 347a030bae096a25189b44bc9389b0e99c856d60 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 13 Nov 2025 10:14:21 +1000 Subject: [PATCH 43/63] Revert "Preprocessor PathBuf" ohhhhhh we can't do that because of PATH searches This reverts commit df91d1eb3d554e2ef07cd5c2a333d1add0964171. 
--- lychee-bin/src/options.rs | 1 - lychee-lib/src/types/error.rs | 4 ++-- lychee-lib/src/types/preprocessor/mod.rs | 21 ++++++--------------- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index dc026b5b81..f0891b7663 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -861,7 +861,6 @@ and existing cookies will be updated." short, long, value_name = "COMMAND", - value_parser = Preprocessor::new, long_help = r#"Preprocess input files. For each file input, this flag causes lychee to execute `COMMAND PATH` and process its standard output instead of the original contents of PATH. This allows you to diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 7a5947bbfa..a2db4b2746 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -174,7 +174,7 @@ pub enum ErrorKind { #[error("Preprocessor command '{command}' failed: {reason}")] PreprocessorError { /// The command which did not execute successfully - command: PathBuf, + command: String, /// The reason the command failed reason: String, }, @@ -336,7 +336,7 @@ impl ErrorKind { [name] => format!("An index file ({name}) is required"), [init @ .., tail] => format!("An index file ({}, or {}) is required", init.join(", "), tail), }.into(), - ErrorKind::PreprocessorError{command, reason} => Some(format!("Command '{}' failed {}. Check value of the pre option", command.to_string_lossy(), reason)) + ErrorKind::PreprocessorError{command, reason} => Some(format!("Command '{command}' failed {reason}. 
Check value of the pre option")) } } diff --git a/lychee-lib/src/types/preprocessor/mod.rs b/lychee-lib/src/types/preprocessor/mod.rs index 85c939ca58..0999b15332 100644 --- a/lychee-lib/src/types/preprocessor/mod.rs +++ b/lychee-lib/src/types/preprocessor/mod.rs @@ -1,4 +1,4 @@ -use std::{path::Path, path::PathBuf, process::Command}; +use std::{path::PathBuf, process::Command}; use serde::Deserialize; @@ -14,28 +14,19 @@ use super::{ErrorKind, Result}; /// create a shell script to specify it as preprocessor command. #[derive(Debug, Clone, Deserialize, PartialEq, Eq)] pub struct Preprocessor { - command: PathBuf, + command: String, } -impl Preprocessor { - /// Constructs a new [`Preprocessor`] from the given command, while - /// validating that the command exists. - pub fn new(command: &str) -> Result { - let command = PathBuf::from(command); - match command.metadata() { - Err(e) => Err(ErrorKind::PreprocessorError { - command, - reason: format!("command not found: {e}"), - }), - Ok(_) => Ok(Self { command }), - } +impl From for Preprocessor { + fn from(command: String) -> Self { + Self { command } } } impl Preprocessor { /// Try to invoke the preprocessor command with `path` as single argument /// and return the resulting stdout. 
- pub(crate) fn process(&self, path: &Path) -> Result { + pub(crate) fn process(&self, path: &PathBuf) -> Result { let output = Command::new(&self.command) .arg(path) .output() From 33141c9bd57e8eaaa68dc16a76066619c02a6cdb Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 13 Nov 2025 10:27:38 +1000 Subject: [PATCH 44/63] review comments aside from struct variant --- examples/collect_links/collect_links.rs | 2 +- lychee-bin/src/commands/check.rs | 35 +++++++++++++------------ lychee-lib/src/types/input/input.rs | 4 +-- lychee-lib/src/types/request.rs | 2 +- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs index 74e95b0f03..5820d6ceed 100644 --- a/examples/collect_links/collect_links.rs +++ b/examples/collect_links/collect_links.rs @@ -1,6 +1,6 @@ use lychee_lib::{Collector, Input, InputSource, RequestError}; use reqwest::Url; -use std::{collections::HashSet, path::PathBuf, result::Result}; +use std::{collections::HashSet, path::PathBuf}; use tokio_stream::StreamExt; #[tokio::main] diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 4e5d4cffed..bb6500882c 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -10,10 +10,10 @@ use reqwest::Url; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; +use lychee_lib::InputSource; use lychee_lib::RequestError; use lychee_lib::archive::Archive; use lychee_lib::{Client, ErrorKind, Request, Response, Uri}; -use lychee_lib::{InputSource, Result}; use lychee_lib::{ResponseBody, Status}; use crate::formatters::get_response_formatter; @@ -28,9 +28,9 @@ use super::CommandParams; pub(crate) async fn check( params: CommandParams, -) -> Result<(ResponseStats, Arc, ExitCode)> +) -> Result<(ResponseStats, Arc, ExitCode), ErrorKind> where - S: futures::Stream>, + S: futures::Stream>, { // Setup let (send_req, recv_req) = mpsc::channel(params.cfg.max_concurrency); 
@@ -177,19 +177,20 @@ async fn suggest_archived_links( // the show_results_task to finish async fn send_inputs_loop( requests: S, - send_req: mpsc::Sender>, + send_req: mpsc::Sender>, bar: Option, -) -> Result<()> +) -> Result<(), ErrorKind> where - S: futures::Stream>, + S: futures::Stream>, { tokio::pin!(requests); while let Some(request) = requests.next().await { if let Some(pb) = &bar { pb.inc_length(1); - if let Ok(request) = &request { - pb.set_message(request.to_string()); - } + match &request { + Ok(x) => pb.set_message(x.to_string()), + Err(e) => pb.set_message(e.to_string()), + }; } send_req.send(request).await.expect("Cannot send request"); } @@ -198,12 +199,12 @@ where /// Reads from the request channel and updates the progress bar status async fn progress_bar_task( - mut recv_resp: mpsc::Receiver>, + mut recv_resp: mpsc::Receiver>, verbose: Verbosity, pb: Option, formatter: Box, mut stats: ResponseStats, -) -> Result<(Option, ResponseStats)> { +) -> Result<(Option, ResponseStats), ErrorKind> { while let Some(response) = recv_resp.recv().await { let response = response?; show_progress( @@ -232,8 +233,8 @@ fn init_progress_bar(initial_message: &'static str) -> ProgressBar { } async fn request_channel_task( - recv_req: mpsc::Receiver>, - send_resp: mpsc::Sender>, + recv_req: mpsc::Receiver>, + send_resp: mpsc::Sender>, max_concurrency: usize, client: Client, cache: Arc, @@ -243,7 +244,7 @@ async fn request_channel_task( StreamExt::for_each_concurrent( ReceiverStream::new(recv_req), max_concurrency, - |request: std::result::Result| async { + |request: Result| async { let response = handle( &client, cache.clone(), @@ -293,9 +294,9 @@ async fn handle( client: &Client, cache: Arc, cache_exclude_status: HashSet, - request: std::result::Result, + request: Result, accept: HashSet, -) -> Result { +) -> Result { // Note that the RequestError cases bypass the cache. 
let request = match request { Ok(x) => x, @@ -374,7 +375,7 @@ fn show_progress( response: &Response, formatter: &dyn ResponseFormatter, verbose: &Verbosity, -) -> Result<()> { +) -> Result<(), ErrorKind> { // In case the log level is set to info, we want to show the detailed // response output. Otherwise, we only show the essential information // (typically the status code and the URL, but this is dependent on the diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index ba787777bb..9de6da9eaa 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -107,8 +107,8 @@ impl Input { // Handle simple cases that don't need resolution. Also, perform // simple *stateful* checks for more complex input sources. // - // However, stateless well-formedness checks (e.g., checking glob - // syntax) should be done in Input::new. + // Stateless well-formedness checks (e.g., checking glob syntax) + // are done in InputSource::new. match self.source { InputSource::RemoteUrl(url) => { match resolver.url_contents(*url).await { diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index ab792fd11e..db27458dcd 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -17,7 +17,7 @@ pub enum RequestError { #[error("Error reading input '{0}': {1}")] GetInputContent(InputSource, #[source] Box), - /// Unable to load an input source directly provided by the user. + /// Unable to load an input source directly specified by the user. 
#[error("Error reading user input '{0}': {1}")] UserInputContent(InputSource, #[source] Box), } From 012fd68010ffb7c28ff2b56a5ac1c12618f51c3f Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 13 Nov 2025 10:29:31 +1000 Subject: [PATCH 45/63] lint --- lychee-bin/src/commands/check.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index bb6500882c..d627f82e65 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -190,7 +190,7 @@ where match &request { Ok(x) => pb.set_message(x.to_string()), Err(e) => pb.set_message(e.to_string()), - }; + } } send_req.send(request).await.expect("Cannot send request"); } From 8c34cf41f018a57b153b4588faa84d648ed48d29 Mon Sep 17 00:00:00 2001 From: katrinafyi <39479354+katrinafyi@users.noreply.github.com> Date: Thu, 13 Nov 2025 10:46:12 +1000 Subject: [PATCH 46/63] capitalise paragraph comments Co-authored-by: Matthias Endler --- lychee-lib/src/types/input/input.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index 9de6da9eaa..cf56fa0423 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -124,8 +124,8 @@ impl Input { .map(|_| ()) .map_err(|e| ErrorKind::DirTraversal(ignore::Error::Io(e))) } else { - // this checks existence without requiring an open. opening here, - // then re-opening later, might cause problems with pipes. this + // This checks existence without requiring an open. Opening here, + // then re-opening later, might cause problems with pipes. This // does not validate permissions. 
path.metadata() .map(|_| ()) From e066600db1ab92a60b374a0743764cb113ef7d29 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 13 Nov 2025 11:00:11 +1000 Subject: [PATCH 47/63] comment for internal function --- lychee-lib/src/utils/request.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 1610f692e2..e0787c8bde 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -191,6 +191,8 @@ mod tests { use super::*; + /// Create requests from the given raw URIs and returns requests that were + /// constructed successfully, silently ignoring link parsing errors. fn create_ok_only( uris: Vec, source: &ResolvedInputSource, From 918e216500dfe9d6b235db10c31122ace17963a8 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 13 Nov 2025 11:13:04 +1000 Subject: [PATCH 48/63] add light test case test_create_request_from_relative_file_path_errors --- lychee-lib/src/utils/request.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index e0787c8bde..b6f904dda7 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -519,6 +519,33 @@ mod tests { ); } + #[test] + fn test_create_request_from_relative_file_path_errors() { + // relative links unsupported from stdin + assert!( + create_request( + &raw_uri("file.html"), + &ResolvedInputSource::Stdin, + None, + None, + None, + ) + .is_err() + ); + + // error because no root-dir and no base-url + assert!( + create_request( + &raw_uri("/file.html"), + &ResolvedInputSource::FsPath(PathBuf::from("page.html")), + None, + None, + None, + ) + .is_err() + ); + } + #[test] fn test_create_request_from_absolute_file_path() { let base = Base::Local(PathBuf::from("/tmp/lychee")); From acfa23c131877d56c13a890903e183d0a3ef3c46 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 13 Nov 2025 11:14:12 +1000 Subject: [PATCH 49/63] comment2 --- 
lychee-lib/src/utils/request.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index b6f904dda7..7223e6bd2a 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -193,6 +193,10 @@ mod tests { /// Create requests from the given raw URIs and returns requests that were /// constructed successfully, silently ignoring link parsing errors. + /// + /// This reduces the Result handling which is needed in test cases. Test + /// cases can still detect the unexpected appearance of errors by the + /// length being different. fn create_ok_only( uris: Vec, source: &ResolvedInputSource, From 759b6912af8a830e9280905580140a5b1aef949a Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 16 Nov 2025 15:12:59 +1000 Subject: [PATCH 50/63] RequestBatch --- lychee-lib/src/utils/request.rs | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 7223e6bd2a..fe0b0cea1b 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -12,6 +12,11 @@ use crate::{ utils::{path, url}, }; +pub(crate) struct RequestBatch { + requests: HashSet, + errors: Vec, +} + /// Extract basic auth credentials for a given URL. 
pub(crate) fn extract_credentials( extractor: Option<&BasicAuthExtractor>, @@ -122,16 +127,27 @@ pub(crate) fn create( root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, -) -> HashSet> { +) -> RequestBatch { let base = base.cloned().or_else(|| Base::from_source(source)); - uris.into_iter() - .map(|raw_uri| { - create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| { - RequestError::CreateRequestItem(raw_uri.clone(), source.clone(), Box::new(e)) - }) - }) - .collect() + let mut requests = HashSet::::new(); + let mut errors = Vec::::new(); + + for raw_uri in uris.into_iter() { + let result = create_request(&raw_uri, source, root_dir, base.as_ref(), extractor); + match result { + Ok(request) => { + requests.insert(request); + } + Err(e) => errors.push(RequestError::CreateRequestItem( + raw_uri.clone(), + source.clone(), + Box::new(e), + )), + } + } + + RequestBatch { requests, errors } } /// Create a URI from a path From bbbd94b2a59a0dda5c339d29d1e053be96063081 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 16 Nov 2025 15:13:03 +1000 Subject: [PATCH 51/63] Revert "RequestBatch" This reverts commit 759b6912af8a830e9280905580140a5b1aef949a. --- lychee-lib/src/utils/request.rs | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index fe0b0cea1b..7223e6bd2a 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -12,11 +12,6 @@ use crate::{ utils::{path, url}, }; -pub(crate) struct RequestBatch { - requests: HashSet, - errors: Vec, -} - /// Extract basic auth credentials for a given URL. 
pub(crate) fn extract_credentials( extractor: Option<&BasicAuthExtractor>, @@ -127,27 +122,16 @@ pub(crate) fn create( root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, -) -> RequestBatch { +) -> HashSet> { let base = base.cloned().or_else(|| Base::from_source(source)); - let mut requests = HashSet::::new(); - let mut errors = Vec::::new(); - - for raw_uri in uris.into_iter() { - let result = create_request(&raw_uri, source, root_dir, base.as_ref(), extractor); - match result { - Ok(request) => { - requests.insert(request); - } - Err(e) => errors.push(RequestError::CreateRequestItem( - raw_uri.clone(), - source.clone(), - Box::new(e), - )), - } - } - - RequestBatch { requests, errors } + uris.into_iter() + .map(|raw_uri| { + create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| { + RequestError::CreateRequestItem(raw_uri.clone(), source.clone(), Box::new(e)) + }) + }) + .collect() } /// Create a URI from a path From b7bb1742bbdbe687e7e0ee1eb177ceec827eaf60 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 16 Nov 2025 15:14:52 +1000 Subject: [PATCH 52/63] use Vec> --- clippy.toml | 8 -------- lychee-lib/src/utils/request.rs | 7 ++----- 2 files changed, 2 insertions(+), 13 deletions(-) delete mode 100644 clippy.toml diff --git a/clippy.toml b/clippy.toml deleted file mode 100644 index ebb0a1f9d5..0000000000 --- a/clippy.toml +++ /dev/null @@ -1,8 +0,0 @@ -# This is needed when using ErrorKind within a hashed collection. This is because -# ErrorKind contains some errors which have a backtrace and those backtraces -# make use of mutable cells. We expect this is safe, since the mutability is -# contained within Once and so probably only mutated during initialisation. -# -# Therefore, the hash value *should* remain stable while it's within the hashed -# collection. 
-ignore-interior-mutability = ["lychee_lib::ErrorKind"] diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 7223e6bd2a..4f29e06634 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -1,9 +1,6 @@ use percent_encoding::percent_decode_str; use reqwest::Url; -use std::{ - collections::HashSet, - path::{Path, PathBuf}, -}; +use std::path::{Path, PathBuf}; use crate::{ Base, BasicAuthCredentials, ErrorKind, Request, RequestError, Result, Uri, @@ -122,7 +119,7 @@ pub(crate) fn create( root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, -) -> HashSet> { +) -> Vec> { let base = base.cloned().or_else(|| Base::from_source(source)); uris.into_iter() From 438a2d3a7d4dfc5280993c546edd91288a41c51b Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 16 Nov 2025 15:23:04 +1000 Subject: [PATCH 53/63] add LycheeResult to disambiguate from std Result --- lychee-bin/src/commands/dump.rs | 5 ++--- lychee-bin/src/commands/mod.rs | 7 ++----- lychee-lib/src/collector.rs | 24 ++++++++++++------------ lychee-lib/src/lib.rs | 6 +++--- lychee-lib/src/types/input/input.rs | 21 ++++++++++++--------- lychee-lib/src/types/mod.rs | 3 +++ lychee-lib/src/utils/request.rs | 16 ++++++++-------- 7 files changed, 42 insertions(+), 40 deletions(-) diff --git a/lychee-bin/src/commands/dump.rs b/lychee-bin/src/commands/dump.rs index 93cc8b215d..88570933e3 100644 --- a/lychee-bin/src/commands/dump.rs +++ b/lychee-bin/src/commands/dump.rs @@ -2,7 +2,6 @@ use log::error; use log::warn; use lychee_lib::Request; use lychee_lib::RequestError; -use lychee_lib::Result; use std::fs; use std::io::{self, Write}; use tokio_stream::StreamExt; @@ -13,9 +12,9 @@ use crate::verbosity::Verbosity; use super::CommandParams; /// Dump all detected links to stdout without checking them -pub(crate) async fn dump(params: CommandParams) -> Result +pub(crate) async fn dump(params: CommandParams) -> lychee_lib::Result where - S: 
futures::Stream>, + S: futures::Stream>, { let requests = params.requests; tokio::pin!(requests); diff --git a/lychee-bin/src/commands/mod.rs b/lychee-bin/src/commands/mod.rs index 989d2ef0ff..5b2c6f62db 100644 --- a/lychee-bin/src/commands/mod.rs +++ b/lychee-bin/src/commands/mod.rs @@ -15,13 +15,10 @@ use std::sync::Arc; use crate::cache::Cache; use crate::options::Config; use lychee_lib::RequestError; -use lychee_lib::Result; use lychee_lib::{Client, Request}; /// Parameters passed to every command -pub(crate) struct CommandParams< - S: futures::Stream>, -> { +pub(crate) struct CommandParams>> { pub(crate) client: Client, pub(crate) cache: Arc, pub(crate) requests: S, @@ -33,7 +30,7 @@ pub(crate) struct CommandParams< /// # Errors /// /// Returns an error if the output file cannot be opened. -fn create_writer(output: Option) -> Result> { +fn create_writer(output: Option) -> lychee_lib::Result> { Ok(match output { Some(path) => Box::new(fs::OpenOptions::new().append(true).open(path)?), None => Box::new(io::stdout().lock()), diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 48d53fb3be..20c7d26630 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -5,8 +5,8 @@ use crate::filter::PathExcludes; use crate::types::resolver::UrlContentResolver; use crate::{ - Base, Input, Request, RequestError, Result, basic_auth::BasicAuthExtractor, extract::Extractor, - types::FileExtensions, types::uri::raw::RawUri, utils::request, + Base, Input, LycheeResult, Request, RequestError, basic_auth::BasicAuthExtractor, + extract::Extractor, types::FileExtensions, types::uri::raw::RawUri, utils::request, }; use futures::TryStreamExt; use futures::{ @@ -72,7 +72,7 @@ impl Collector { /// /// Returns an `Err` if the `root_dir` is not an absolute path /// or if the reqwest `Client` fails to build - pub fn new(root_dir: Option, base: Option) -> Result { + pub fn new(root_dir: Option, base: Option) -> LycheeResult { if let Some(root_dir) = 
&root_dir && root_dir.is_relative() { @@ -183,7 +183,7 @@ impl Collector { pub fn collect_links( self, inputs: HashSet, - ) -> impl Stream> { + ) -> impl Stream> { self.collect_links_from_file_types(inputs, crate::types::FileType::default_extensions()) } @@ -198,7 +198,7 @@ impl Collector { self, inputs: HashSet, extensions: FileExtensions, - ) -> impl Stream> { + ) -> impl Stream> { let skip_missing_inputs = self.skip_missing_inputs; let skip_hidden = self.skip_hidden; let skip_ignored = self.skip_ignored; @@ -258,7 +258,7 @@ impl Collector { base.as_ref(), basic_auth_extractor.as_ref(), ); - std::result::Result::Ok(stream::iter(requests)) + Result::Ok(stream::iter(requests)) } }) .try_flatten() @@ -276,7 +276,7 @@ mod tests { use super::*; use crate::{ - Result, Uri, + LycheeResult, Uri, filter::PathExcludes, types::{FileType, Input, InputSource}, }; @@ -286,7 +286,7 @@ mod tests { inputs: HashSet, root_dir: Option, base: Option, - ) -> Result> { + ) -> LycheeResult> { let responses = Collector::new(root_dir, base)?.collect_links(inputs); Ok(responses.map(|r| r.unwrap().uri).collect().await) } @@ -300,7 +300,7 @@ mod tests { root_dir: Option, base: Option, extensions: FileExtensions, - ) -> Result> { + ) -> LycheeResult> { let responses = Collector::new(root_dir, base)? 
.include_verbatim(true) .collect_links_from_file_types(inputs, extensions); @@ -314,7 +314,7 @@ mod tests { const TEST_GLOB_2_MAIL: &str = "test@glob-2.io"; #[tokio::test] - async fn test_file_without_extension_is_plaintext() -> Result<()> { + async fn test_file_without_extension_is_plaintext() -> LycheeResult<()> { let temp_dir = tempfile::tempdir().unwrap(); // Treat as plaintext file (no extension) let file_path = temp_dir.path().join("README"); @@ -339,7 +339,7 @@ mod tests { } #[tokio::test] - async fn test_url_without_extension_is_html() -> Result<()> { + async fn test_url_without_extension_is_html() -> LycheeResult<()> { let input = Input::new("https://example.com/", None, true)?; let contents: Vec<_> = input .get_contents( @@ -360,7 +360,7 @@ mod tests { } #[tokio::test] - async fn test_collect_links() -> Result<()> { + async fn test_collect_links() -> LycheeResult<()> { let temp_dir = tempfile::tempdir().unwrap(); let temp_dir_path = temp_dir.path(); diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index 04abc805d6..6c917fda92 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -95,8 +95,8 @@ pub use crate::{ types::{ AcceptRange, AcceptRangeError, Base, BasicAuthCredentials, BasicAuthSelector, CacheStatus, CookieJar, ErrorKind, FileExtensions, FileType, Input, InputContent, InputResolver, - InputSource, Preprocessor, Redirects, Request, RequestError, ResolvedInputSource, Response, - ResponseBody, Result, Status, StatusCodeExcluder, StatusCodeSelector, uri::raw::RawUri, - uri::valid::Uri, + InputSource, LycheeResult, Preprocessor, Redirects, Request, RequestError, + ResolvedInputSource, Response, ResponseBody, Result, Status, StatusCodeExcluder, + StatusCodeSelector, uri::raw::RawUri, uri::valid::Uri, }, }; diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index cf56fa0423..9fd80992c8 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -9,7 +9,7 @@ use 
super::source::{InputSource, ResolvedInputSource}; use crate::Preprocessor; use crate::filter::PathExcludes; use crate::types::{FileType, RequestError, file::FileExtensions, resolver::UrlContentResolver}; -use crate::{ErrorKind, Result}; +use crate::{ErrorKind, LycheeResult}; use async_stream::try_stream; use futures::stream::{Stream, StreamExt}; use std::path::{Path, PathBuf}; @@ -42,7 +42,7 @@ impl Input { input: &str, file_type_hint: Option, glob_ignore_case: bool, - ) -> Result { + ) -> LycheeResult { let source = InputSource::new(input, glob_ignore_case)?; Ok(Self { source, @@ -57,7 +57,7 @@ impl Input { /// Returns an error if: /// - the input does not exist (i.e. the path is invalid) /// - the input cannot be parsed as a URL - pub fn from_value(value: &str) -> Result { + pub fn from_value(value: &str) -> LycheeResult { Self::new(value, None, false) } @@ -95,7 +95,7 @@ impl Input { resolver: UrlContentResolver, excluded_paths: PathExcludes, preprocessor: Option, - ) -> impl Stream> { + ) -> impl Stream> { try_stream! { let source = self.source.clone(); @@ -223,7 +223,7 @@ impl Input { skip_hidden: bool, skip_ignored: bool, excluded_paths: &PathExcludes, - ) -> impl Stream> { + ) -> impl Stream> { InputResolver::resolve( &self, file_extensions, @@ -250,7 +250,7 @@ impl Input { pub async fn path_content + AsRef + Clone>( path: P, preprocessor: Option<&Preprocessor>, - ) -> Result { + ) -> LycheeResult { let path = path.into(); let content = Self::get_content(&path, preprocessor).await?; @@ -266,7 +266,7 @@ impl Input { /// # Errors /// /// Returns an error if stdin cannot be read - pub async fn stdin_content(file_type_hint: Option) -> Result { + pub async fn stdin_content(file_type_hint: Option) -> LycheeResult { let mut content = String::new(); let mut stdin = stdin(); stdin.read_to_string(&mut content).await?; @@ -288,7 +288,10 @@ impl Input { /// Get content of file. 
/// Get preprocessed file content if [`Preprocessor`] is [`Some`] - async fn get_content(path: &PathBuf, preprocessor: Option<&Preprocessor>) -> Result { + async fn get_content( + path: &PathBuf, + preprocessor: Option<&Preprocessor>, + ) -> LycheeResult { if let Some(pre) = preprocessor { pre.process(path) } else { @@ -302,7 +305,7 @@ impl Input { impl TryFrom<&str> for Input { type Error = crate::ErrorKind; - fn try_from(value: &str) -> std::result::Result { + fn try_from(value: &str) -> Result { Self::from_value(value) } } diff --git a/lychee-lib/src/types/mod.rs b/lychee-lib/src/types/mod.rs index 3d44747a7d..f3d95ec60e 100644 --- a/lychee-lib/src/types/mod.rs +++ b/lychee-lib/src/types/mod.rs @@ -35,3 +35,6 @@ pub use status_code::*; /// The lychee `Result` type pub type Result = std::result::Result; + +/// The lychee `Result` type, aliased to avoid conflicting with std::result::Result. +pub type LycheeResult = Result; diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 4f29e06634..b7241c32b6 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -3,7 +3,7 @@ use reqwest::Url; use std::path::{Path, PathBuf}; use crate::{ - Base, BasicAuthCredentials, ErrorKind, Request, RequestError, Result, Uri, + Base, BasicAuthCredentials, ErrorKind, LycheeResult, Request, RequestError, Uri, basic_auth::BasicAuthExtractor, types::{ResolvedInputSource, uri::raw::RawUri}, utils::{path, url}, @@ -24,7 +24,7 @@ fn create_request( root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, -) -> Result { +) -> LycheeResult { let uri = try_parse_into_uri(raw_uri, source, root_dir, base)?; let source = source.clone(); let element = raw_uri.element.clone(); @@ -50,7 +50,7 @@ fn try_parse_into_uri( source: &ResolvedInputSource, root_dir: Option<&PathBuf>, base: Option<&Base>, -) -> Result { +) -> LycheeResult { let text = prepend_root_dir_if_absolute_local_link(&raw_uri.text, root_dir); let 
uri = match Uri::try_from(raw_uri.clone()) { Ok(uri) => uri, @@ -86,7 +86,7 @@ fn create_uri_from_file_path( file_path: &Path, link_text: &str, ignore_absolute_local_links: bool, -) -> Result { +) -> LycheeResult { let target_path = if is_anchor(link_text) { // For anchors, we need to append the anchor to the file name. let file_name = file_path @@ -119,7 +119,7 @@ pub(crate) fn create( root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, -) -> Vec> { +) -> Vec> { let base = base.cloned().or_else(|| Base::from_source(source)); uris.into_iter() @@ -146,7 +146,7 @@ fn resolve_and_create_url( src_path: &Path, dest_path: &str, ignore_absolute_local_links: bool, -) -> Result { +) -> LycheeResult { let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path); // Decode the destination path to avoid double-encoding @@ -191,7 +191,7 @@ mod tests { /// Create requests from the given raw URIs and returns requests that were /// constructed successfully, silently ignoring link parsing errors. /// - /// This reduces the Result handling which is needed in test cases. Test + /// This reduces the LycheeResult handling which is needed in test cases. Test /// cases can still detect the unexpected appearance of errors by the /// length being different. fn create_ok_only( @@ -203,7 +203,7 @@ mod tests { ) -> HashSet { create(uris, source, root_dir, base, extractor) .into_iter() - .filter_map(std::result::Result::ok) + .filter_map(Result::ok) .collect() } From 50e91d1936082e4ec2c7a55f3b005feafa24d427 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 16 Nov 2025 15:34:25 +1000 Subject: [PATCH 54/63] manually deduplicate valid requests but not errors. 
--- lychee-lib/src/types/mod.rs | 2 +- lychee-lib/src/utils/request.rs | 36 ++++++++++++++++++++++++--------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/lychee-lib/src/types/mod.rs b/lychee-lib/src/types/mod.rs index f3d95ec60e..6fdee6de1c 100644 --- a/lychee-lib/src/types/mod.rs +++ b/lychee-lib/src/types/mod.rs @@ -36,5 +36,5 @@ pub use status_code::*; /// The lychee `Result` type pub type Result = std::result::Result; -/// The lychee `Result` type, aliased to avoid conflicting with std::result::Result. +/// The lychee `Result` type, aliased to avoid conflicting with [`std::result::Result`]. pub type LycheeResult = Result; diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index b7241c32b6..dd4835da46 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -1,5 +1,6 @@ use percent_encoding::percent_decode_str; use reqwest::Url; +use std::collections::HashSet; use std::path::{Path, PathBuf}; use crate::{ @@ -109,10 +110,11 @@ fn create_uri_from_file_path( } /// Create requests out of the collected URLs. -/// Only keeps "valid" URLs. This filters out anchors for example. +/// Returns a vector of valid URLs and errors. Valid URLs are deduplicated, +/// request errors are not deduplicated. /// /// If a URLs is ignored (because of the current settings), -/// it will not be added to the `HashSet`. +/// it will not be added to the results. 
pub(crate) fn create( uris: Vec, source: &ResolvedInputSource, @@ -122,13 +124,27 @@ pub(crate) fn create( ) -> Vec> { let base = base.cloned().or_else(|| Base::from_source(source)); - uris.into_iter() - .map(|raw_uri| { - create_request(&raw_uri, source, root_dir, base.as_ref(), extractor).map_err(|e| { - RequestError::CreateRequestItem(raw_uri.clone(), source.clone(), Box::new(e)) - }) - }) - .collect() + let mut requests = HashSet::::new(); + let mut errors = Vec::::new(); + + for raw_uri in uris.into_iter() { + let result = create_request(&raw_uri, source, root_dir, base.as_ref(), extractor); + match result { + Ok(request) => { + requests.insert(request); + } + Err(e) => errors.push(RequestError::CreateRequestItem( + raw_uri.clone(), + source.clone(), + Box::new(e), + )), + } + } + + let errs_iter = errors.into_iter().map(Result::Err); + let reqs_iter = requests.into_iter().map(Result::Ok); + + reqs_iter.chain(errs_iter).collect() } /// Create a URI from a path @@ -200,7 +216,7 @@ mod tests { root_dir: Option<&PathBuf>, base: Option<&Base>, extractor: Option<&BasicAuthExtractor>, - ) -> HashSet { + ) -> Vec { create(uris, source, root_dir, base, extractor) .into_iter() .filter_map(Result::ok) From 8a6bc3fc695b127b0860ec3989a36d3e6c3272e1 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 16 Nov 2025 15:37:35 +1000 Subject: [PATCH 55/63] change preprocessor tests to expect link checking errors --- lychee-bin/tests/cli.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index e443259176..2742aa4bd0 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -3072,7 +3072,8 @@ The config file should contain every possible key for documentation purposes." 
.arg(file) .assert() .failure() - .stderr(contains("Error: Preprocessor command 'program does not exist' failed: could not start: No such file or directory (os error 2)")); + .code(2) + .stdout(contains("Preprocessor command 'program does not exist' failed: could not start: No such file or directory")); } #[test] @@ -3085,8 +3086,10 @@ The config file should contain every possible key for documentation purposes." .arg(&file) .assert() .failure() - .stderr(contains(format!( - "Error: Preprocessor command '{}' failed: exited with non-zero code: ", script.as_os_str().to_str().unwrap() + .code(2) + .stdout(contains(format!( + "Preprocessor command '{}' failed: exited with non-zero code: ", + script.as_os_str().to_str().unwrap() ))); let script = fixtures_path!().join("pre").join("error_message.sh"); @@ -3096,8 +3099,10 @@ The config file should contain every possible key for documentation purposes." .arg(file) .assert() .failure() - .stderr(contains(format!( - "Error: Preprocessor command '{}' failed: exited with non-zero code: Some error message", script.as_os_str().to_str().unwrap() + .code(2) + .stdout(contains(format!( + "Preprocessor command '{}' failed: exited with non-zero code: Some error message", + script.as_os_str().to_str().unwrap() ))); } } From 9a34dac9c0a20abc947276046ec15689e0934ec6 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 16 Nov 2025 15:45:43 +1000 Subject: [PATCH 56/63] clippy --- lychee-lib/src/utils/request.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index dd4835da46..d00fe988dc 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -127,7 +127,7 @@ pub(crate) fn create( let mut requests = HashSet::::new(); let mut errors = Vec::::new(); - for raw_uri in uris.into_iter() { + for raw_uri in uris { let result = create_request(&raw_uri, source, root_dir, base.as_ref(), extractor); match result { Ok(request) => { @@ -207,7 
+207,7 @@ mod tests { /// Create requests from the given raw URIs and returns requests that were /// constructed successfully, silently ignoring link parsing errors. /// - /// This reduces the LycheeResult handling which is needed in test cases. Test + /// This reduces the `Result` handling which is needed in test cases. Test /// cases can still detect the unexpected appearance of errors by the /// length being different. fn create_ok_only( From 6db8db46380787775c0e106378ee21ff46e0bd8b Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 16 Nov 2025 15:49:32 +1000 Subject: [PATCH 57/63] merge main_command removal --- lychee-bin/tests/cli.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index e0721a1a84..b435e3514e 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -3003,7 +3003,7 @@ The config file should contain every possible key for documentation purposes." // should fail as a CLI error, not a link checking error. #[test] fn test_invalid_user_input_source() -> Result<()> { - main_command!() + cargo_bin_cmd!() .arg("http://website.invalid") .assert() .failure() @@ -3013,7 +3013,7 @@ The config file should contain every possible key for documentation purposes." 
// doesn't seem to be an equivalent to chmod on the windows API: // https://doc.rust-lang.org/std/fs/struct.Permissions.html - main_command!() + cargo_bin_cmd!() .arg("invalid-glob[") .assert() .failure() From 25b1290f38fdd791928383a67deea9816de8d635 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 16 Nov 2025 16:09:03 +1000 Subject: [PATCH 58/63] change to `error:` and add into_response helper function --- lychee-bin/src/commands/check.rs | 13 +------------ lychee-lib/src/types/request.rs | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index d627f82e65..e84726b623 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -300,18 +300,7 @@ async fn handle( // Note that the RequestError cases bypass the cache. let request = match request { Ok(x) => x, - Err(e @ RequestError::UserInputContent { .. }) => { - return Err(e.into_error()); - } - Err(e) => { - let src = e.input_source(); - - return Ok(Response::new( - Uri::try_from("error://").unwrap(), - Status::RequestError(e), - src, - )); - } + Err(e) => return e.into_response(), }; let uri = request.uri.clone(); diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index db27458dcd..532f2d3966 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -1,7 +1,7 @@ use std::{borrow::Cow, convert::TryFrom, fmt::Display}; use thiserror::Error; -use crate::{BasicAuthCredentials, ErrorKind, RawUri, Uri}; +use crate::{BasicAuthCredentials, ErrorKind, RawUri, Response, Status, Uri}; use crate::{InputSource, ResolvedInputSource}; /// An error which occurs while trying to construct a [`Request`] object. @@ -51,6 +51,25 @@ impl RequestError { Self::GetInputContent(src, _) | Self::UserInputContent(src, _) => src.clone(), } } + + /// Convert this request error into a failure [`Response`] for reporting + /// purposes. 
However, if this error was caused by failing to load a + /// user-specified input, then an Err is returned for propagating back + /// to the user. + #[must_use] + pub fn into_response(self) -> Result { + match self { + RequestError::UserInputContent(_, e) => Err(*e), + e => { + let src = e.input_source(); + Ok(Response::new( + Uri::try_from("error:").unwrap(), + Status::RequestError(e), + src, + )) + } + } + } } /// A request type that can be handle by lychee From 70f793b5f74c3563d8a9c0eb8d8c987577d9c546 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 16 Nov 2025 16:23:05 +1000 Subject: [PATCH 59/63] move request_error into separate file. --- lychee-lib/src/types/mod.rs | 4 +- lychee-lib/src/types/request.rs | 73 +--------------------- lychee-lib/src/types/request_error.rs | 89 +++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 72 deletions(-) create mode 100644 lychee-lib/src/types/request_error.rs diff --git a/lychee-lib/src/types/mod.rs b/lychee-lib/src/types/mod.rs index 6fdee6de1c..3b019e3004 100644 --- a/lychee-lib/src/types/mod.rs +++ b/lychee-lib/src/types/mod.rs @@ -12,6 +12,7 @@ pub(crate) mod mail; mod preprocessor; pub(crate) mod redirect_history; mod request; +mod request_error; pub(crate) mod resolver; mod response; mod status; @@ -28,7 +29,8 @@ pub use file::{FileExtensions, FileType}; pub use input::{Input, InputContent, InputResolver, InputSource, ResolvedInputSource}; pub use preprocessor::Preprocessor; pub use redirect_history::Redirects; -pub use request::{Request, RequestError}; +pub use request::Request; +pub use request_error::RequestError; pub use response::{Response, ResponseBody}; pub use status::Status; pub use status_code::*; diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index 532f2d3966..c03f29665d 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -1,76 +1,7 @@ use std::{borrow::Cow, convert::TryFrom, fmt::Display}; -use thiserror::Error; -use 
crate::{BasicAuthCredentials, ErrorKind, RawUri, Response, Status, Uri}; -use crate::{InputSource, ResolvedInputSource}; - -/// An error which occurs while trying to construct a [`Request`] object. -/// That is, an error which happens while trying to load links from an input -/// source. -#[derive(Error, Debug, PartialEq, Eq, Hash)] -pub enum RequestError { - /// Unable to construct a URL for a link appearing within the given source. - #[error("Error building URL for {0}: {2}")] - CreateRequestItem(RawUri, ResolvedInputSource, #[source] Box), - - /// Unable to load the content of an input source. - #[error("Error reading input '{0}': {1}")] - GetInputContent(InputSource, #[source] Box), - - /// Unable to load an input source directly specified by the user. - #[error("Error reading user input '{0}': {1}")] - UserInputContent(InputSource, #[source] Box), -} - -impl RequestError { - /// Get the underlying cause of this [`RequestError`]. - #[must_use] - pub const fn error(&self) -> &ErrorKind { - match self { - Self::CreateRequestItem(_, _, e) - | Self::GetInputContent(_, e) - | Self::UserInputContent(_, e) => e, - } - } - - /// Convert this [`RequestError`] into its source error. - #[must_use] - pub fn into_error(self) -> ErrorKind { - match self { - Self::CreateRequestItem(_, _, e) - | Self::GetInputContent(_, e) - | Self::UserInputContent(_, e) => *e, - } - } - - /// Get (a clone of) the input source within which the error happened. - #[must_use] - pub fn input_source(&self) -> InputSource { - match self { - Self::CreateRequestItem(_, src, _) => src.clone().into(), - Self::GetInputContent(src, _) | Self::UserInputContent(src, _) => src.clone(), - } - } - - /// Convert this request error into a failure [`Response`] for reporting - /// purposes. However, if this error was caused by failing to load a - /// user-specified input, then an Err is returned for propagating back - /// to the user. 
- #[must_use] - pub fn into_response(self) -> Result { - match self { - RequestError::UserInputContent(_, e) => Err(*e), - e => { - let src = e.input_source(); - Ok(Response::new( - Uri::try_from("error:").unwrap(), - Status::RequestError(e), - src, - )) - } - } - } -} +use crate::ResolvedInputSource; +use crate::{BasicAuthCredentials, ErrorKind, Uri}; /// A request type that can be handle by lychee #[derive(Debug, PartialEq, Eq, Hash, Clone)] diff --git a/lychee-lib/src/types/request_error.rs b/lychee-lib/src/types/request_error.rs new file mode 100644 index 0000000000..5fd55092c6 --- /dev/null +++ b/lychee-lib/src/types/request_error.rs @@ -0,0 +1,89 @@ +use std::convert::TryFrom; +use thiserror::Error; + +use crate::{ErrorKind, RawUri, Response, Status, Uri}; +use crate::{InputSource, ResolvedInputSource}; + +const ERROR_URL: &str = "error:"; + +/// An error which occurs while trying to construct a [`Request`] object. +/// That is, an error which happens while trying to load links from an input +/// source. +#[derive(Error, Debug, PartialEq, Eq, Hash)] +pub enum RequestError { + /// Unable to construct a URL for a link appearing within the given source. + #[error("Error building URL for {0}: {2}")] + CreateRequestItem(RawUri, ResolvedInputSource, #[source] Box), + + /// Unable to load the content of an input source. + #[error("Error reading input '{0}': {1}")] + GetInputContent(InputSource, #[source] Box), + + /// Unable to load an input source directly specified by the user. + #[error("Error reading user input '{0}': {1}")] + UserInputContent(InputSource, #[source] Box), +} + +impl RequestError { + /// Get the underlying cause of this [`RequestError`]. + #[must_use] + pub const fn error(&self) -> &ErrorKind { + match self { + Self::CreateRequestItem(_, _, e) + | Self::GetInputContent(_, e) + | Self::UserInputContent(_, e) => e, + } + } + + /// Convert this [`RequestError`] into its source error. 
+ #[must_use] + pub fn into_error(self) -> ErrorKind { + match self { + Self::CreateRequestItem(_, _, e) + | Self::GetInputContent(_, e) + | Self::UserInputContent(_, e) => *e, + } + } + + /// Get (a clone of) the input source within which the error happened. + #[must_use] + pub fn input_source(&self) -> InputSource { + match self { + Self::CreateRequestItem(_, src, _) => src.clone().into(), + Self::GetInputContent(src, _) | Self::UserInputContent(src, _) => src.clone(), + } + } + + /// Convert this request error into a (failed) [`Response`] for reporting + /// purposes. + /// + /// # Errors + /// + /// If this `RequestError` was caused by failing to load a user-specified + /// input, the underlying cause of the `RequestError` will be returned + /// as an Err. This allows the error to be propagated back to the user. + #[allow(clippy::missing_panics_doc)] + pub fn into_response(self) -> Result { + match self { + RequestError::UserInputContent(_, e) => Err(*e), + e => { + let src = e.input_source(); + Ok(Response::new( + Uri::try_from(ERROR_URL).unwrap(), + Status::RequestError(e), + src, + )) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::{ERROR_URL, Uri}; + + #[test] + fn test_error_url_parses() { + assert!(Uri::try_from(ERROR_URL).is_ok()); + } +} From a6ba2a1359d4d77eb8304589abb396b622679363 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 16 Nov 2025 16:28:19 +1000 Subject: [PATCH 60/63] revert --- lychee-lib/src/types/request.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index c03f29665d..da6844398a 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -1,8 +1,9 @@ use std::{borrow::Cow, convert::TryFrom, fmt::Display}; -use crate::ResolvedInputSource; use crate::{BasicAuthCredentials, ErrorKind, Uri}; +use super::ResolvedInputSource; + /// A request type that can be handle by lychee #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub 
struct Request { From ec943db2a9fa40597dd2346625bfab9e3dbb8320 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 17 Nov 2025 21:16:57 +1000 Subject: [PATCH 61/63] remove box (thanks to Thomas Zahner) --- lychee-lib/src/types/input/input.rs | 4 ++-- lychee-lib/src/types/request_error.rs | 10 +++++----- lychee-lib/src/utils/request.rs | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lychee-lib/src/types/input/input.rs b/lychee-lib/src/types/input/input.rs index 9fd80992c8..eadd404b74 100644 --- a/lychee-lib/src/types/input/input.rs +++ b/lychee-lib/src/types/input/input.rs @@ -100,9 +100,9 @@ impl Input { let source = self.source.clone(); let user_input_error = - move |e: ErrorKind| RequestError::UserInputContent(source.clone(), Box::new(e)); + move |e: ErrorKind| RequestError::UserInputContent(source.clone(), e); let discovered_input_error = - |e: ErrorKind| RequestError::GetInputContent(self.source.clone(), Box::new(e)); + |e: ErrorKind| RequestError::GetInputContent(self.source.clone(), e); // Handle simple cases that don't need resolution. Also, perform // simple *stateful* checks for more complex input sources. diff --git a/lychee-lib/src/types/request_error.rs b/lychee-lib/src/types/request_error.rs index 5fd55092c6..008f51d43e 100644 --- a/lychee-lib/src/types/request_error.rs +++ b/lychee-lib/src/types/request_error.rs @@ -13,15 +13,15 @@ const ERROR_URL: &str = "error:"; pub enum RequestError { /// Unable to construct a URL for a link appearing within the given source. #[error("Error building URL for {0}: {2}")] - CreateRequestItem(RawUri, ResolvedInputSource, #[source] Box), + CreateRequestItem(RawUri, ResolvedInputSource, #[source] ErrorKind), /// Unable to load the content of an input source. #[error("Error reading input '{0}': {1}")] - GetInputContent(InputSource, #[source] Box), + GetInputContent(InputSource, #[source] ErrorKind), /// Unable to load an input source directly specified by the user. 
#[error("Error reading user input '{0}': {1}")] - UserInputContent(InputSource, #[source] Box), + UserInputContent(InputSource, #[source] ErrorKind), } impl RequestError { @@ -41,7 +41,7 @@ impl RequestError { match self { Self::CreateRequestItem(_, _, e) | Self::GetInputContent(_, e) - | Self::UserInputContent(_, e) => *e, + | Self::UserInputContent(_, e) => e, } } @@ -65,7 +65,7 @@ impl RequestError { #[allow(clippy::missing_panics_doc)] pub fn into_response(self) -> Result { match self { - RequestError::UserInputContent(_, e) => Err(*e), + RequestError::UserInputContent(_, e) => Err(e), e => { let src = e.input_source(); Ok(Response::new( diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index d00fe988dc..2f8bc0afba 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -136,7 +136,7 @@ pub(crate) fn create( Err(e) => errors.push(RequestError::CreateRequestItem( raw_uri.clone(), source.clone(), - Box::new(e), + e, )), } } From fdbf533b8671cff3ca8d643301818c8d183c245f Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 17 Nov 2025 21:12:20 +1000 Subject: [PATCH 62/63] use lazylock --- lychee-lib/src/types/request_error.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lychee-lib/src/types/request_error.rs b/lychee-lib/src/types/request_error.rs index 008f51d43e..aa06973340 100644 --- a/lychee-lib/src/types/request_error.rs +++ b/lychee-lib/src/types/request_error.rs @@ -1,10 +1,11 @@ use std::convert::TryFrom; +use std::sync::LazyLock; use thiserror::Error; use crate::{ErrorKind, RawUri, Response, Status, Uri}; use crate::{InputSource, ResolvedInputSource}; -const ERROR_URL: &str = "error:"; +static ERROR_URI: LazyLock = LazyLock::new(|| Uri::try_from("error:").unwrap()); /// An error which occurs while trying to construct a [`Request`] object. 
/// That is, an error which happens while trying to load links from an input @@ -62,14 +63,13 @@ impl RequestError { /// If this `RequestError` was caused by failing to load a user-specified /// input, the underlying cause of the `RequestError` will be returned /// as an Err. This allows the error to be propagated back to the user. - #[allow(clippy::missing_panics_doc)] pub fn into_response(self) -> Result { match self { RequestError::UserInputContent(_, e) => Err(e), e => { let src = e.input_source(); Ok(Response::new( - Uri::try_from(ERROR_URL).unwrap(), + ERROR_URI.clone(), Status::RequestError(e), src, )) @@ -80,10 +80,11 @@ impl RequestError { #[cfg(test)] mod tests { - use super::{ERROR_URL, Uri}; + use super::ERROR_URI; + use std::sync::LazyLock; #[test] fn test_error_url_parses() { - assert!(Uri::try_from(ERROR_URL).is_ok()); + let _ = LazyLock::force(&ERROR_URI); } } From 5aff2e6dc928f8b1b0a8d663f8bd7a373dd8f8be Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 17 Nov 2025 21:23:19 +1000 Subject: [PATCH 63/63] inline errs_iter and reqs_iter with parentheses --- lychee-lib/src/utils/request.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 2f8bc0afba..8d64b2fd67 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -141,10 +141,9 @@ pub(crate) fn create( } } - let errs_iter = errors.into_iter().map(Result::Err); - let reqs_iter = requests.into_iter().map(Result::Ok); - - reqs_iter.chain(errs_iter).collect() + (requests.into_iter().map(Result::Ok)) + .chain(errors.into_iter().map(Result::Err)) + .collect() } /// Create a URI from a path