Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ Options:
Base URL or website root directory to check relative URLs e.g. <https://example.com> or `/path/to/public`

--root-dir <ROOT_DIR>
Root path to use when checking absolute local links, must be an absolute path
Root directory to use when checking absolute local links

--basic-auth <BASIC_AUTH>
Basic authentication support. E.g. `http://example.com username:password`
Expand Down
8 changes: 8 additions & 0 deletions fixtures/absolute_paths/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<!DOCTYPE html>
<html>
<body>
<a href="/about">About</a>
<a href="/products">Products</a>
<a href="/contact">Contact</a>
</body>
</html>
2 changes: 1 addition & 1 deletion fixtures/fragments/file.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
<a href="#in-the-end">doesn't exist</a><br>
</section>
</body>
</html>
</html>
9 changes: 9 additions & 0 deletions fixtures/html_fragments/page.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<!DOCTYPE html>
<html>
<body>
<h1 id="existing-section">Existing Section</h1>
<a href="#missing-section">Missing Link</a>
<a href="section.html#existing-heading">Valid Link</a>
<a href="section.html#non-existent-heading">Invalid Link</a>
</body>
</html>
7 changes: 7 additions & 0 deletions fixtures/html_fragments/section.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!DOCTYPE html>
<html>
<body>
<h1 id="existing-heading">Existing Heading</h1>
<p>Test content</p>
</body>
</html>
11 changes: 11 additions & 0 deletions fixtures/relative_fragments/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>Test Page</title>
</head>
<body>
<a href="#scrubs-wichtig">Important Section</a>
<a href="#unterschiedliche-platten">Different Disks</a>
<!-- Note: These anchors don't exist, which is intentional for the test -->
</body>
</html>
Empty file.
Empty file.
9 changes: 9 additions & 0 deletions fixtures/relative_paths/releases/v9_7/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<!DOCTYPE html>
<html>
<body>
<a href="./">Current</a>
<a href="../">Releases</a>
<a href="../system_maintenance/">Maintenance</a>
<a href="../../software/vpn/">VPN</a>
</body>
</html>
4 changes: 4 additions & 0 deletions fixtures/remote_fragments.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<a href="https://github.com/lycheeverse/lychee#non-existent-anchor"
>lychee-repo</a
>
<a href="https://lychee.cli.rs#missing-section">lychee-docs</a>
10 changes: 10 additions & 0 deletions fixtures/root_dir_fragments/about.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>About</title>
</head>
<body>
<h1 id="introduction">Introduction</h1>
<p>This is the about page.</p>
</body>
</html>
13 changes: 13 additions & 0 deletions fixtures/root_dir_fragments/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>Index</title>
</head>
<body>
<h1>Index</h1>
<a href="#local-section">Local Link</a>
<div id="local-section">Local Section</div>

<a href="about.html#introduction">About</a>
</body>
</html>
1 change: 0 additions & 1 deletion lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -

ClientBuilder::builder()
.remaps(remaps)
.base(cfg.base.clone())
.includes(includes)
.excludes(excludes)
.exclude_all_private(cfg.exclude_all_private)
Expand Down
59 changes: 39 additions & 20 deletions lychee-bin/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,15 +168,14 @@ where
{
tokio::pin!(requests);
while let Some(request) = requests.next().await {
let request = request?;
let request = request;
if let Some(pb) = &bar {
pb.inc_length(1);
pb.set_message(request.to_string());
if let Ok(request) = &request {
pb.set_message(request.to_string());
}
};
send_req
.send(Ok(request))
.await
.expect("Cannot send request");
send_req.send(request).await.expect("Cannot send request");
}
Ok(())
}
Expand Down Expand Up @@ -228,20 +227,40 @@ async fn request_channel_task(
ReceiverStream::new(recv_req),
max_concurrency,
|request: Result<Request>| async {
let request = request.expect("cannot read request");
let response = handle(
&client,
cache.clone(),
cache_exclude_status.clone(),
request,
accept.clone(),
)
.await;

send_resp
.send(response)
.await
.expect("cannot send response to queue");
if let Ok(request) = request {
let response = handle(
&client,
cache.clone(),
cache_exclude_status.clone(),
request,
accept.clone(),
)
.await;
send_resp
.send(response)
.await
.expect("cannot send response to queue");
};

// let response = match request {
// Ok(req) => {
// handle(
// &client,
// cache.clone(),
// cache_exclude_status.clone(),
// req,
// accept.clone(),
// )
// .await
// }
// Err(e) => {
// log::error!("Error reading request: {}", e);
// }

// send_resp
// .send(response)
// .await
// .expect("cannot send response to queue");
},
)
.await;
Expand Down
35 changes: 30 additions & 5 deletions lychee-bin/src/commands/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,39 +34,64 @@ pub(crate) async fn dump<S>(params: CommandParams<S>) -> Result<ExitCode>
where
    S: futures::Stream<Item = Result<Request>>,
{
    // Dump every collected request through the writer instead of checking it.
    //
    // Returns `ExitCode::Success` once the stream is exhausted, and
    // `ExitCode::UnexpectedFailure` on a write error other than a broken pipe.
    // Stream errors and remapping errors are propagated with `?`.
    //
    // NOTE: nothing may pull items from `requests` ahead of the loop below;
    // every item taken from the stream must be handled there, otherwise the
    // first request would be missing from the dump and a leading error would
    // go unreported. Diagnostics must not be printed with `println!` here
    // either, since that would interleave them with the program's output.
    let requests = params.requests;
    tokio::pin!(requests);

    // Create (or truncate) the output file up front.
    if let Some(out_file) = &params.cfg.output {
        fs::File::create(out_file)?;
    }

    let mut writer = create_writer(params.cfg.output)?;

    while let Some(request) = requests.next().await {
        let mut request = request?;

        // Apply URI remappings (if any)
        params.client.remap(&mut request.uri)?;

        let excluded = params.client.is_excluded(&request.uri);

        // Excluded requests are only shown when the log level is at least `Info`.
        if excluded && params.cfg.verbose.log_level() < log::Level::Info {
            continue;
        }

        if let Err(e) = write(&mut writer, &request, &params.cfg.verbose, excluded) {
            // Avoid panic on broken pipe.
            // See https://github.com/rust-lang/rust/issues/46016
            // This can occur when piping the output of lychee
            // to another program like `grep`.
            if e.kind() != io::ErrorKind::BrokenPipe {
                error!("{e}");
                return Ok(ExitCode::UnexpectedFailure);
            }
        }
    }

    Ok(ExitCode::Success)
}

Expand Down
32 changes: 29 additions & 3 deletions lychee-bin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ use openssl_sys as _; // required for vendored-openssl feature
use options::LYCHEE_CONFIG_FILE;
use ring as _; // required for apple silicon

use lychee_lib::BasicAuthExtractor;
use lychee_lib::Collector;
use lychee_lib::CookieJar;
use lychee_lib::{BasicAuthExtractor, RootDir};
use lychee_lib::{Collector, Input, InputSource};

mod archive;
mod cache;
Expand Down Expand Up @@ -287,8 +287,9 @@ fn underlying_io_error_kind(error: &Error) -> Option<io::ErrorKind> {
/// Run lychee on the given inputs
async fn run(opts: &LycheeOptions) -> Result<i32> {
let inputs = opts.inputs()?;
let root_dir = set_root_dir(&inputs, &opts.config)?;

let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone())?
let mut collector = Collector::new(root_dir, opts.config.base.clone())?
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
.skip_ignored(!opts.config.no_ignore)
Expand Down Expand Up @@ -387,3 +388,28 @@ async fn run(opts: &LycheeOptions) -> Result<i32> {

Ok(exit_code as i32)
}

/// Determine the root directory to hand to the collector.
///
/// A `root_dir` present in the configuration always takes precedence.
/// Otherwise, if there is exactly one input and it is a directory, the
/// file system path of that input becomes the root directory.
///
/// In all other cases the root directory is `None`.
///
/// # Errors
///
/// Returns an error if the single directory input is not an
/// `InputSource::FsPath`, or if `RootDir::new` fails for its path.
fn set_root_dir(inputs: &[Input], config: &Config) -> Result<Option<RootDir>> {
    // An explicitly configured root directory wins over any inference.
    if let Some(configured) = &config.root_dir {
        return Ok(Some(configured.clone()));
    }

    match inputs {
        // Only a lone directory input can act as an implicit root.
        [only] if only.is_dir() => match only.source {
            InputSource::FsPath(ref dir) => Ok(Some(RootDir::new(dir)?)),
            _ => bail!("Cannot set root directory for input: {:?}", only),
        },
        _ => Ok(None),
    }
}
18 changes: 9 additions & 9 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
use crate::archive::Archive;
use crate::parse::parse_base;
use crate::parse::{parse_base, parse_root_dir};
use crate::verbosity::Verbosity;
use anyhow::{anyhow, Context, Error, Result};
use clap::builder::PossibleValuesParser;
use clap::{arg, builder::TypedValueParser, Parser};
use const_format::{concatcp, formatcp};
use lychee_lib::{
Base, BasicAuthSelector, Input, StatusCodeExcluder, StatusCodeSelector, DEFAULT_MAX_REDIRECTS,
DEFAULT_MAX_RETRIES, DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
Base, BasicAuthSelector, Input, RootDir, StatusCodeExcluder, StatusCodeSelector, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES, DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT
};
use secrecy::{ExposeSecret, SecretString};
use serde::Deserialize;
Expand Down Expand Up @@ -169,6 +168,7 @@ macro_rules! fold_in {
#[command(version, about)]
pub(crate) struct LycheeOptions {
/// The inputs (where to get links to check from).
///
/// These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`),
/// remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`).
/// NOTE: Use `--` to separate inputs from options that allow multiple arguments.
Expand All @@ -180,13 +180,14 @@ pub(crate) struct LycheeOptions {
#[arg(help = HELP_MSG_CONFIG_FILE)]
pub(crate) config_file: Option<PathBuf>,

/// The parsed configuration
#[clap(flatten)]
pub(crate) config: Config,
}

impl LycheeOptions {
/// Get parsed inputs from options.
// This depends on the config, which is why a method is required (we could
// This depends on the parsed config, which is why a method is required (we could
// accept a `Vec<Input>` in `LycheeOptions` and do the conversion there, but
// we wouldn't get access to `glob_ignore_case`.
pub(crate) fn inputs(&self) -> Result<Vec<Input>> {
Expand Down Expand Up @@ -441,15 +442,14 @@ separated list of accepted status codes. This example will accept 200, 201,

/// Base URL or website root directory to check relative URLs
/// e.g. <https://example.com> or `/path/to/public`
#[arg(short, long, value_parser= parse_base)]
#[arg(short, long, value_parser = parse_base)]
#[serde(default)]
pub(crate) base: Option<Base>,

/// Root path to use when checking absolute local links,
/// must be an absolute path
#[arg(long)]
/// Root directory to use when checking absolute local links
#[arg(long, value_parser = parse_root_dir)]
#[serde(default)]
pub(crate) root_dir: Option<PathBuf>,
pub(crate) root_dir: Option<RootDir>,

/// Basic authentication support. E.g. `http://example.com username:password`
#[arg(long)]
Expand Down
10 changes: 7 additions & 3 deletions lychee-bin/src/parse.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use anyhow::{anyhow, Context, Result};
use headers::{HeaderMap, HeaderName};
use lychee_lib::{remap::Remaps, Base};
use lychee_lib::{remap::Remaps, Base, RootDir};
use std::time::Duration;

/// Split a single HTTP header into a (key, value) tuple
Expand Down Expand Up @@ -36,8 +36,12 @@ pub(crate) fn parse_remaps(remaps: &[String]) -> Result<Remaps> {
.context("Remaps must be of the form '<pattern> <uri>' (separated by whitespace)")
}

pub(crate) fn parse_base(src: &str) -> Result<Base, lychee_lib::ErrorKind> {
Base::try_from(src)
/// Value parser for the `base` command-line option.
///
/// Converts the raw string into a [`Base`] via `Base::try_from` and
/// returns the resulting `lychee_lib::ErrorKind` if the conversion fails.
pub(crate) fn parse_base(base: &str) -> Result<Base, lychee_lib::ErrorKind> {
Base::try_from(base)
}

/// Value parser for the `root_dir` command-line option.
///
/// Converts the raw string into a [`RootDir`] via `RootDir::try_from` and
/// returns the resulting `lychee_lib::ErrorKind` if the conversion fails.
pub(crate) fn parse_root_dir(root_dir: &str) -> Result<RootDir, lychee_lib::ErrorKind> {
RootDir::try_from(root_dir)
}

#[cfg(test)]
Expand Down
Loading
Loading