Skip to content

Commit e81797d

Browse files
committed
Enable shallow clones and fetches for registry and git dependencies.
The implementation hinges on passing information about the kind of clone and fetch to the `fetch()` method, which then configures the fetch accordingly. Note that it doesn't differentiate between initial clones and fetches as the shallow-ness of the repository is maintained nonetheless.
1 parent 41412a1 commit e81797d

File tree

6 files changed

+547
-20
lines changed

6 files changed

+547
-20
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ filetime = "0.2.9"
3030
flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] }
3131
git2 = "0.16.0"
3232
git2-curl = "0.17.0"
33-
gix = { version = "0.41.0", default-features = false, features = ["blocking-http-transport-curl", "progress-tree"] }
33+
gix = { version = "0.42.0", default-features = false, features = ["blocking-http-transport-curl", "progress-tree"] }
3434
gix-features-for-configuration-only = { version = "0.28.0", package = "gix-features", features = [ "parallel" ] }
3535
glob = "0.3.0"
3636
hex = "0.4"

src/cargo/sources/git/mod.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,14 @@ mod source;
66
mod utils;
77

88
pub mod fetch {
9+
/// The kind of remote repository to fetch.
10+
#[derive(Debug, Copy, Clone)]
11+
pub enum RemoteKind {
12+
/// A repository that belongs to a git dependency.
13+
GitDependency,
14+
/// A repository that belongs to a Cargo registry.
15+
Registry,
16+
}
17+
918
pub type Error = gix::env::collate::fetch::Error<gix::refspec::parse::Error>;
1019
}

src/cargo/sources/git/oxide.rs

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ pub fn with_retry_and_progress(
2929
) -> CargoResult<()> {
3030
std::thread::scope(|s| {
3131
let mut progress_bar = Progress::new("Fetch", config);
32+
let is_shallow = config
33+
.cli_unstable()
34+
.gitoxide
35+
.map_or(false, |gix| gix.shallow_deps || gix.shallow_index);
3236
network::with_retry(config, || {
3337
let progress_root: Arc<gix::progress::tree::Root> =
3438
gix::progress::tree::root::Options {
@@ -50,7 +54,7 @@ pub fn with_retry_and_progress(
5054
);
5155
amend_authentication_hints(res, urls.get_mut().take())
5256
});
53-
translate_progress_to_bar(&mut progress_bar, root)?;
57+
translate_progress_to_bar(&mut progress_bar, root, is_shallow)?;
5458
thread.join().expect("no panic in scoped thread")
5559
})
5660
})
@@ -59,7 +63,9 @@ pub fn with_retry_and_progress(
5963
fn translate_progress_to_bar(
6064
progress_bar: &mut Progress<'_>,
6165
root: Weak<gix::progress::tree::Root>,
66+
is_shallow: bool,
6267
) -> CargoResult<()> {
68+
let remote_progress: gix::progress::Id = gix::remote::fetch::ProgressId::RemoteProgress.into();
6369
let read_pack_bytes: gix::progress::Id =
6470
gix::odb::pack::bundle::write::ProgressId::ReadPackBytes.into();
6571
let delta_index_objects: gix::progress::Id =
@@ -88,6 +94,7 @@ fn translate_progress_to_bar(
8894
"progress should be smoother by keeping these as multiples of each other"
8995
);
9096

97+
let num_phases = if is_shallow { 3 } else { 2 }; // indexing + delta-resolution, both with same amount of objects to handle
9198
while let Some(root) = root.upgrade() {
9299
std::thread::sleep(sleep_interval);
93100
let needs_update = last_fast_update.elapsed() >= fast_check_interval;
@@ -102,31 +109,37 @@ fn translate_progress_to_bar(
102109
fn progress_by_id(
103110
id: gix::progress::Id,
104111
task: &gix::progress::Task,
105-
) -> Option<&gix::progress::Value> {
106-
(task.id == id).then(|| task.progress.as_ref()).flatten()
112+
) -> Option<(&str, &gix::progress::Value)> {
113+
(task.id == id)
114+
.then(|| task.progress.as_ref())
115+
.flatten()
116+
.map(|value| (task.name.as_str(), value))
107117
}
108118
fn find_in<K>(
109119
tasks: &[(K, gix::progress::Task)],
110-
cb: impl Fn(&gix::progress::Task) -> Option<&gix::progress::Value>,
111-
) -> Option<&gix::progress::Value> {
120+
cb: impl Fn(&gix::progress::Task) -> Option<(&str, &gix::progress::Value)>,
121+
) -> Option<(&str, &gix::progress::Value)> {
112122
tasks.iter().find_map(|(_, t)| cb(t))
113123
}
114124

115-
const NUM_PHASES: usize = 2; // indexing + delta-resolution, both with same amount of objects to handle
116-
if let Some(objs) = find_in(&tasks, |t| progress_by_id(resolve_objects, t)) {
117-
// Resolving deltas.
125+
if let Some((_, objs)) = find_in(&tasks, |t| progress_by_id(resolve_objects, t)) {
126+
// Phase 3: Resolving deltas.
118127
let objects = objs.step.load(Ordering::Relaxed);
119128
let total_objects = objs.done_at.expect("known amount of objects");
120129
let msg = format!(", ({objects}/{total_objects}) resolving deltas");
121130

122-
progress_bar.tick(total_objects + objects, total_objects * NUM_PHASES, &msg)?;
131+
progress_bar.tick(
132+
(total_objects * (num_phases - 1)) + objects,
133+
total_objects * num_phases,
134+
&msg,
135+
)?;
123136
} else if let Some((objs, read_pack)) =
124137
find_in(&tasks, |t| progress_by_id(read_pack_bytes, t)).and_then(|read| {
125138
find_in(&tasks, |t| progress_by_id(delta_index_objects, t))
126-
.map(|delta| (delta, read))
139+
.map(|delta| (delta.1, read.1))
127140
})
128141
{
129-
// Receiving objects.
142+
// Phase 2: Receiving objects.
130143
let objects = objs.step.load(Ordering::Relaxed);
131144
let total_objects = objs.done_at.expect("known amount of objects");
132145
let received_bytes = read_pack.step.load(Ordering::Relaxed);
@@ -139,7 +152,25 @@ fn translate_progress_to_bar(
139152
let (rate, unit) = human_readable_bytes(counter.rate() as u64);
140153
let msg = format!(", {rate:.2}{unit}/s");
141154

142-
progress_bar.tick(objects, total_objects * NUM_PHASES, &msg)?;
155+
progress_bar.tick(
156+
(total_objects * (num_phases - 2)) + objects,
157+
total_objects * num_phases,
158+
&msg,
159+
)?;
160+
} else if let Some((action, remote)) =
161+
find_in(&tasks, |t| progress_by_id(remote_progress, t))
162+
{
163+
if !is_shallow {
164+
continue;
165+
}
166+
// phase 1: work on the remote side
167+
168+
// Report the progress advertised by the remote, labelled with its task name.
169+
let objects = remote.step.load(Ordering::Relaxed);
170+
if let Some(total_objects) = remote.done_at {
171+
let msg = format!(", ({objects}/{total_objects}) {action}");
172+
progress_bar.tick(objects, total_objects * num_phases, &msg)?;
173+
}
143174
}
144175
}
145176
Ok(())

src/cargo/sources/git/utils.rs

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
//! Utilities for handling git repositories, mainly around
22
//! authentication/cloning.
33
4+
use crate::core::features::GitoxideFeatures;
45
use crate::core::{GitReference, Verbosity};
6+
use crate::sources::git::fetch::RemoteKind;
57
use crate::sources::git::oxide;
68
use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
79
use crate::util::errors::CargoResult;
@@ -96,9 +98,16 @@ impl GitRemote {
9698
// if we can. If that can successfully load our revision then we've
9799
// populated the database with the latest version of `reference`, so
98100
// return that database and the rev we resolve to.
101+
let remote_kind = RemoteKind::GitDependency;
99102
if let Some(mut db) = db {
100-
fetch(&mut db.repo, self.url.as_str(), reference, cargo_config)
101-
.context(format!("failed to fetch into: {}", into.display()))?;
103+
fetch(
104+
&mut db.repo,
105+
self.url.as_str(),
106+
reference,
107+
cargo_config,
108+
remote_kind,
109+
)
110+
.context(format!("failed to fetch into: {}", into.display()))?;
102111
match locked_rev {
103112
Some(rev) => {
104113
if db.contains(rev) {
@@ -121,8 +130,14 @@ impl GitRemote {
121130
}
122131
paths::create_dir_all(into)?;
123132
let mut repo = init(into, true)?;
124-
fetch(&mut repo, self.url.as_str(), reference, cargo_config)
125-
.context(format!("failed to clone into: {}", into.display()))?;
133+
fetch(
134+
&mut repo,
135+
self.url.as_str(),
136+
reference,
137+
cargo_config,
138+
remote_kind,
139+
)
140+
.context(format!("failed to clone into: {}", into.display()))?;
126141
let rev = match locked_rev {
127142
Some(rev) => rev,
128143
None => reference.resolve(&repo)?,
@@ -282,6 +297,12 @@ impl<'a> GitCheckout<'a> {
282297
.with_checkout(checkout)
283298
.fetch_options(fopts)
284299
.clone(url.as_str(), into)?;
300+
if database.repo.is_shallow() {
301+
std::fs::copy(
302+
database.repo.path().join("shallow"),
303+
r.path().join("shallow"),
304+
)?;
305+
}
285306
repo = Some(r);
286307
Ok(())
287308
})?;
@@ -432,7 +453,14 @@ impl<'a> GitCheckout<'a> {
432453
cargo_config
433454
.shell()
434455
.status("Updating", format!("git submodule `{}`", url))?;
435-
fetch(&mut repo, &url, &reference, cargo_config).with_context(|| {
456+
fetch(
457+
&mut repo,
458+
&url,
459+
&reference,
460+
cargo_config,
461+
RemoteKind::GitDependency,
462+
)
463+
.with_context(|| {
436464
format!(
437465
"failed to fetch submodule `{}` from {}",
438466
child.name().unwrap_or(""),
@@ -803,11 +831,14 @@ pub fn with_fetch_options(
803831
})
804832
}
805833

834+
/// Note that `kind` is only needed to know how to interpret `gitoxide` feature options to potentially shallow-clone
835+
/// the repository.
806836
pub fn fetch(
807837
repo: &mut git2::Repository,
808838
orig_url: &str,
809839
reference: &GitReference,
810840
config: &Config,
841+
kind: RemoteKind,
811842
) -> CargoResult<()> {
812843
if config.frozen() {
813844
anyhow::bail!(
@@ -893,6 +924,25 @@ pub fn fetch(
893924
let git2_repo = repo;
894925
let config_overrides = cargo_config_to_gitoxide_overrides(config)?;
895926
let repo_reinitialized = AtomicBool::default();
927+
let has_feature = |cb: &dyn Fn(GitoxideFeatures) -> bool| {
928+
config
929+
.cli_unstable()
930+
.gitoxide
931+
.map_or(false, |features| cb(features))
932+
};
933+
let shallow = if git2_repo.is_shallow() {
934+
gix::remote::fetch::Shallow::NoChange
935+
} else {
936+
match kind {
937+
RemoteKind::GitDependency if has_feature(&|git| git.shallow_deps) => {
938+
gix::remote::fetch::Shallow::DepthAtRemote(1.try_into().expect("non-zero"))
939+
}
940+
RemoteKind::Registry if has_feature(&|git| git.shallow_index) => {
941+
gix::remote::fetch::Shallow::DepthAtRemote(1.try_into().expect("non-zero"))
942+
}
943+
_ => gix::remote::fetch::Shallow::NoChange,
944+
}
945+
};
896946
let res = oxide::with_retry_and_progress(
897947
&git2_repo.path().to_owned(),
898948
config,
@@ -952,6 +1002,7 @@ pub fn fetch(
9521002
);
9531003
let outcome = connection
9541004
.prepare_fetch(gix::remote::ref_map::Options::default())?
1005+
.with_shallow(shallow.clone())
9551006
.receive(should_interrupt)?;
9561007
Ok(outcome)
9571008
});
@@ -967,6 +1018,7 @@ pub fn fetch(
9671018
// folder before writing files into it, or else not even open a directory as git repository (which is
9681019
// also handled here).
9691020
&& err.is_corrupted()
1021+
|| has_shallow_lock_file(&err)
9701022
{
9711023
repo_reinitialized.store(true, Ordering::Relaxed);
9721024
debug!(
@@ -1005,6 +1057,12 @@ pub fn fetch(
10051057
// again. If it looks like any other kind of error, or if we've already
10061058
// blown away the repository, then we want to return the error as-is.
10071059
let mut repo_reinitialized = false;
1060+
// while shallow repos aren't officially supported, don't risk fetching them.
1061+
// We are in this situation only when `gitoxide` is cloning but then disabled to use `git2`
1062+
// for fetching.
1063+
if repo.is_shallow() {
1064+
reinitialize(repo)?;
1065+
}
10081066
loop {
10091067
debug!("initiating fetch of {:?} from {}", refspecs, orig_url);
10101068
let res = repo
@@ -1036,6 +1094,17 @@ pub fn fetch(
10361094
}
10371095
}
10381096

1097+
/// `gitoxide` uses shallow locks to assure consistency when fetching, to avoid races, and to write
1098+
/// files atomically.
1099+
/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
1100+
/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
1101+
fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
1102+
matches!(
1103+
err,
1104+
gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::LockShallowFile(_))
1105+
)
1106+
}
1107+
10391108
fn fetch_with_cli(
10401109
repo: &mut git2::Repository,
10411110
url: &str,

src/cargo/sources/registry/remote.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::core::{GitReference, PackageId, SourceId};
22
use crate::sources::git;
3+
use crate::sources::git::fetch::RemoteKind;
34
use crate::sources::registry::download;
45
use crate::sources::registry::MaybeLock;
56
use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData};
@@ -300,8 +301,14 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
300301
// checkout.
301302
let url = self.source_id.url();
302303
let repo = self.repo.borrow_mut().unwrap();
303-
git::fetch(repo, url.as_str(), &self.index_git_ref, self.config)
304-
.with_context(|| format!("failed to fetch `{}`", url))?;
304+
git::fetch(
305+
repo,
306+
url.as_str(),
307+
&self.index_git_ref,
308+
self.config,
309+
RemoteKind::Registry,
310+
)
311+
.with_context(|| format!("failed to fetch `{}`", url))?;
305312

306313
// Create a dummy file to record the mtime for when we updated the
307314
// index.

0 commit comments

Comments
 (0)