Skip to content

Commit 2f06c80

Browse files
committed
Auto merge of #11840 - Byron:shallow-support, r=weihanglo
support for shallow clones and fetches with `gitoxide` This PR makes it possible to enable shallow clones and fetches for git dependencies and crate indices independently with `-Zgitoxide=fetch,shallow_deps` and `-Zgitoxide=fetch,shallow_index`, respectively. ### Tasks * [x] set up the shallow option when fetching, differentiated by 'registry' and 'git-dependency' * [x] validate registries are cloned shallowly *and* fetched shallowly * [x] validate git-dependencies are cloned shallowly *and* fetched shallowly * [x] a test to show what happens if a shallow index is opened with `git2` (*it can open it and fetch like normal, no issues*) * [x] ensure that `git2` can safely operate on a shallow clone - we unshallow it beforehand, both for registries and git dependencies * [x] ensure git-deps with revisions are handled correctly (they should just not be shallow, and they should unshallow themselves if they are) * [x] make sure shallow index clones aren't seen by older versions of cargo * [x] make sure shallow git dependency clones aren't seen by older versions of cargo * [x] shallow.lock test and more test-suite runs with shallow clones enabled for everything * [x] release new version of `gix` with full shallow support and use it here * [x] check why `shallow` files remain after unshallowing. Should they not rather be deleted if empty? - Yes, `git` does so as well, implemented [with this commit](GitoxideLabs/gitoxide@2cd5054) * ~~see if it can be avoided to ever unshallow an existing `-shallow` clone by using the right location from the start. If not, test that we can go `shallow->unshallow->shallow` without a hitch.~~ Cannot happen anymore as it can predict the final location perfectly. * [x] `Cargo.lock` files don't prevent shallow clones * [x] ensure all other tests work with shallow cloning enabled (or fix the ones that don't with regression protection) * [x] can the 'split-brain' issue be solved for good?
### Review Notes * there is a chance of 'split brain' in git-dependencies as the logic for determining whether the clone/fetch is shallow is repeated in two places. This isn't the case for registries though. ### Notes * I am highlighting that this is the `gitoxide` version of shallow clones as the `git2` version [might soon be available](libgit2/libgit2#6396) as well. Having that would be good as it would ensure interoperability remains intact. * Maybe for when `git2` has been phased out, i.e. everything else is working, I think (unscientifically) there might be benefits in using worktrees for checkouts. Admittedly I don't know the history of why they weren't used in the first place. Also: `gitoxide` doesn't yet support local clones and might not have to if worktrees were used instead.
2 parents a9465fe + d2734d3 commit 2f06c80

File tree

14 files changed

+1268
-298
lines changed

14 files changed

+1268
-298
lines changed

Cargo.lock

+160-152
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ filetime = "0.2.9"
4242
flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] }
4343
git2 = "0.17.0"
4444
git2-curl = "0.18.0"
45-
gix = { version = "0.39.0", default-features = false, features = ["blocking-http-transport-curl", "progress-tree"] }
46-
gix-features-for-configuration-only = { version = "0.28.0", package = "gix-features", features = [ "parallel" ] }
45+
gix = { version = "0.44.1", default-features = false, features = ["blocking-http-transport-curl", "progress-tree"] }
46+
gix-features-for-configuration-only = { version = "0.29.0", package = "gix-features", features = [ "parallel" ] }
4747
glob = "0.3.0"
4848
hex = "0.4"
4949
hmac = "0.12.1"

src/cargo/sources/git/mod.rs

+41
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,46 @@ mod source;
66
mod utils;
77

88
pub mod fetch {
9+
use crate::core::features::GitoxideFeatures;
10+
use crate::Config;
11+
12+
/// The kind of remote repository to fetch.
13+
#[derive(Debug, Copy, Clone)]
14+
pub enum RemoteKind {
15+
/// A repository that belongs to a git dependency.
16+
GitDependency,
17+
/// A repository that belongs to a Cargo registry.
18+
Registry,
19+
}
20+
21+
impl RemoteKind {
22+
/// Obtain the kind of history we would want for a fetch from our remote, knowing if the target repo is already shallow
23+
/// via `repo_is_shallow`, along with gitoxide-specific feature configuration via `config`.
24+
/// Returns `Shallow::NoChange` for a non-shallow repo whose matching feature (`shallow_deps` or `shallow_index`) is off, and a depth of 1 at the remote otherwise.
25+
pub(crate) fn to_shallow_setting(
26+
&self,
27+
repo_is_shallow: bool,
28+
config: &Config,
29+
) -> gix::remote::fetch::Shallow {
30+
let has_feature = |cb: &dyn Fn(GitoxideFeatures) -> bool| {
31+
config
32+
.cli_unstable()
33+
.gitoxide
34+
.map_or(false, |features| cb(features))
35+
};
36+
37+
// An already shallow repo stays shallow and keeps downloading single commits; otherwise a shallow clone needs the feature matching this kind of remote.
38+
if !repo_is_shallow {
39+
match self {
40+
RemoteKind::GitDependency if has_feature(&|git| git.shallow_deps) => {}
41+
RemoteKind::Registry if has_feature(&|git| git.shallow_index) => {}
42+
_ => return gix::remote::fetch::Shallow::NoChange,
43+
}
44+
};
45+
46+
gix::remote::fetch::Shallow::DepthAtRemote(1.try_into().expect("non-zero"))
47+
}
48+
}
49+
950
pub type Error = gix::env::collate::fetch::Error<gix::refspec::parse::Error>;
1051
}

src/cargo/sources/git/oxide.rs

+44-13
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ pub fn with_retry_and_progress(
2929
) -> CargoResult<()> {
3030
std::thread::scope(|s| {
3131
let mut progress_bar = Progress::new("Fetch", config);
32+
let is_shallow = config
33+
.cli_unstable()
34+
.gitoxide
35+
.map_or(false, |gix| gix.shallow_deps || gix.shallow_index);
3236
network::retry::with_retry(config, || {
3337
let progress_root: Arc<gix::progress::tree::Root> =
3438
gix::progress::tree::root::Options {
@@ -50,7 +54,7 @@ pub fn with_retry_and_progress(
5054
);
5155
amend_authentication_hints(res, urls.get_mut().take())
5256
});
53-
translate_progress_to_bar(&mut progress_bar, root)?;
57+
translate_progress_to_bar(&mut progress_bar, root, is_shallow)?;
5458
thread.join().expect("no panic in scoped thread")
5559
})
5660
})
@@ -59,7 +63,9 @@ pub fn with_retry_and_progress(
5963
fn translate_progress_to_bar(
6064
progress_bar: &mut Progress<'_>,
6165
root: Weak<gix::progress::tree::Root>,
66+
is_shallow: bool,
6267
) -> CargoResult<()> {
68+
let remote_progress: gix::progress::Id = gix::remote::fetch::ProgressId::RemoteProgress.into();
6369
let read_pack_bytes: gix::progress::Id =
6470
gix::odb::pack::bundle::write::ProgressId::ReadPackBytes.into();
6571
let delta_index_objects: gix::progress::Id =
@@ -88,6 +94,7 @@ fn translate_progress_to_bar(
8894
"progress should be smoother by keeping these as multiples of each other"
8995
);
9096

97+
let num_phases = if is_shallow { 3 } else { 2 }; // receiving objects + resolving deltas (both with same amount of objects to handle), preceded by a remote-side phase when shallow
9198
while let Some(root) = root.upgrade() {
9299
std::thread::sleep(sleep_interval);
93100
let needs_update = last_fast_update.elapsed() >= fast_check_interval;
@@ -102,31 +109,37 @@ fn translate_progress_to_bar(
102109
fn progress_by_id(
103110
id: gix::progress::Id,
104111
task: &gix::progress::Task,
105-
) -> Option<&gix::progress::Value> {
106-
(task.id == id).then(|| task.progress.as_ref()).flatten()
112+
) -> Option<(&str, &gix::progress::Value)> {
113+
(task.id == id)
114+
.then(|| task.progress.as_ref())
115+
.flatten()
116+
.map(|value| (task.name.as_str(), value))
107117
}
108118
fn find_in<K>(
109119
tasks: &[(K, gix::progress::Task)],
110-
cb: impl Fn(&gix::progress::Task) -> Option<&gix::progress::Value>,
111-
) -> Option<&gix::progress::Value> {
120+
cb: impl Fn(&gix::progress::Task) -> Option<(&str, &gix::progress::Value)>,
121+
) -> Option<(&str, &gix::progress::Value)> {
112122
tasks.iter().find_map(|(_, t)| cb(t))
113123
}
114124

115-
const NUM_PHASES: usize = 2; // indexing + delta-resolution, both with same amount of objects to handle
116-
if let Some(objs) = find_in(&tasks, |t| progress_by_id(resolve_objects, t)) {
117-
// Resolving deltas.
125+
if let Some((_, objs)) = find_in(&tasks, |t| progress_by_id(resolve_objects, t)) {
126+
// Phase 3: Resolving deltas.
118127
let objects = objs.step.load(Ordering::Relaxed);
119128
let total_objects = objs.done_at.expect("known amount of objects");
120129
let msg = format!(", ({objects}/{total_objects}) resolving deltas");
121130

122-
progress_bar.tick(total_objects + objects, total_objects * NUM_PHASES, &msg)?;
131+
progress_bar.tick(
132+
(total_objects * (num_phases - 1)) + objects,
133+
total_objects * num_phases,
134+
&msg,
135+
)?;
123136
} else if let Some((objs, read_pack)) =
124137
find_in(&tasks, |t| progress_by_id(read_pack_bytes, t)).and_then(|read| {
125138
find_in(&tasks, |t| progress_by_id(delta_index_objects, t))
126-
.map(|delta| (delta, read))
139+
.map(|delta| (delta.1, read.1))
127140
})
128141
{
129-
// Receiving objects.
142+
// Phase 2: Receiving objects.
130143
let objects = objs.step.load(Ordering::Relaxed);
131144
let total_objects = objs.done_at.expect("known amount of objects");
132145
let received_bytes = read_pack.step.load(Ordering::Relaxed);
@@ -139,7 +152,25 @@ fn translate_progress_to_bar(
139152
let (rate, unit) = human_readable_bytes(counter.rate() as u64);
140153
let msg = format!(", {rate:.2}{unit}/s");
141154

142-
progress_bar.tick(objects, total_objects * NUM_PHASES, &msg)?;
155+
progress_bar.tick(
156+
(total_objects * (num_phases - 2)) + objects,
157+
total_objects * num_phases,
158+
&msg,
159+
)?;
160+
} else if let Some((action, remote)) =
161+
find_in(&tasks, |t| progress_by_id(remote_progress, t))
162+
{
163+
if !is_shallow {
164+
continue;
165+
}
166+
// Phase 1: work on the remote side
167+
168+
// Remote-side progress is only shown once its total is known.
169+
let objects = remote.step.load(Ordering::Relaxed);
170+
if let Some(total_objects) = remote.done_at {
171+
let msg = format!(", ({objects}/{total_objects}) {action}");
172+
progress_bar.tick(objects, total_objects * num_phases, &msg)?;
173+
}
143174
}
144175
}
145176
Ok(())
@@ -232,7 +263,7 @@ pub fn open_repo(
232263
) -> Result<gix::Repository, gix::open::Error> {
233264
gix::open_opts(repo_path, {
234265
let mut opts = gix::open::Options::default();
235-
opts.permissions.config = gix::permissions::Config::all();
266+
opts.permissions.config = gix::open::permissions::Config::all();
236267
opts.permissions.config.git_binary = purpose.needs_git_binary_config();
237268
opts.with(gix::sec::Trust::Full)
238269
.config_overrides(config_overrides)

src/cargo/sources/git/source.rs

+29-7
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,26 @@ impl<'cfg> GitSource<'cfg> {
2929
assert!(source_id.is_git(), "id is not git, id={}", source_id);
3030

3131
let remote = GitRemote::new(source_id.url());
32-
let ident = ident(&source_id);
33-
34-
let source = GitSource {
35-
remote,
36-
manifest_reference: source_id.git_reference().unwrap().clone(),
37-
locked_rev: match source_id.precise() {
32+
let manifest_reference = source_id.git_reference().unwrap().clone();
33+
let locked_rev =
34+
match source_id.precise() {
3835
Some(s) => Some(git2::Oid::from_str(s).with_context(|| {
3936
format!("precise value for git is not a git revision: {}", s)
4037
})?),
4138
None => None,
42-
},
39+
};
40+
let ident = ident_shallow(
41+
&source_id,
42+
config
43+
.cli_unstable()
44+
.gitoxide
45+
.map_or(false, |gix| gix.fetch && gix.shallow_deps),
46+
);
47+
48+
let source = GitSource {
49+
remote,
50+
manifest_reference,
51+
locked_rev,
4352
source_id,
4453
path_source: None,
4554
ident,
@@ -63,6 +72,7 @@ impl<'cfg> GitSource<'cfg> {
6372
}
6473
}
6574

75+
/// Create an identifier from a URL, essentially turning `proto://host/path/repo` into `repo-<hash-of-url>`.
6676
fn ident(id: &SourceId) -> String {
6777
let ident = id
6878
.canonical_url()
@@ -76,6 +86,18 @@ fn ident(id: &SourceId) -> String {
7686
format!("{}-{}", ident, short_hash(id.canonical_url()))
7787
}
7888

89+
/// Like `ident()`, but appends `-shallow` to it if `is_shallow` is `true`, turning `proto://host/path/repo` into `repo-<hash-of-url>-shallow`.
90+
///
91+
/// It's important to separate shallow from non-shallow clones for reasons of backwards compatibility - older
92+
/// versions of cargo don't necessarily handle shallow clones correctly.
93+
fn ident_shallow(id: &SourceId, is_shallow: bool) -> String {
94+
let mut ident = ident(id);
95+
if is_shallow {
96+
ident.push_str("-shallow");
97+
}
98+
ident
99+
}
100+
79101
impl<'cfg> Debug for GitSource<'cfg> {
80102
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
81103
write!(f, "git repo at {}", self.remote.url())?;

0 commit comments

Comments
 (0)