Skip to content

Commit 1a0d5e3

Browse files
committed
Auto merge of #11308 - ehuss:clean-tmp-libgit2, r=weihanglo
Clean stale git temp files ### What does this PR try to resolve? When cargo is interrupted while libgit2 is indexing the pack file, it will leave behind a temp file that never gets deleted. These files can be very large. This checks for these stale temp files and deletes them. ### How should we test and review this PR? There is a simulated test here. To test with the actual behavior: 1. Run `CARGO_HOME=chome cargo fetch` 2. While it is "resolving deltas", hit Ctrl-C. 3. Notice that there is a 200MB file in `chome/registry/index/github.lhy31512.workers.dev-1ecc6299db9ec823/.git/objects/pack/` 4. Do that several times if you want, each time adds another 200MB file. 5. Build this PR: `cargo b -r` 6. Run `CARGO_HOME=chome CARGO_LOG=cargo::sources::git::utils=debug ./target/release/cargo fetch` 7. Notice that it deletes the `pack_git2_*` files.
2 parents da20496 + 754de17 commit 1a0d5e3

File tree

3 files changed

+73
-1
lines changed

3 files changed

+73
-1
lines changed

Diff for: src/cargo/sources/git/utils.rs

+39
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,8 @@ pub fn fetch(
807807
// request we're about to issue.
808808
maybe_gc_repo(repo)?;
809809

810+
clean_repo_temp_files(repo);
811+
810812
// Translate the reference desired here into an actual list of refspecs
811813
// which need to get fetched. Additionally record if we're fetching tags.
812814
let mut refspecs = Vec::new();
@@ -996,6 +998,43 @@ fn maybe_gc_repo(repo: &mut git2::Repository) -> CargoResult<()> {
996998
reinitialize(repo)
997999
}
9981000

1001+
/// Removes temporary files left from previous activity.
1002+
///
1003+
/// If libgit2 is interrupted while indexing pack files, it will leave behind
1004+
/// some temporary files that it doesn't clean up. These can be quite large in
1005+
/// size, so this tries to clean things up.
1006+
///
1007+
/// This intentionally ignores errors. This is only an opportunistic cleaning,
1008+
/// and we don't really care if there are issues (there's unlikely anything
1009+
/// that can be done).
1010+
///
1011+
/// The git CLI has similar behavior (its temp files look like
1012+
/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git
1013+
/// prune` which is run by `git gc`. However, it doesn't know about libgit2's
1014+
/// filenames, so they never get cleaned up.
1015+
fn clean_repo_temp_files(repo: &git2::Repository) {
1016+
let path = repo.path().join("objects/pack/pack_git2_*");
1017+
let pattern = match path.to_str() {
1018+
Some(p) => p,
1019+
None => {
1020+
log::warn!("cannot convert {path:?} to a string");
1021+
return;
1022+
}
1023+
};
1024+
let paths = match glob::glob(pattern) {
1025+
Ok(paths) => paths,
1026+
Err(_) => return,
1027+
};
1028+
for path in paths {
1029+
if let Ok(path) = path {
1030+
match paths::remove_file(&path) {
1031+
Ok(_) => log::debug!("removed stale temp git file {path:?}"),
1032+
Err(e) => log::warn!("failed to remove {path:?} while cleaning temp files: {e}"),
1033+
}
1034+
}
1035+
}
1036+
}
1037+
9991038
fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
10001039
// Here we want to drop the current repository object pointed to by `repo`,
10011040
// so we initialize temporary repository in a sub-folder, blow away the

Diff for: tests/testsuite/git.rs

+33
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use std::sync::Arc;
1010
use std::thread;
1111

1212
use cargo_test_support::paths::{self, CargoPathExt};
13+
use cargo_test_support::registry::Package;
1314
use cargo_test_support::{basic_lib_manifest, basic_manifest, git, main_file, path2url, project};
1415
use cargo_test_support::{sleep_ms, t, Project};
1516

@@ -3619,3 +3620,35 @@ fn _corrupted_checkout(with_cli: bool) {
36193620
e.run();
36203621
assert!(ok.exists());
36213622
}
3623+
3624+
#[cargo_test]
3625+
fn cleans_temp_pack_files() {
3626+
// Checks that cargo removes temp files left by libgit2 when it is
3627+
// interrupted (see clean_repo_temp_files).
3628+
Package::new("bar", "1.0.0").publish();
3629+
let p = project()
3630+
.file(
3631+
"Cargo.toml",
3632+
r#"
3633+
[package]
3634+
name = "foo"
3635+
version = "0.1.0"
3636+
3637+
[dependencies]
3638+
bar = "1.0"
3639+
"#,
3640+
)
3641+
.file("src/lib.rs", "")
3642+
.build();
3643+
p.cargo("fetch").run();
3644+
// Simulate what happens when libgit2 is interrupted while indexing a pack file.
3645+
let tmp_path = super::git_gc::find_index().join(".git/objects/pack/pack_git2_91ab40da04fdc2e7");
3646+
fs::write(&tmp_path, "test").unwrap();
3647+
let mut perms = fs::metadata(&tmp_path).unwrap().permissions();
3648+
perms.set_readonly(true);
3649+
fs::set_permissions(&tmp_path, perms).unwrap();
3650+
3651+
// Trigger an index update.
3652+
p.cargo("generate-lockfile").run();
3653+
assert!(!tmp_path.exists());
3654+
}

Diff for: tests/testsuite/git_gc.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use cargo_test_support::registry::Package;
1111

1212
use url::Url;
1313

14-
fn find_index() -> PathBuf {
14+
pub fn find_index() -> PathBuf {
1515
let dir = paths::home().join(".cargo/registry/index");
1616
dir.read_dir().unwrap().next().unwrap().unwrap().path()
1717
}

0 commit comments

Comments
 (0)