Skip to content

Commit d0e950d

Browse files
committed
Optimize pushes of multiple refs
When pushing multiple refs, we know any Git objects on the remote side can be excluded from the objects that refer to LFS objects we need to push, since if the remote side already has the Git objects, it should have the corresponding LFS objects as well. However, when traversing Git objects, we traverse them on a per-ref basis, which is required since any LFS objects which spawn a batch request will need the ref to be placed in the batch request as part of the protocol. Let's find a list of all the remote sides that exist before traversing any Git objects, and exclude traversing any of those objects in any traversal. As a result, we can traverse far, far fewer objects, especially when pushing new refs in a large repository. Note that we exclude the case when the left and right sides are the same because our code sets them to the same thing in some cases even though Git does not, so we cannot reason about the values in that case.
1 parent 1b3cf7a commit d0e950d

File tree

4 files changed

+87
-3
lines changed

4 files changed

+87
-3
lines changed

commands/uploader.go

+10-3
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,19 @@ func uploadForRefUpdates(ctx *uploadContext, updates []*git.RefUpdate, pushAll b
3131
}()
3232

3333
verifyLocksForUpdates(ctx.lockVerifier, updates)
34+
rightSides := make([]string, 0, len(updates))
35+
for _, update := range updates {
36+
right := update.Right().Sha
37+
if update.LeftCommitish() != right {
38+
rightSides = append(rightSides, right)
39+
}
40+
}
3441
for _, update := range updates {
3542
// initialized here to prevent looped defer
3643
q := ctx.NewQueue(
3744
tq.RemoteRef(update.Right()),
3845
)
39-
err := uploadLeftOrAll(gitscanner, ctx, q, update, pushAll)
46+
err := uploadLeftOrAll(gitscanner, ctx, q, rightSides, update, pushAll)
4047
ctx.CollectErrors(q)
4148

4249
if err != nil {
@@ -47,7 +54,7 @@ func uploadForRefUpdates(ctx *uploadContext, updates []*git.RefUpdate, pushAll b
4754
return nil
4855
}
4956

50-
func uploadLeftOrAll(g *lfs.GitScanner, ctx *uploadContext, q *tq.TransferQueue, update *git.RefUpdate, pushAll bool) error {
57+
func uploadLeftOrAll(g *lfs.GitScanner, ctx *uploadContext, q *tq.TransferQueue, bases []string, update *git.RefUpdate, pushAll bool) error {
5158
cb := ctx.gitScannerCallback(q)
5259
if pushAll {
5360
if err := g.ScanRefWithDeleted(update.LeftCommitish(), cb); err != nil {
@@ -59,7 +66,7 @@ func uploadLeftOrAll(g *lfs.GitScanner, ctx *uploadContext, q *tq.TransferQueue,
5966
if left == right {
6067
right = ""
6168
}
62-
if err := g.ScanRangeToRemote(left, right, cb); err != nil {
69+
if err := g.ScanMultiRangeToRemote(left, bases, cb); err != nil {
6370
return err
6471
}
6572
}

lfs/gitscanner.go

+19
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,25 @@ func (s *GitScanner) ScanRangeToRemote(left, right string, cb GitScannerFoundPoi
9595
return scanLeftRightToChan(s, callback, left, right, s.cfg.OSEnv(), s.opts(ScanRangeToRemoteMode))
9696
}
9797

98+
// ScanMultiRangeToRemote scans through all commits starting at the left ref but
99+
// not including the right ref (if given) that the given remote does not have.
100+
// See RemoteForPush().
101+
func (s *GitScanner) ScanMultiRangeToRemote(left string, rights []string, cb GitScannerFoundPointer) error {
102+
callback, err := firstGitScannerCallback(cb, s.FoundPointer)
103+
if err != nil {
104+
return err
105+
}
106+
107+
s.mu.Lock()
108+
if len(s.remote) == 0 {
109+
s.mu.Unlock()
110+
return fmt.Errorf("unable to scan starting at %q: no remote set", left)
111+
}
112+
s.mu.Unlock()
113+
114+
return scanMultiLeftRightToChan(s, callback, left, rights, s.cfg.OSEnv(), s.opts(ScanRangeToRemoteMode))
115+
}
116+
98117
// ScanRefs through all commits reachable by refs contained in "include" and
99118
// not reachable by any refs included in "excluded"
100119
func (s *GitScanner) ScanRefs(include, exclude []string, cb GitScannerFoundPointer) error {

lfs/gitscanner_refs.go

+8
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,14 @@ func scanLeftRightToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer,
100100
return scanRefsToChan(scanner, pointerCb, []string{refLeft}, []string{refRight}, osEnv, opt)
101101
}
102102

103+
// scanMultiLeftRightToChan takes a ref and a set of bases and returns a channel
104+
// of WrappedPointer objects for all Git LFS pointers it finds for that ref.
105+
// Reports unique oids once only, not multiple times if >1 file uses the same
106+
// content
107+
func scanMultiLeftRightToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLeft string, bases []string, osEnv config.Environment, opt *ScanRefsOptions) error {
108+
return scanRefsToChan(scanner, pointerCb, []string{refLeft}, bases, osEnv, opt)
109+
}
110+
103111
// revListShas uses git rev-list to return the list of object sha1s
104112
// for the given ref. If all is true, ref is ignored. It returns a
105113
// channel from which sha1 strings can be read.

t/t-push.sh

+50
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,56 @@ begin_test 'push with data the server already has'
741741
)
742742
end_test
743743

744+
begin_test 'push with multiple refs and data the server already has'
745+
(
746+
set -e
747+
748+
reponame="push-multi-ref-server-data"
749+
setup_remote_repo "$reponame"
750+
clone_repo "$reponame" "$reponame"
751+
752+
git lfs track "*.dat"
753+
git add .gitattributes
754+
git commit -m "initial commit"
755+
756+
contents="abc123"
757+
contents_oid="$(calc_oid "$contents")"
758+
printf "%s" "$contents" > a.dat
759+
git add a.dat
760+
git commit -m "add a.dat"
761+
762+
git push origin master
763+
764+
assert_server_object "$reponame" "$contents_oid"
765+
766+
contents2="def456"
767+
contents2_oid="$(calc_oid "$contents2")"
768+
printf "%s" "$contents2" > b.dat
769+
git add b.dat
770+
git commit -m "add b.dat"
771+
772+
# Create a tag. Normally this would cause the entire history to be traversed
773+
# since it's a new ref, but we no longer do that since we're pushing multiple
774+
# refs.
775+
git tag -m v1.0.0 -a v1.0.0
776+
777+
# We remove the original object. The server already has this.
778+
delete_local_object "$contents_oid"
779+
780+
# We use the URL so that we cannot take advantage of the existing "origin/*"
781+
# refs that we know the server must have.
782+
GIT_TRACE=1 GIT_TRANSFER_TRACE=1 GIT_CURL_VERBOSE=1 \
783+
git push "$(git config remote.origin.url)" master v1.0.0 2>&1 | tee push.log
784+
785+
# We should not find a batch request for the object which is in the earlier
786+
# version of master, since we know the remote side has it.
787+
[ "$(grep -c "$contents_oid" push.log)" = 0 ]
788+
789+
# Yet we should have pushed the new object successfully.
790+
assert_server_object "$reponame" "$contents2_oid"
791+
)
792+
end_test
793+
744794
begin_test "push custom reference"
745795
(
746796
set -e

0 commit comments

Comments
 (0)