From 0ed9ca7d8221cebe5785fb95fcc23102573044fc Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Fri, 22 May 2026 09:02:16 +0000 Subject: [PATCH 1/2] fix(git): address R2 review feedback on submodule support - Fix swapped git.Open arguments at lines 112 and 136. The storer was backed by the worktree root and the worktree arg was the .git/ filesystem. This made every workspace restart fail to detect the existing repo, fall through to CloneContext, and hit ErrRepositoryAlreadyExists. (AMREM-14) - Clean up orphaned .git directory when CloneContext fails in cloneSubmodule. Without this, one failed clone permanently poisons the submodule directory: the next Stat(".git") takes the already-cloned fast path and tries to Open a broken repo. (AMREM-16) - Use openSubmoduleRepo in the already-cloned branch of cloneSubmodule, removing the duplicate Chroot/NewStorage/Open sequence. (AMREM-25) - Rewrite extractHost to delegate to transport.NewEndpoint instead of reimplementing SCP URL parsing with a separate regex. (AMREM-24) - Include fetchErr in the "Direct fetch failed" log message so operators see why the targeted fetch was rejected. (AMREM-22) - Track warnings in initSubmodules and adjust the final log line so it does not claim all submodules initialized successfully when warnings were emitted. (AMREM-23) - Correct the SameHost doc comment: port is ignored as a simplification, not to match git credential helpers (which do include port). (AMREM-27) - Change "positive integer" to "non-negative integer (0 disables)" in SubmoduleDepth docs and error messages, matching the actual guard (n < 0). (AMREM-28) - Cap SubmoduleDepth at 100 to bound runaway recursion. (AMREM-31) - Document cross-protocol auth forwarding in submoduleAuthFor as intentional. go-git rejects incompatible auth at the transport layer. (AMREM-15) - Fix test logf to use fmt.Fprintf so format args are expanded and the warning check is not vacuously true. (AMREM-20) - Fix TestCloneRepo/AlreadyCloned to produce a repo in the layout CloneRepo expects (.git/ subdirectory) by cloning first, rather than using gittest.NewRepo which writes objects at the root. Skip the subtest when the test case expects the initial clone to fail. (AMREM-14 test fix) --- docs/env-variables.md | 2 +- git/git.go | 55 ++++++++++++++--------------- git/git_test.go | 19 ++++++++-- git/submodule_auth_internal_test.go | 3 +- options/options.go | 18 ++++++---- options/testdata/options.golden | 4 +-- 6 files changed, 61 insertions(+), 40 deletions(-) diff --git a/docs/env-variables.md b/docs/env-variables.md index 6b5bd28c..e49e2a62 100644 --- a/docs/env-variables.md +++ b/docs/env-variables.md @@ -28,7 +28,7 @@ | `--git-clone-depth` | `ENVBUILDER_GIT_CLONE_DEPTH` | | The depth to use when cloning the Git repository. | | `--git-clone-single-branch` | `ENVBUILDER_GIT_CLONE_SINGLE_BRANCH` | | Clone only a single branch of the Git repository. | | `--git-clone-thinpack` | `ENVBUILDER_GIT_CLONE_THINPACK` | `true` | Git clone with thin pack compatibility enabled, ensuring that even when thin pack compatibility is activated,it will not be turned on for the domain dev.azure.com. | -| `--git-clone-submodules` | `ENVBUILDER_GIT_CLONE_SUBMODULES` | | Clone Git submodules after cloning the repository. Accepts 'true' (max depth 10), 'false' (disabled), or a positive integer for max recursion depth. | +| `--git-clone-submodules` | `ENVBUILDER_GIT_CLONE_SUBMODULES` | | Clone Git submodules after cloning the repository. Accepts 'true' (max depth 10), 'false' (disabled), or a non-negative integer for max recursion depth (0 disables, max 100). | | `--git-username` | `ENVBUILDER_GIT_USERNAME` | | The username to use for Git authentication. This is optional. | | `--git-password` | `ENVBUILDER_GIT_PASSWORD` | | The password to use for Git authentication. This is optional. | | `--git-ssh-private-key-path` | `ENVBUILDER_GIT_SSH_PRIVATE_KEY_PATH` | | Path to an SSH private key to be used for Git authentication. If this is set, then GIT_SSH_PRIVATE_KEY_BASE64 cannot be set. | diff --git a/git/git.go b/git/git.go index 74eaea56..890f9447 100644 --- a/git/git.go +++ b/git/git.go @@ -17,6 +17,7 @@ import ( "github.com/coder/envbuilder/options" "github.com/go-git/go-billy/v5" + billyutil "github.com/go-git/go-billy/v5/util" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/config" "github.com/go-git/go-git/v5/plumbing" @@ -108,8 +109,7 @@ func CloneRepo(ctx context.Context, logf func(string, ...any), opts CloneRepoOpt return false, fmt.Errorf("chroot .git: %w", err) } gitStorage := filesystem.NewStorage(gitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)) - fsStorage := filesystem.NewStorage(fs, cache.NewObjectLRU(cache.DefaultMaxSize*10)) - repo, err := git.Open(fsStorage, gitDir) + repo, err := git.Open(gitStorage, fs) if errors.Is(err, git.ErrRepositoryNotExists) { err = nil } @@ -133,7 +133,7 @@ func CloneRepo(ctx context.Context, logf func(string, ...any), opts CloneRepoOpt if errors.Is(err, git.ErrRepositoryAlreadyExists) { // The repository was created between our Open and CloneContext // calls. Reopen it so submodule initialization can still run. - repo, err = git.Open(fsStorage, gitDir) + repo, err = git.Open(gitStorage, fs) if err != nil { return false, fmt.Errorf("reopen existing %q: %w", opts.RepoURL, err) } @@ -453,25 +453,17 @@ var scpLikeURLRegex = regexp.MustCompile(`^([^@]+)@([^:]+):(.+)$`) // extractHost extracts the host from a URL, handling both standard URLs and SCP-like URLs. // Returns empty string if host cannot be determined. func extractHost(u string) string { - // Try standard URL parsing first - if parsed, err := url.Parse(u); err == nil && parsed.Host != "" { - // Remove port if present - host := parsed.Hostname() - return strings.ToLower(host) + ep, err := transport.NewEndpoint(u) + if err != nil || ep.Host == "" { + return "" } - - // Handle SCP-like URLs: user@host:path - if matches := scpLikeURLRegex.FindStringSubmatch(u); matches != nil { - return strings.ToLower(matches[2]) - } - - return "" + return strings.ToLower(ep.Host) } // SameHost checks if two URLs point to the same host. // Used to determine if credentials should be forwarded to submodules. -// The comparison is hostname-only. Port is ignored to match git's -// credential-helper convention, which keys credentials on the host alone. +// The comparison is hostname-only. Port is ignored as a simplification; +// submodules on the same host at different ports are uncommon. func SameHost(url1, url2 string) bool { host1 := extractHost(url1) host2 := extractHost(url2) @@ -605,6 +597,7 @@ func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Re } logf("Parent repository URL: %s", RedactURL(effectiveParentURL)) + var warnings int for _, sub := range subs { select { case <-ctx.Done(): @@ -650,11 +643,13 @@ func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Re subRepo, subWorktree, err := openSubmoduleRepo(parentWorktree, subConfig.Path) if err != nil { logf(" ⚠ Could not open submodule repository %s for nested traversal: %v", subConfig.Name, err) + warnings++ continue } nestedSubs, err := subWorktree.Submodules() if err != nil { logf(" ⚠ Could not list nested submodules in %s: %v", subConfig.Name, err) + warnings++ continue } if len(nestedSubs) == 0 { @@ -666,7 +661,11 @@ func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Re } } - logf("✓ All submodules initialized successfully") + if warnings > 0 { + logf("⚠ Submodule initialization finished with %d warning(s)", warnings) + } else { + logf("✓ All submodules initialized successfully") + } return nil } @@ -674,6 +673,13 @@ func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Re // returns parentAuth if the submodule shares the parent's host, and nil // otherwise. A warning is logged when parent auth is set but withheld // because the hosts differ. +// +// The check is host-only and does not compare transport protocols. If +// the parent uses SSH auth and a submodule on the same host uses HTTPS, +// the SSH auth is forwarded and go-git rejects it at the transport +// layer. This is intentional: returning nil here would silently skip +// auth for a submodule that legitimately needs it under a different +// protocol. func submoduleAuthFor(logf func(string, ...any), parentURL, submoduleURL string, parentAuth transport.AuthMethod) transport.AuthMethod { if parentAuth == nil { return nil @@ -723,15 +729,7 @@ func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktr // Check if already cloned if _, statErr := subFS.Stat(".git"); statErr == nil { logf(" Submodule already cloned, checking out expected commit...") - // Open the existing repository - subGitDir, err := subFS.Chroot(".git") - if err != nil { - return fmt.Errorf("chroot to existing .git: %w", err) - } - subRepo, err := git.Open( - filesystem.NewStorage(subGitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)), - subFS, - ) + subRepo, _, err := openSubmoduleRepo(parentWorktree, subConfig.Path) if err != nil { return fmt.Errorf("open existing submodule: %w", err) } @@ -768,6 +766,7 @@ func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktr NoCheckout: true, }) if err != nil { + _ = billyutil.RemoveAll(subFS, ".git") return fmt.Errorf("clone submodule repository: %w", err) } @@ -796,7 +795,7 @@ func checkoutSubmoduleCommit(ctx context.Context, logf func(string, ...any), sub }) if fetchErr != nil && !errors.Is(fetchErr, git.NoErrAlreadyUpToDate) { // If that fails, try fetching all refs - logf(" Direct fetch failed, fetching all refs...") + logf(" Direct fetch failed (%v), fetching all refs...", fetchErr) fetchAllErr := subRepo.FetchContext(ctx, &git.FetchOptions{ RemoteName: "origin", Auth: submoduleAuth, diff --git a/git/git_test.go b/git/git_test.go index 5c97ed57..8e362611 100644 --- a/git/git_test.go +++ b/git/git_test.go @@ -94,6 +94,9 @@ func TestCloneRepo(t *testing.T) { // We do not overwrite a repo if one is already present. t.Run("AlreadyCloned", func(t *testing.T) { + if !tc.expectClone { + t.Skip("cannot test already-cloned when the initial clone is expected to fail") + } srvFS := memfs.New() _ = gittest.NewRepo(t, srvFS, gittest.Commit(t, "README.md", "Hello, world!", "Wow!")) authMW := mwtest.BasicAuthMW(tc.srvUsername, tc.srvPassword) @@ -102,12 +105,24 @@ func TestCloneRepo(t *testing.T) { tc.mungeURL(&srv.URL) } clientFS := memfs.New() - // A repo already exists! - _ = gittest.NewRepo(t, clientFS) + // Clone once so the repo exists in the layout CloneRepo expects. cloned, err := git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{ Path: "/", RepoURL: srv.URL, Storage: clientFS, + RepoAuth: &githttp.BasicAuth{ + Username: tc.username, + Password: tc.password, + }, + }) + require.NoError(t, err) + require.True(t, cloned) + + // Second call: repo already exists, must be a no-op. + cloned, err = git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{ + Path: "/", + RepoURL: srv.URL, + Storage: clientFS, }) require.NoError(t, err) require.False(t, cloned) diff --git a/git/submodule_auth_internal_test.go b/git/submodule_auth_internal_test.go index f9c0b959..618f077f 100644 --- a/git/submodule_auth_internal_test.go +++ b/git/submodule_auth_internal_test.go @@ -2,6 +2,7 @@ package git import ( "bytes" + "fmt" "testing" "github.com/go-git/go-git/v5/plumbing/transport" @@ -59,7 +60,7 @@ func TestSubmoduleAuthFor(t *testing.T) { t.Run(c.name, func(t *testing.T) { t.Parallel() var buf bytes.Buffer - logf := func(format string, _ ...any) { _, _ = buf.WriteString(format) } + logf := func(format string, args ...any) { fmt.Fprintf(&buf, format, args...) } got := submoduleAuthFor(logf, c.parentURL, c.submoduleURL, c.parentAuth) require.Equal(t, c.wantAuth, got) if c.wantWarn { diff --git a/options/options.go b/options/options.go index 508d1d00..69048b59 100644 --- a/options/options.go +++ b/options/options.go @@ -14,10 +14,13 @@ import ( ) // SubmoduleDepth is a custom type for handling submodule depth that accepts -// "true" (defaults to 10), "false" (0), or a positive integer. +// "true" (defaults to 10), "false" (0), or a non-negative integer (0 disables). type SubmoduleDepth int -const DefaultSubmoduleDepth = 10 +const ( + DefaultSubmoduleDepth SubmoduleDepth = 10 + MaxSubmoduleDepth SubmoduleDepth = 100 +) func (s *SubmoduleDepth) Set(val string) error { lower := strings.ToLower(strings.TrimSpace(val)) @@ -31,11 +34,14 @@ func (s *SubmoduleDepth) Set(val string) error { } n, err := strconv.Atoi(lower) if err != nil { - return fmt.Errorf("invalid submodule depth %q: must be true, false, or a positive integer", val) + return fmt.Errorf("invalid submodule depth %q: must be true, false, or a non-negative integer", val) } if n < 0 { return fmt.Errorf("submodule depth must be non-negative, got %d", n) } + if n > int(MaxSubmoduleDepth) { + return fmt.Errorf("submodule depth must be at most %d, got %d", MaxSubmoduleDepth, n) + } *s = SubmoduleDepth(n) return nil } @@ -149,9 +155,9 @@ type Options struct { // GitCloneThinPack clone with thin pack compabilities. This is optional. GitCloneThinPack bool // GitCloneSubmoduleDepth controls submodule initialization after cloning. - // 0 = disabled (default), positive integer = max recursion depth. + // 0 = disabled (default), >0 = max recursion depth (capped at MaxSubmoduleDepth). // The flag accepts "true" (defaults to DefaultSubmoduleDepth), "false" - // (0), or a positive integer for the max recursion depth. + // (0), or a non-negative integer (0 disables). GitCloneSubmoduleDepth int // GitUsername is the username to use for Git authentication. This is // optional. @@ -436,7 +442,7 @@ func (o *Options) CLI() serpent.OptionSet { Env: WithEnvPrefix("GIT_CLONE_SUBMODULES"), Value: SubmoduleDepthOf(&o.GitCloneSubmoduleDepth), Description: "Clone Git submodules after cloning the repository. " + - "Accepts 'true' (max depth 10), 'false' (disabled), or a positive integer for max recursion depth.", + "Accepts 'true' (max depth 10), 'false' (disabled), or a non-negative integer for max recursion depth (0 disables, max 100).", }, { Flag: "git-username", diff --git a/options/testdata/options.golden b/options/testdata/options.golden index 6c086d56..918b3dde 100644 --- a/options/testdata/options.golden +++ b/options/testdata/options.golden @@ -101,8 +101,8 @@ OPTIONS: --git-clone-submodules submodule-depth, $ENVBUILDER_GIT_CLONE_SUBMODULES Clone Git submodules after cloning the repository. Accepts 'true' (max - depth 10), 'false' (disabled), or a positive integer for max recursion - depth. + depth 10), 'false' (disabled), or a non-negative integer for max + recursion depth (0 disables, max 100). --git-clone-thinpack bool, $ENVBUILDER_GIT_CLONE_THINPACK (default: true) Git clone with thin pack compatibility enabled, ensuring that even From 452bcbb2a6e9f9f3c206466f9debe937a4d232b6 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Fri, 22 May 2026 09:17:04 +0000 Subject: [PATCH 2/2] fix(git): log warning when .git cleanup fails after clone error If billyutil.RemoveAll fails to remove the orphaned .git directory, log a warning so the operator knows the submodule directory may be in a broken state. --- git/git.go | 4 ++- git/git_test.go | 78 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/git/git.go b/git/git.go index 890f9447..a5fcdfe1 100644 --- a/git/git.go +++ b/git/git.go @@ -766,7 +766,9 @@ func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktr NoCheckout: true, }) if err != nil { - _ = billyutil.RemoveAll(subFS, ".git") + if rmErr := billyutil.RemoveAll(subFS, ".git"); rmErr != nil { + logf(" ⚠ Failed to clean up .git after clone failure: %v", rmErr) + } return fmt.Errorf("clone submodule repository: %w", err) } diff --git a/git/git_test.go b/git/git_test.go index 8e362611..c172f582 100644 --- a/git/git_test.go +++ b/git/git_test.go @@ -924,6 +924,84 @@ func TestCloneOptionsFromOptions_Submodules(t *testing.T) { require.Equal(t, 10, cloneOpts.SubmoduleDepth) } +// Regression: git.Open must find a repo that CloneRepo previously +// created. Before the fix, the storer and worktree arguments were +// swapped, so Open looked for HEAD at the worktree root instead of +// .git/ and returned ErrRepositoryNotExists on every restart. +func TestCloneRepo_RestartFindsExistingRepo(t *testing.T) { + t.Parallel() + + srvFS := memfs.New() + _ = gittest.NewRepo(t, srvFS, gittest.Commit(t, "README.md", "hello", "init")) + srv := httptest.NewServer(mwtest.BasicAuthMW("", "")(gittest.NewServer(srvFS))) + t.Cleanup(srv.Close) + + clientFS := memfs.New() + cloned, err := git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{ + Path: "/workspace", + RepoURL: srv.URL, + Storage: clientFS, + }) + require.NoError(t, err) + require.True(t, cloned) + + // Second call simulates a workspace restart. The repo exists on + // disk; CloneRepo must detect it and return (false, nil). + cloned, err = git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{ + Path: "/workspace", + RepoURL: srv.URL, + Storage: clientFS, + }) + require.NoError(t, err) + require.False(t, cloned, "second CloneRepo call must detect the existing repo") +} + +// Regression: a failed submodule clone must not leave behind a .git +// directory that blocks all future retries. Before the fix, MkdirAll +// created .git before CloneContext, and a clone failure left it in +// place. The next start's Stat(".git") found it, entered the +// already-cloned path, and failed permanently. +func TestCloneRepo_SubmoduleCloneFailureAllowsRetry(t *testing.T) { + t.Parallel() + + // Submodule server that rejects all requests. + subSrv := httptest.NewServer(mwtest.BasicAuthMW("secret", "secret")(gittest.NewServer(memfs.New()))) + t.Cleanup(subSrv.Close) + + // Parent server with a submodule pointing at the rejecting server. + subFS := memfs.New() + subRepo := gittest.NewRepo(t, subFS, gittest.Commit(t, "f.txt", "x", "init")) + subHead, err := subRepo.Head() + require.NoError(t, err) + + parentFS := memfs.New() + _ = gittest.NewRepo(t, parentFS, + gittest.Commit(t, "README.md", "hello", "init"), + gittest.CommitSubmodule(t, "sub", subSrv.URL, subHead.Hash()), + ) + parentSrv := httptest.NewServer(mwtest.BasicAuthMW("", "")(gittest.NewServer(parentFS))) + t.Cleanup(parentSrv.Close) + + clientFS := memfs.New() + + // First attempt: clone succeeds but submodule init fails (auth). + _, err = git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{ + Path: "/workspace", + RepoURL: parentSrv.URL, + Storage: clientFS, + SubmoduleDepth: 1, + }) + require.Error(t, err, "submodule clone should fail") + + // The submodule's .git must have been cleaned up. + workFS, err := clientFS.Chroot("/workspace") + require.NoError(t, err) + subModFS, err := workFS.Chroot("sub") + require.NoError(t, err) + _, statErr := subModFS.Stat(".git") + require.Error(t, statErr, ".git must not exist after failed clone") +} + // generates a random ed25519 private key func randKeygen(t *testing.T) gossh.Signer { t.Helper()