From a090685ac2d254d063b3ce67016928a2f94ff148 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Thu, 21 May 2026 14:13:36 +0000 Subject: [PATCH 1/3] refactor(options): rename GitCloneSubmodules field and tidy SubmoduleDepth.Set - Rename Options.GitCloneSubmodules to Options.GitCloneSubmoduleDepth to align with the internal SubmoduleDepth type and make the int's meaning (recursion depth, not count) self-evident. Env var and flag names are unchanged so existing user configuration keeps working. - Use the trimmed/lowered value when parsing the integer in SubmoduleDepth.Set so that values like " 5 " parse correctly. - Move the GIT_CLONE_SUBMODULES tests out of the 'bool' subtest group into a sibling 'submodule depth' group; add a whitespace case. Addresses DEREM-14, DEREM-18, DEREM-19 on coder/envbuilder#485. --- git/git.go | 2 +- git/git_test.go | 10 +++++----- options/options.go | 11 ++++++----- options/options_test.go | 21 +++++++++++++++------ 4 files changed, 27 insertions(+), 17 deletions(-) diff --git a/git/git.go b/git/git.go index 8f048aa0..233611b2 100644 --- a/git/git.go +++ b/git/git.go @@ -376,7 +376,7 @@ func CloneOptionsFromOptions(logf func(string, ...any), options options.Options) ThinPack: options.GitCloneThinPack, Depth: int(options.GitCloneDepth), CABundle: caBundle, - SubmoduleDepth: options.GitCloneSubmodules, + SubmoduleDepth: options.GitCloneSubmoduleDepth, } cloneOpts.RepoAuth = SetupRepoAuth(logf, &options) diff --git a/git/git_test.go b/git/git_test.go index 4125686b..3096ab28 100644 --- a/git/git_test.go +++ b/git/git_test.go @@ -864,11 +864,11 @@ func TestCloneOptionsFromOptions_Submodules(t *testing.T) { fs := memfs.New() opts := options.Options{ - Filesystem: fs, - WorkspaceFolder: "/workspace", - GitURL: "https://example.com/example/repo.git", - GitCloneSubmodules: 10, - GitCloneThinPack: true, + Filesystem: fs, + WorkspaceFolder: "/workspace", + GitURL: "https://example.com/example/repo.git", + GitCloneSubmoduleDepth: 10, + GitCloneThinPack: true, } cloneOpts, err := git.CloneOptionsFromOptions(t.Logf, opts) diff --git a/options/options.go b/options/options.go index 7d8b554f..508d1d00 100644 --- a/options/options.go +++ b/options/options.go @@ -29,7 +29,7 @@ func (s *SubmoduleDepth) Set(val string) error { *s = 0 return nil } - n, err := strconv.Atoi(val) + n, err := strconv.Atoi(lower) if err != nil { return fmt.Errorf("invalid submodule depth %q: must be true, false, or a positive integer", val) } @@ -148,10 +148,11 @@ type Options struct { GitCloneSingleBranch bool // GitCloneThinPack clone with thin pack compabilities. This is optional. GitCloneThinPack bool - // GitCloneSubmodules controls submodule initialization after cloning. + // GitCloneSubmoduleDepth controls submodule initialization after cloning. // 0 = disabled (default), positive integer = max recursion depth. - // Accepts "true" (defaults to 10), "false" (0), or a positive integer. - GitCloneSubmodules int + // The flag accepts "true" (defaults to DefaultSubmoduleDepth), "false" + // (0), or a positive integer for the max recursion depth. + GitCloneSubmoduleDepth int // GitUsername is the username to use for Git authentication. This is // optional. GitUsername string @@ -433,7 +434,7 @@ func (o *Options) CLI() serpent.OptionSet { { Flag: "git-clone-submodules", Env: WithEnvPrefix("GIT_CLONE_SUBMODULES"), - Value: SubmoduleDepthOf(&o.GitCloneSubmodules), + Value: SubmoduleDepthOf(&o.GitCloneSubmoduleDepth), Description: "Clone Git submodules after cloning the repository. " + "Accepts 'true' (max depth 10), 'false' (disabled), or a positive integer for max recursion depth.", }, diff --git a/options/options_test.go b/options/options_test.go index cdeec083..8979ea41 100644 --- a/options/options_test.go +++ b/options/options_test.go @@ -85,23 +85,32 @@ func TestEnvOptionParsing(t *testing.T) { o := runCLI() require.Equal(t, o.BinaryPath, val) }) + }) - t.Run("git clone submodules true", func(t *testing.T) { + t.Run("submodule depth", func(t *testing.T) { + t.Run("true", func(t *testing.T) { t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), "true") o := runCLI() - require.Equal(t, 10, o.GitCloneSubmodules) // "true" defaults to depth 10 + // "true" defaults to DefaultSubmoduleDepth. + require.Equal(t, int(options.DefaultSubmoduleDepth), o.GitCloneSubmoduleDepth) }) - t.Run("git clone submodules depth", func(t *testing.T) { + t.Run("integer", func(t *testing.T) { t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), "3") o := runCLI() - require.Equal(t, 3, o.GitCloneSubmodules) + require.Equal(t, 3, o.GitCloneSubmoduleDepth) + }) + + t.Run("integer with whitespace", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), " 5 ") + o := runCLI() + require.Equal(t, 5, o.GitCloneSubmoduleDepth) }) - t.Run("git clone submodules false", func(t *testing.T) { + t.Run("false", func(t *testing.T) { t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), "false") o := runCLI() - require.Equal(t, 0, o.GitCloneSubmodules) + require.Equal(t, 0, o.GitCloneSubmoduleDepth) }) }) } From a46a9599a5e2671a72f066430eac8936d0b7e994 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Thu, 21 May 2026 14:40:10 +0000 Subject: [PATCH 2/3] fix(git): harden submodule cloning and credential handling This commit reworks the submodule path in git/git.go to address a cluster of issues found in review. The changes are correlated so they land together. Credential handling ------------------- - ResolveSubmoduleURL now strips userinfo from the parent endpoint before returning. Without this, a parent URL like https://token@github.com/org/repo would re-emit its credentials into every resolved submodule URL (via go-git's Endpoint.String). The password is always stripped; the user portion is preserved only for ssh:// endpoints where it is the SSH login name rather than a credential. (DEREM-7) - initSubmodules now tracks the parent URL and auth that reached the current recursion level, and nested recursion uses the auth that was forwarded at this level (which may be nil) as the new parent auth. Previously the recursive call copied opts wholesale, so an auth- withheld step still left opts.RepoAuth populated for the next level, allowing two-hop credential exfiltration when the nested submodule matched the malicious host. (DEREM-1) - A new submoduleAuthFor helper centralizes the SameHost gating decision so the same rule applies to every clone and fetch. Submodule lifecycle ------------------- - CloneRepo now invokes initSubmodules even when the parent repo already exists on disk. A transient submodule init failure used to permanently leave the workspace without submodule content because subsequent runs short-circuited on the existing parent. (DEREM-6) - The 'already cloned' path now chroots to '.git' before constructing the filesystem storage, matching the fresh-clone path. The previous layout passed the worktree root as git storage and would fail with a misleading 'repository does not exist' error on any partial-prior- run recovery. (DEREM-5) - The 'already cloned' checkout error now includes the expected commit hash, matching the fresh-clone error. (DEREM-15) - Nested-submodule recursion no longer relies on sub.Repository(), which requires the submodule to be registered in the parent's .git/config via sub.Init(). The custom clone path bypasses sub.Init, so sub.Repository() always returned ErrSubmoduleNotInitialized and nested recursion silently no-op'd. The new helper openSubmoduleRepo opens the on-disk repo we just wrote. (DEREM-8) - The latent nil-pointer path on ErrRepositoryAlreadyExists is gone because the fresh-clone branch is only entered when '.git' did not exist, removing the prior bare !=err guard. (DEREM-11) Other polish ------------- - Submodule clones now propagate opts.Depth. The expected-commit fetch-by-hash fallback already deepens when needed. (DEREM-9) - The submodule loop now checks ctx.Done() at the top of each iteration so cancellation lands promptly between submodules. (DEREM-13) - Fetch error checks use errors.Is(err, git.NoErrAlreadyUpToDate) instead of a bare !=, consistent with the rest of the file. (DEREM-3) - Errors when opening or listing the nested submodule worktree are now logged before continuing instead of being silently swallowed. (DEREM-16) - Removed two unnecessary 'tc := tc' captures in the test file. Go 1.24 makes per-iteration loop variable scoping the default. (DEREM-4) - TestResolveSubmoduleURL now exercises the credential-stripping path, ssh:// and git:// parent URLs, and a parent URL that fails parsing, covering both the previously unused expectErr field and the schemes requested in the earlier review. (DEREM-17, AMREM-2) Addresses DEREM-1, DEREM-3, DEREM-4, DEREM-5, DEREM-6, DEREM-7, DEREM-8, DEREM-9, DEREM-11, DEREM-13, DEREM-15, DEREM-16, DEREM-17, and AMREM-2 on coder/envbuilder#485. --- git/git.go | 281 +++++++++++++++++++++++----------------- git/git_test.go | 41 +++++- options/options_test.go | 1 - 3 files changed, 200 insertions(+), 123 deletions(-) diff --git a/git/git.go b/git/git.go index 233611b2..74eaea56 100644 --- a/git/git.go +++ b/git/git.go @@ -116,37 +116,48 @@ func CloneRepo(ctx context.Context, logf func(string, ...any), opts CloneRepoOpt if err != nil { return false, fmt.Errorf("open %q: %w", opts.RepoURL, err) } - if repo != nil { - return false, nil - } - repo, err = git.CloneContext(ctx, gitStorage, fs, &git.CloneOptions{ - URL: parsed.Cleaned, - Auth: opts.RepoAuth, - Progress: opts.Progress, - ReferenceName: plumbing.ReferenceName(parsed.Reference), - InsecureSkipTLS: opts.Insecure, - Depth: opts.Depth, - SingleBranch: opts.SingleBranch, - CABundle: opts.CABundle, - ProxyOptions: opts.ProxyOptions, - }) - if errors.Is(err, git.ErrRepositoryAlreadyExists) { - return false, nil - } - if err != nil { - return false, fmt.Errorf("clone %q: %w", opts.RepoURL, err) + alreadyCloned := repo != nil + if !alreadyCloned { + repo, err = git.CloneContext(ctx, gitStorage, fs, &git.CloneOptions{ + URL: parsed.Cleaned, + Auth: opts.RepoAuth, + Progress: opts.Progress, + ReferenceName: plumbing.ReferenceName(parsed.Reference), + InsecureSkipTLS: opts.Insecure, + Depth: opts.Depth, + SingleBranch: opts.SingleBranch, + CABundle: opts.CABundle, + ProxyOptions: opts.ProxyOptions, + }) + if errors.Is(err, git.ErrRepositoryAlreadyExists) { + // The repository was created between our Open and CloneContext + // calls. Reopen it so submodule initialization can still run. + repo, err = git.Open(fsStorage, gitDir) + if err != nil { + return false, fmt.Errorf("reopen existing %q: %w", opts.RepoURL, err) + } + alreadyCloned = true + } + if err != nil { + return false, fmt.Errorf("clone %q: %w", opts.RepoURL, err) + } } - // Initialize submodules if requested + // Initialize submodules on every call, not only after a fresh clone, so + // that a transient failure during the first run can be retried on the + // next workspace start. if opts.SubmoduleDepth > 0 { - err = initSubmodules(ctx, logf, repo, opts, 1) + w, err := repo.Worktree() if err != nil { - return true, fmt.Errorf("init submodules: %w", err) + return !alreadyCloned, fmt.Errorf("get worktree: %w", err) + } + if err := initSubmodules(ctx, logf, repo, w, opts.RepoURL, opts.RepoAuth, opts, 1); err != nil { + return !alreadyCloned, fmt.Errorf("init submodules: %w", err) } } - return true, nil + return !alreadyCloned, nil } // ShallowCloneRepo will clone the repository at the given URL into the given path @@ -459,6 +470,8 @@ func extractHost(u string) string { // SameHost checks if two URLs point to the same host. // Used to determine if credentials should be forwarded to submodules. +// The comparison is hostname-only. Port is ignored to match git's +// credential-helper convention, which keys credentials on the host alone. func SameHost(url1, url2 string) bool { host1 := extractHost(url1) host2 := extractHost(url2) @@ -522,6 +535,15 @@ func ResolveSubmoduleURL(parentURL, submoduleURL string) (string, error) { return "", fmt.Errorf("parse parent URL: %w", err) } + // Credentials embedded in the parent URL must not leak into resolved + // submodule URLs. They should flow only through CloneRepoOptions.RepoAuth, + // which is gated by SameHost. For ssh:// endpoints the user portion is + // the SSH login name rather than a credential, so it is preserved. + parentEP.Password = "" + if !strings.EqualFold(parentEP.Protocol, "ssh") { + parentEP.User = "" + } + // For relative URLs, we need to resolve them against the parent's path. // The parent path represents a repository (like a file in filesystem terms), // so ../something means "sibling repository". @@ -550,21 +572,19 @@ func ResolveSubmoduleURL(parentURL, submoduleURL string) (string, error) { return parentEP.String(), nil } -// initSubmodules recursively initializes and updates all submodules in the repository. -// currentDepth tracks the current recursion level (starts at 1). -func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Repository, opts CloneRepoOptions, currentDepth int) error { +// initSubmodules recursively initializes and updates the submodules of repo. +// currentDepth tracks the current recursion level, starting at 1. parentAuth +// is the auth that was actually used to fetch the current parent. It must be +// the auth for this level, not the root auth, so that a credential withheld +// at any point in the chain stays withheld for every level below it. +func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Repository, parentWorktree *git.Worktree, parentURL string, parentAuth transport.AuthMethod, opts CloneRepoOptions, currentDepth int) error { if currentDepth > opts.SubmoduleDepth { logf("⚠ Skipping nested submodules: max depth %d reached", opts.SubmoduleDepth) return nil } logf("🔗 Initializing git submodules (depth %d/%d)...", currentDepth, opts.SubmoduleDepth) - w, err := repo.Worktree() - if err != nil { - return fmt.Errorf("get worktree: %w", err) - } - - subs, err := w.Submodules() + subs, err := parentWorktree.Submodules() if err != nil { return fmt.Errorf("get submodules: %w", err) } @@ -577,18 +597,21 @@ func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Re logf("Found %d submodule(s)", len(subs)) // Get the parent repository URL for resolving relative submodule URLs - cfg, err := repo.Config() - if err != nil { - return fmt.Errorf("get repo config: %w", err) - } - - parentURL := opts.RepoURL - if origin, hasOrigin := cfg.Remotes["origin"]; hasOrigin && len(origin.URLs) > 0 { - parentURL = origin.URLs[0] + effectiveParentURL := parentURL + if cfg, cfgErr := repo.Config(); cfgErr == nil { + if origin, ok := cfg.Remotes["origin"]; ok && len(origin.URLs) > 0 { + effectiveParentURL = origin.URLs[0] + } } - logf("Parent repository URL: %s", RedactURL(parentURL)) + logf("Parent repository URL: %s", RedactURL(effectiveParentURL)) for _, sub := range subs { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + subConfig := sub.Config() logf("📦 Initializing submodule: %s", subConfig.Name) logf(" Submodule path: %s", subConfig.Path) @@ -602,41 +625,44 @@ func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Re logf(" Expected commit: %s", subStatus.Expected) // Resolve the submodule URL - resolvedURL, err := ResolveSubmoduleURL(parentURL, subConfig.URL) + resolvedURL, err := ResolveSubmoduleURL(effectiveParentURL, subConfig.URL) if err != nil { return fmt.Errorf("resolve submodule URL for %q: %w", subConfig.Name, err) } logf(" Resolved URL: %s", RedactURL(resolvedURL)) + submoduleAuth := submoduleAuthFor(logf, effectiveParentURL, resolvedURL, parentAuth) + // Clone the submodule manually - err = cloneSubmodule(ctx, logf, w, subConfig, subStatus.Expected, resolvedURL, opts) - if err != nil { + if err := cloneSubmodule(ctx, logf, parentWorktree, subConfig, subStatus.Expected, resolvedURL, submoduleAuth, opts); err != nil { return fmt.Errorf("clone submodule %q: %w", subConfig.Name, err) } logf("✓ Submodule initialized: %s", subConfig.Name) - // Recursively handle nested submodules - subRepo, err := sub.Repository() + // Recurse into any nested submodules. We open the on-disk repository + // directly rather than calling sub.Repository(), which requires the + // submodule to be registered in .git/config via sub.Init(). The custom + // clone path does not perform that registration. + if currentDepth >= opts.SubmoduleDepth { + continue + } + subRepo, subWorktree, err := openSubmoduleRepo(parentWorktree, subConfig.Path) if err != nil { - logf(" ⚠ Could not open submodule repository %s: %v", subConfig.Name, err) + logf(" ⚠ Could not open submodule repository %s for nested traversal: %v", subConfig.Name, err) continue } - - // Check for nested submodules - subWorktree, err := subRepo.Worktree() - if err == nil { - nestedSubs, err := subWorktree.Submodules() - if err == nil && len(nestedSubs) > 0 { - logf(" Found %d nested submodule(s) in %s", len(nestedSubs), subConfig.Name) - // Create new opts with the submodule's URL as the parent - nestedOpts := opts - nestedOpts.RepoURL = resolvedURL - err = initSubmodules(ctx, logf, subRepo, nestedOpts, currentDepth+1) - if err != nil { - return fmt.Errorf("init nested submodules in %q: %w", subConfig.Name, err) - } - } + nestedSubs, err := subWorktree.Submodules() + if err != nil { + logf(" ⚠ Could not list nested submodules in %s: %v", subConfig.Name, err) + continue + } + if len(nestedSubs) == 0 { + continue + } + logf(" Found %d nested submodule(s) in %s", len(nestedSubs), subConfig.Name) + if err := initSubmodules(ctx, logf, subRepo, subWorktree, resolvedURL, submoduleAuth, opts, currentDepth+1); err != nil { + return fmt.Errorf("init nested submodules in %q: %w", subConfig.Name, err) } } @@ -644,73 +670,92 @@ func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Re return nil } -// cloneSubmodule manually clones a submodule repository -func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktree *git.Worktree, subConfig *config.Submodule, expectedHash plumbing.Hash, resolvedURL string, opts CloneRepoOptions) error { - // Get the submodule directory within the parent worktree - submodulePath := subConfig.Path +// submoduleAuthFor returns the auth to use when fetching a submodule. It +// returns parentAuth if the submodule shares the parent's host, and nil +// otherwise. A warning is logged when parent auth is set but withheld +// because the hosts differ. +func submoduleAuthFor(logf func(string, ...any), parentURL, submoduleURL string, parentAuth transport.AuthMethod) transport.AuthMethod { + if parentAuth == nil { + return nil + } + if SameHost(parentURL, submoduleURL) { + return parentAuth + } + logf(" ⚠ Not forwarding auth to submodule (different host: %s)", extractHost(submoduleURL)) + return nil +} - // Create the submodule directory +// openSubmoduleRepo opens the on-disk repository written by cloneSubmodule +// at parentWorktree/submodulePath/.git and returns it along with its worktree. +func openSubmoduleRepo(parentWorktree *git.Worktree, submodulePath string) (*git.Repository, *git.Worktree, error) { subFS, err := parentWorktree.Filesystem.Chroot(submodulePath) if err != nil { - return fmt.Errorf("chroot to submodule path: %w", err) + return nil, nil, fmt.Errorf("chroot to submodule path: %w", err) + } + subGitDir, err := subFS.Chroot(".git") + if err != nil { + return nil, nil, fmt.Errorf("chroot to .git: %w", err) + } + subRepo, err := git.Open( + filesystem.NewStorage(subGitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)), + subFS, + ) + if err != nil { + return nil, nil, fmt.Errorf("open submodule repository: %w", err) + } + subWorktree, err := subRepo.Worktree() + if err != nil { + return nil, nil, fmt.Errorf("get submodule worktree: %w", err) } + return subRepo, subWorktree, nil +} - // Security: Only forward parent repo auth if submodule is on the same host. - // This prevents credential exfiltration if a malicious .gitmodules points - // to an attacker-controlled server. - var submoduleAuth transport.AuthMethod - if SameHost(opts.RepoURL, resolvedURL) { - submoduleAuth = opts.RepoAuth - } else if opts.RepoAuth != nil { - logf(" ⚠ Not forwarding auth to submodule (different host: %s)", extractHost(resolvedURL)) +// cloneSubmodule clones a single submodule into the parent worktree. +// The caller is responsible for deciding whether to forward auth, so +// submoduleAuth may be nil even when the parent was authenticated. +func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktree *git.Worktree, subConfig *config.Submodule, expectedHash plumbing.Hash, resolvedURL string, submoduleAuth transport.AuthMethod, opts CloneRepoOptions) error { + // Get the submodule directory within the parent worktree + subFS, err := parentWorktree.Filesystem.Chroot(subConfig.Path) + if err != nil { + return fmt.Errorf("chroot to submodule path: %w", err) } // Check if already cloned - _, err = subFS.Stat(".git") - if err == nil { + if _, statErr := subFS.Stat(".git"); statErr == nil { logf(" Submodule already cloned, checking out expected commit...") // Open the existing repository + subGitDir, err := subFS.Chroot(".git") + if err != nil { + return fmt.Errorf("chroot to existing .git: %w", err) + } subRepo, err := git.Open( - filesystem.NewStorage(subFS, cache.NewObjectLRU(cache.DefaultMaxSize)), + filesystem.NewStorage(subGitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)), subFS, ) if err != nil { return fmt.Errorf("open existing submodule: %w", err) } - - subWorktree, err := subRepo.Worktree() - if err != nil { - return fmt.Errorf("get submodule worktree: %w", err) - } - - // Checkout the expected commit - err = subWorktree.Checkout(&git.CheckoutOptions{ - Hash: expectedHash, - }) - if err != nil { - return fmt.Errorf("checkout expected commit: %w", err) - } - return nil + return checkoutSubmoduleCommit(ctx, logf, subRepo, expectedHash, submoduleAuth, opts) } // Clone the submodule logf(" Cloning submodule from: %s", RedactURL(resolvedURL)) // Create .git directory for the submodule - err = subFS.MkdirAll(".git", 0o755) - if err != nil { + if err := subFS.MkdirAll(".git", 0o755); err != nil { return fmt.Errorf("create .git directory: %w", err) } - subGitDir, err := subFS.Chroot(".git") if err != nil { return fmt.Errorf("chroot to .git: %w", err) } - gitStorage := filesystem.NewStorage(subGitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)) - // Clone the submodule repository - // Use SingleBranch=false to fetch all branches so we can find the commit + // Clone the submodule repository. SingleBranch is false so all branches + // are fetched and the expected commit is reachable. We honor the parent's + // clone depth. If the expected commit is not reachable from the shallow + // tip, the fetch-by-hash path in checkoutSubmoduleCommit will deepen as + // needed. subRepo, err := git.CloneContext(ctx, gitStorage, subFS, &git.CloneOptions{ URL: resolvedURL, Auth: submoduleAuth, @@ -718,20 +763,27 @@ func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktr InsecureSkipTLS: opts.Insecure, CABundle: opts.CABundle, ProxyOptions: opts.ProxyOptions, - SingleBranch: false, // Fetch all branches - NoCheckout: true, // Don't checkout yet, we'll do it manually + Depth: opts.Depth, + SingleBranch: false, + NoCheckout: true, }) - if err != nil && !errors.Is(err, git.ErrRepositoryAlreadyExists) { + if err != nil { return fmt.Errorf("clone submodule repository: %w", err) } + return checkoutSubmoduleCommit(ctx, logf, subRepo, expectedHash, submoduleAuth, opts) +} + +// checkoutSubmoduleCommit ensures expectedHash is present in subRepo, +// fetching it from the remote if it is not already there, and then checks +// it out into the submodule's worktree. +func checkoutSubmoduleCommit(ctx context.Context, logf func(string, ...any), subRepo *git.Repository, expectedHash plumbing.Hash, submoduleAuth transport.AuthMethod, opts CloneRepoOptions) error { // Verify the commit exists logf(" Verifying commit exists: %s", expectedHash) - _, err = subRepo.CommitObject(expectedHash) - if err != nil { + if _, err := subRepo.CommitObject(expectedHash); err != nil { // Commit not found, try fetching with the specific hash logf(" Commit not found, attempting to fetch it directly...") - err = subRepo.FetchContext(ctx, &git.FetchOptions{ + fetchErr := subRepo.FetchContext(ctx, &git.FetchOptions{ RemoteName: "origin", RefSpecs: []config.RefSpec{ config.RefSpec("+" + expectedHash.String() + ":" + expectedHash.String()), @@ -742,10 +794,10 @@ func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktr CABundle: opts.CABundle, ProxyOptions: opts.ProxyOptions, }) - if err != nil && err != git.NoErrAlreadyUpToDate { + if fetchErr != nil && !errors.Is(fetchErr, git.NoErrAlreadyUpToDate) { // If that fails, try fetching all refs logf(" Direct fetch failed, fetching all refs...") - err = subRepo.FetchContext(ctx, &git.FetchOptions{ + fetchAllErr := subRepo.FetchContext(ctx, &git.FetchOptions{ RemoteName: "origin", Auth: submoduleAuth, Progress: opts.Progress, @@ -753,14 +805,12 @@ func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktr CABundle: opts.CABundle, ProxyOptions: opts.ProxyOptions, }) - if err != nil && err != git.NoErrAlreadyUpToDate { - return fmt.Errorf("fetch commit %s: %w", expectedHash, err) + if fetchAllErr != nil && !errors.Is(fetchAllErr, git.NoErrAlreadyUpToDate) { + return fmt.Errorf("fetch commit %s: %w", expectedHash, fetchAllErr) } } - // Verify again - _, err = subRepo.CommitObject(expectedHash) - if err != nil { + if _, err := subRepo.CommitObject(expectedHash); err != nil { return fmt.Errorf("commit %s still not found after fetch: %w", expectedHash, err) } } @@ -771,13 +821,8 @@ func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktr if err != nil { return fmt.Errorf("get submodule worktree: %w", err) } - - err = subWorktree.Checkout(&git.CheckoutOptions{ - Hash: expectedHash, - }) - if err != nil { + if err := subWorktree.Checkout(&git.CheckoutOptions{Hash: expectedHash}); err != nil { return fmt.Errorf("checkout expected commit %s: %w", expectedHash, err) } - return nil } diff --git a/git/git_test.go b/git/git_test.go index 3096ab28..5c97ed57 100644 --- a/git/git_test.go +++ b/git/git_test.go @@ -671,7 +671,6 @@ func TestRedactURL(t *testing.T) { } for _, tc := range cases { - tc := tc t.Run(tc.name, func(t *testing.T) { t.Parallel() got := git.RedactURL(tc.input) @@ -769,7 +768,6 @@ func TestSameHost(t *testing.T) { } for _, tc := range cases { - tc := tc t.Run(tc.name, func(t *testing.T) { t.Parallel() got := git.SameHost(tc.url1, tc.url2) @@ -842,10 +840,45 @@ func TestResolveSubmoduleURL(t *testing.T) { subURL: "https://github.com/other/submodule.git", expect: "https://github.com/other/submodule.git", }, + { + name: "httpsParentWithTokenStripped", + parentURL: "https://token123@github.com/org/main.git", + subURL: "../deps/lib.git", + expect: "https://github.com/org/deps/lib.git", + }, + { + name: "httpsParentWithUserPassStripped", + parentURL: "https://user:pass@example.com/org/main.git", + subURL: "./extras/tool.git", + expect: "https://example.com/org/main.git/extras/tool.git", + }, + { + name: "sshSchemeUserPreserved", + parentURL: "ssh://deploy@host.tld/org/main.git", + subURL: "../deps/lib.git", + expect: "ssh://deploy@host.tld/org/deps/lib.git", + }, + { + name: "sshSchemePasswordStripped", + parentURL: "ssh://deploy:secret@host.tld/org/main.git", + subURL: "../deps/lib.git", + expect: "ssh://deploy@host.tld/org/deps/lib.git", + }, + { + name: "gitSchemeRelative", + parentURL: "git://host.tld/org/main.git", + subURL: "../deps/lib.git", + expect: "git://host.tld/org/deps/lib.git", + }, + { + name: "unparseableParent", + parentURL: "https://[bad-bracket", + subURL: "./child", + expectErr: "parse parent URL", + }, } - for _, tc := range cases { - c := tc + for _, c := range cases { t.Run(c.name, func(t *testing.T) { t.Parallel() got, err := git.ResolveSubmoduleURL(c.parentURL, c.subURL) diff --git a/options/options_test.go b/options/options_test.go index 8979ea41..a835b2cd 100644 --- a/options/options_test.go +++ b/options/options_test.go @@ -91,7 +91,6 @@ func TestEnvOptionParsing(t *testing.T) { t.Run("true", func(t *testing.T) { t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), "true") o := runCLI() - // "true" defaults to DefaultSubmoduleDepth. require.Equal(t, int(options.DefaultSubmoduleDepth), o.GitCloneSubmoduleDepth) }) From c43e58f4262f775147c8c6e3a6416f63ed04d3a0 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Thu, 21 May 2026 14:40:17 +0000 Subject: [PATCH 3/3] test(git): cover submodule auth gating and inline submodule test helper Add an internal-package test for submoduleAuthFor and the credential-chain invariant: once auth is withheld at one level of submodule recursion, deeper levels must keep it nil even when their hosts match. Inline gittest.CreateGitServerWithSubmodule into its single caller in the integration test, and drop the helper. The integration test now builds its parent and submodule servers directly, which keeps the setup visible in the test it serves. --- git/submodule_auth_internal_test.go | 88 +++++++++++++++++++++++++++++ integration/integration_test.go | 31 ++++++---- testutil/gittest/gittest.go | 53 ----------------- 3 files changed, 107 insertions(+), 65 deletions(-) create mode 100644 git/submodule_auth_internal_test.go diff --git a/git/submodule_auth_internal_test.go b/git/submodule_auth_internal_test.go new file mode 100644 index 00000000..f9c0b959 --- /dev/null +++ b/git/submodule_auth_internal_test.go @@ -0,0 +1,88 @@ +package git + +import ( + "bytes" + "testing" + + "github.com/go-git/go-git/v5/plumbing/transport" + "github.com/stretchr/testify/require" +) + +func TestSubmoduleAuthFor(t *testing.T) { + t.Parallel() + + type fakeAuth struct{ transport.AuthMethod } + parentAuth := fakeAuth{} + + cases := []struct { + name string + parentURL string + submoduleURL string + parentAuth transport.AuthMethod + wantAuth transport.AuthMethod + wantWarn bool + }{ + { + name: "noParentAuth", + parentURL: "https://github.com/org/parent.git", + submoduleURL: "https://github.com/org/sub.git", + }, + { + name: "sameHostForwards", + parentURL: "https://github.com/org/parent.git", + submoduleURL: "https://github.com/org/sub.git", + parentAuth: parentAuth, + wantAuth: parentAuth, + }, + { + name: "differentHostWithholdsAndWarns", + parentURL: "https://github.com/org/parent.git", + submoduleURL: "https://evil.com/exfil.git", + parentAuth: parentAuth, + wantWarn: true, + }, + { + name: "differentHostNoParentAuthNoWarn", + parentURL: "https://github.com/org/parent.git", + submoduleURL: "https://evil.com/exfil.git", + }, + { + name: "scpAndHttpsSameHost", + parentURL: "git@github.com:org/parent.git", + submoduleURL: "https://github.com/org/sub.git", + parentAuth: parentAuth, + wantAuth: parentAuth, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + t.Parallel() + var buf bytes.Buffer + logf := func(format string, _ ...any) { _, _ = buf.WriteString(format) } + got := submoduleAuthFor(logf, c.parentURL, c.submoduleURL, c.parentAuth) + require.Equal(t, c.wantAuth, got) + if c.wantWarn { + require.Contains(t, buf.String(), "Not forwarding auth") + } else { + require.NotContains(t, buf.String(), "Not forwarding auth") + } + }) + } +} + +// Once auth is withheld at one level of submodule recursion, it must stay +// withheld for every level below, even when the deeper hosts match each other. +func TestSubmoduleAuthChainStaysWithheld(t *testing.T) { + t.Parallel() + + type fakeAuth struct{ transport.AuthMethod } + rootAuth := fakeAuth{} + logf := func(string, ...any) {} + + level1 := submoduleAuthFor(logf, "https://github.com/org/parent.git", "https://evil.com/repo.git", rootAuth) + require.Nil(t, level1) + + level2 := submoduleAuthFor(logf, "https://evil.com/repo.git", "https://evil.com/nested.git", level1) + require.Nil(t, level2) +} diff --git a/integration/integration_test.go b/integration/integration_test.go index 24af51d9..82f0c4c8 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -33,6 +33,7 @@ import ( "github.com/coder/envbuilder/testutil/gittest" "github.com/coder/envbuilder/testutil/mwtest" "github.com/coder/envbuilder/testutil/registrytest" + "github.com/go-git/go-billy/v5/memfs" "github.com/go-git/go-billy/v5/osfs" gossh "golang.org/x/crypto/ssh" @@ -421,16 +422,22 @@ func TestSucceedsGitAuth(t *testing.T) { func TestGitSubmodules(t *testing.T) { t.Parallel() - // Create parent repo with a submodule - parentSrv, _ := gittest.CreateGitServerWithSubmodule(t, gittest.Options{ - Files: map[string]string{ - "Dockerfile": "FROM " + testImageAlpine, - }, - }, gittest.Options{ - Files: map[string]string{ - "subfile.txt": "submodule content", - }, - }) + submoduleFS := memfs.New() + submoduleRepo := gittest.NewRepo(t, submoduleFS, + gittest.Commit(t, "subfile.txt", "submodule content", "submodule commit"), + ) + submoduleHead, err := submoduleRepo.Head() + require.NoError(t, err) + submoduleSrv := httptest.NewServer(mwtest.BasicAuthMW("", "")(gittest.NewServer(submoduleFS))) + t.Cleanup(submoduleSrv.Close) + + parentFS := memfs.New() + _ = gittest.NewRepo(t, parentFS, + gittest.Commit(t, "Dockerfile", "FROM "+testImageAlpine, "my test commit"), + gittest.CommitSubmodule(t, "submod", submoduleSrv.URL, submoduleHead.Hash()), + ) + parentSrv := httptest.NewServer(mwtest.BasicAuthMW("", "")(gittest.NewServer(parentFS))) + t.Cleanup(parentSrv.Close) ctr, err := runEnvbuilder(t, runOpts{env: []string{ envbuilderEnv("GIT_URL", parentSrv.URL), @@ -439,11 +446,11 @@ func TestGitSubmodules(t *testing.T) { }}) require.NoError(t, err) - // Verify the .gitmodules file exists gitmodules := execContainer(t, ctr, "cat /workspaces/empty/.gitmodules") require.Contains(t, gitmodules, "[submodule") - // Verify the submodule was actually cloned by checking for the file inside it + // Read a committed file from the submodule worktree to confirm that the + // submodule was actually checked out, not just registered in .gitmodules. subfileContent := execContainer(t, ctr, "cat /workspaces/empty/submod/subfile.txt") require.Contains(t, subfileContent, "submodule content") } diff --git a/testutil/gittest/gittest.go b/testutil/gittest/gittest.go index e9bac660..402a0b5a 100644 --- a/testutil/gittest/gittest.go +++ b/testutil/gittest/gittest.go @@ -273,59 +273,6 @@ func NewRepo(t *testing.T, fs billy.Filesystem, commits ...CommitFunc) *git.Repo return repo } -// CreateGitServerWithSubmodule creates a parent git repo with a submodule pointing to another repo. -// Returns the parent server and the submodule server. -// The submodule is properly registered with a gitlink entry in the tree. -func CreateGitServerWithSubmodule(t *testing.T, opts Options, submoduleOpts Options) (parentSrv *httptest.Server, submoduleSrv *httptest.Server) { - t.Helper() - - // Create the submodule repo first and get its HEAD commit - submoduleFS := memfs.New() - submoduleCommits := make([]CommitFunc, 0) - for path, content := range submoduleOpts.Files { - submoduleCommits = append(submoduleCommits, Commit(t, path, content, "submodule commit")) - } - submoduleRepo := NewRepo(t, submoduleFS, submoduleCommits...) - - // Get the submodule's HEAD commit hash - submoduleHead, err := submoduleRepo.Head() - require.NoError(t, err) - submoduleHash := submoduleHead.Hash() - - // Start the submodule server - if submoduleOpts.AuthMW == nil { - submoduleOpts.AuthMW = mwtest.BasicAuthMW(submoduleOpts.Username, submoduleOpts.Password) - } - if submoduleOpts.TLS { - submoduleSrv = httptest.NewTLSServer(submoduleOpts.AuthMW(NewServer(submoduleFS))) - } else { - submoduleSrv = httptest.NewServer(submoduleOpts.AuthMW(NewServer(submoduleFS))) - } - - // Create the parent repo with .gitmodules and gitlink entry - if opts.AuthMW == nil { - opts.AuthMW = mwtest.BasicAuthMW(opts.Username, opts.Password) - } - - parentFS := memfs.New() - commits := make([]CommitFunc, 0) - for path, content := range opts.Files { - commits = append(commits, Commit(t, path, content, "my test commit")) - } - - // Add .gitmodules file and gitlink entry for the submodule - commits = append(commits, CommitSubmodule(t, "submod", submoduleSrv.URL, submoduleHash)) - - _ = NewRepo(t, parentFS, commits...) - - if opts.TLS { - parentSrv = httptest.NewTLSServer(opts.AuthMW(NewServer(parentFS))) - } else { - parentSrv = httptest.NewServer(opts.AuthMW(NewServer(parentFS))) - } - return parentSrv, submoduleSrv -} - // CommitSubmodule creates a commit that adds a submodule with proper .gitmodules and gitlink entry. func CommitSubmodule(t *testing.T, path, url string, hash plumbing.Hash) CommitFunc { return func(fs billy.Filesystem, repo *git.Repository) {