From 6a3f31b0a6c7c48131b9f048b9adbea0d0d7e863 Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Sun, 5 Apr 2026 00:56:12 +0200 Subject: [PATCH 1/6] add init-period flag for faster initial sync retries --- main.go | 63 ++++++++++++++++++++++++++++++++++-- main_test.go | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 5c4c3c13a..2ae4d4b39 100644 --- a/main.go +++ b/main.go @@ -186,6 +186,12 @@ func main() { flPeriod := pflag.Duration("period", envDuration(10*time.Second, "GITSYNC_PERIOD", "GIT_SYNC_PERIOD"), "how long to wait between syncs, must be >= 10ms; --wait overrides this") + flInitPeriod := pflag.Duration("init-period", + envDuration(0, "GITSYNC_INIT_PERIOD"), + "how long to wait between sync attempts until the first success, must be >= 10ms if set; if unset, --period is used") + flInitTimeout := pflag.Duration("init-timeout", + envDuration(0, "GITSYNC_INIT_TIMEOUT"), + "the max time allowed for the initial sync to succeed; if unset, there is no timeout (retries forever until --max-failures)") flSyncTimeout := pflag.Duration("sync-timeout", envDuration(120*time.Second, "GITSYNC_SYNC_TIMEOUT", "GIT_SYNC_SYNC_TIMEOUT"), "the total time allowed for one complete sync, must be >= 10ms; --timeout overrides this") @@ -469,6 +475,12 @@ func main() { if *flPeriod < 10*time.Millisecond { fatalConfigErrorf(log, true, "invalid flag: --period must be at least 10ms") } + if *flInitPeriod != 0 && *flInitPeriod < 10*time.Millisecond { + fatalConfigErrorf(log, true, "invalid flag: --init-period must be at least 10ms") + } + if *flInitTimeout != 0 && *flInitTimeout < 10*time.Millisecond { + fatalConfigErrorf(log, true, "invalid flag: --init-timeout must be at least 10ms") + } if *flDeprecatedChmod != 0 { fatalConfigErrorf(log, true, "deprecated flag: --change-permissions is no longer supported") @@ -912,6 +924,11 @@ func main() { failCount := 0 syncCount := uint64(0) + initialSyncDone := false + var initStart time.Time + if *flInitPeriod != 0 || *flInitTimeout != 0 { + initStart = time.Now() + } for { start := time.Now() ctx, cancel := context.WithTimeout(context.Background(), *flSyncTimeout) @@ -919,6 +936,10 @@ func main() { if changed, hash, err := git.SyncRepo(ctx, refreshCreds); err != nil { failCount++ updateSyncMetrics(metricKeyError, start) + if isInitTimedOut(*flInitTimeout, initialSyncDone, initStart) { + log.Error(err, "initial sync timed out", "initTimeout", flInitTimeout.String(), "failCount", failCount) + os.Exit(1) + } if *flMaxFailures >= 0 && failCount >= *flMaxFailures { // Exit after too many retries, maybe the error is not recoverable. log.Error(err, "too many failures, aborting", "failCount", failCount) @@ -926,6 +947,12 @@ func main() { } log.Error(err, "error syncing repo, will retry", "failCount", failCount) } else { + if !initialSyncDone { + initialSyncDone = true + if *flInitPeriod != 0 { + log.V(0).Info("initial sync complete, switching to normal period", "initPeriod", flInitPeriod.String(), "period", flPeriod.String()) + } + } // this might have been called before, but also might not have setRepoReady() // We treat the first loop as a sync, including sending hooks. @@ -989,12 +1016,14 @@ func main() { log.DeleteErrorFile() } - log.V(3).Info("next sync", "waitTime", flPeriod.String(), "syncCount", syncCount) + // Use init-period for retries before the first successful sync. + waitTime := chooseWaitTime(*flPeriod, *flInitPeriod, initialSyncDone) + log.V(3).Info("next sync", "waitTime", waitTime.String(), "syncCount", syncCount) cancel() // Sleep until the next sync. If syncSig is set then the sleep may // be interrupted by that signal. - t := time.NewTimer(*flPeriod) + t := time.NewTimer(waitTime) select { case <-t.C: case <-sigChan: @@ -1144,6 +1173,24 @@ func setRepoReady() { repoReady = true } +// chooseWaitTime returns the appropriate wait duration based on whether the +// initial sync has completed. If initPeriod is non-zero and the initial sync +// is not yet done, it returns initPeriod. Otherwise it returns period. +func chooseWaitTime(period, initPeriod time.Duration, initialSyncDone bool) time.Duration { + if !initialSyncDone && initPeriod != 0 { + return initPeriod + } + return period +} + +// isInitTimedOut returns true if the initial sync has exceeded the init-timeout. +func isInitTimedOut(initTimeout time.Duration, initialSyncDone bool, initStart time.Time) bool { + if initTimeout == 0 || initialSyncDone { + return false + } + return time.Since(initStart) >= initTimeout +} + // Do no work, but don't do something that triggers go's runtime into thinking // it is deadlocked. func sleepForever() { @@ -2589,6 +2636,18 @@ OPTIONS dynamic credentials from an external secrets system). See also $GITSYNC_PASSWORD. + --init-period , $GITSYNC_INIT_PERIOD + How long to wait between sync attempts until the first successful + sync. Once the initial sync succeeds, --period is used instead. + This must be at least 10ms if set. If not specified, --period is + used for all sync attempts. + + --init-timeout , $GITSYNC_INIT_TIMEOUT + The maximum amount of time to keep retrying the initial sync + before failing. If not specified, git-sync retries forever (or + until --max-failures is reached). This must be at least 10ms if + set. + --period , $GITSYNC_PERIOD How long to wait between sync attempts. This must be at least 10ms. This flag obsoletes --wait, but if --wait is specified, it diff --git a/main_test.go b/main_test.go index 13de2a4e9..49c20049a 100644 --- a/main_test.go +++ b/main_test.go @@ -425,6 +425,96 @@ func TestTouch(t *testing.T) { } } +func TestChooseWaitTime(t *testing.T) { + period := 10 * time.Second + initPeriod := 500 * time.Millisecond + + cases := []struct { + name string + period time.Duration + initPeriod time.Duration + initialSyncDone bool + expected time.Duration + }{{ + name: "no init-period, not done", + period: period, + initPeriod: 0, + initialSyncDone: false, + expected: period, + }, { + name: "no init-period, done", + period: period, + initPeriod: 0, + initialSyncDone: true, + expected: period, + }, { + name: "init-period set, not done", + period: period, + initPeriod: initPeriod, + initialSyncDone: false, + expected: initPeriod, + }, { + name: "init-period set, done", + period: period, + initPeriod: initPeriod, + initialSyncDone: true, + expected: period, + }} + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := chooseWaitTime(tc.period, tc.initPeriod, tc.initialSyncDone) + if got != tc.expected { + t.Errorf("expected %v, got %v", tc.expected, got) + } + }) + } +} + +func TestIsInitTimedOut(t *testing.T) { + cases := []struct { + name string + initTimeout time.Duration + initialSyncDone bool + elapsed time.Duration + expected bool + }{{ + name: "no timeout set", + initTimeout: 0, + initialSyncDone: false, + elapsed: time.Hour, + expected: false, + }, { + name: "timeout set, already done", + initTimeout: time.Second, + initialSyncDone: true, + elapsed: time.Hour, + expected: false, + }, { + name: "timeout set, not done, not expired", + initTimeout: 10 * time.Second, + initialSyncDone: false, + elapsed: time.Second, + expected: false, + }, { + name: "timeout set, not done, expired", + initTimeout: time.Second, + initialSyncDone: false, + elapsed: 2 * time.Second, + expected: true, + }} + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + initStart := time.Now().Add(-tc.elapsed) + got := isInitTimedOut(tc.initTimeout, tc.initialSyncDone, initStart) + if got != tc.expected { + t.Errorf("expected %v, got %v", tc.expected, got) + } + }) + } +} + func TestHasGitLockFile(t *testing.T) { testCases := map[string]struct { inputFilePath []string From 38c837b466f84e0a6df8aeee98a45f3112247c45 Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Sun, 5 Apr 2026 09:50:44 +0200 Subject: [PATCH 2/6] address review: alphabetize flags, add readme docs, add e2e tests --- README.md | 11 +++++++++++ main.go | 6 +++--- test_e2e.sh | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e05e9a693..9d6639bef 100644 --- a/README.md +++ b/README.md @@ -365,6 +365,17 @@ OPTIONS Enable the pprof debug endpoints on git-sync's HTTP endpoint at /debug/pprof. Requires --http-bind to be specified. + --init-period , $GITSYNC_INIT_PERIOD + How long to wait between sync attempts until the first successful + sync. This must be at least 10ms if set. If not specified, the + value of --period is used for initial sync attempts as well. + + --init-timeout , $GITSYNC_INIT_TIMEOUT + The maximum time allowed for the initial sync to succeed. If the + first successful sync does not occur within this duration, git-sync + will exit with an error. If not specified, there is no timeout and + git-sync will retry until --max-failures is reached. + --link , $GITSYNC_LINK The path to at which to create a symlink which points to the current git directory, at the currently synced hash. This may be diff --git a/main.go b/main.go index 2ae4d4b39..6d6b623bf 100644 --- a/main.go +++ b/main.go @@ -183,15 +183,15 @@ func main() { flErrorFile := pflag.String("error-file", envString("", "GITSYNC_ERROR_FILE", "GIT_SYNC_ERROR_FILE"), "the path (absolute or relative to --root) to an optional file into which errors will be written (defaults to disabled)") - flPeriod := pflag.Duration("period", - envDuration(10*time.Second, "GITSYNC_PERIOD", "GIT_SYNC_PERIOD"), - "how long to wait between syncs, must be >= 10ms; --wait overrides this") flInitPeriod := pflag.Duration("init-period", envDuration(0, "GITSYNC_INIT_PERIOD"), "how long to wait between sync attempts until the first success, must be >= 10ms if set; if unset, --period is used") flInitTimeout := pflag.Duration("init-timeout", envDuration(0, "GITSYNC_INIT_TIMEOUT"), "the max time allowed for the initial sync to succeed; if unset, there is no timeout (retries forever until --max-failures)") + flPeriod := pflag.Duration("period", + envDuration(10*time.Second, "GITSYNC_PERIOD", "GIT_SYNC_PERIOD"), + "how long to wait between syncs, must be >= 10ms; --wait overrides this") flSyncTimeout := pflag.Duration("sync-timeout", envDuration(120*time.Second, "GITSYNC_SYNC_TIMEOUT", "GIT_SYNC_SYNC_TIMEOUT"), "the total time allowed for one complete sync, must be >= 10ms; --timeout overrides this") diff --git a/test_e2e.sh b/test_e2e.sh index 8966c79c5..9d5a3caaf 100755 --- a/test_e2e.sh +++ b/test_e2e.sh @@ -3683,6 +3683,56 @@ function e2e::exechook_git_archive() { assert_tgz_archive "$ROOT/link/archive.tgz" } +############################################## +# Test init-period uses faster interval for initial sync +############################################## +function e2e::init_period_faster_initial_sync() { + # First sync + echo "${FUNCNAME[0]} 1" > "$REPO/file" + git -C "$REPO" commit -qam "${FUNCNAME[0]} 1" + + GIT_SYNC \ + --period=100s \ + --init-period=100ms \ + --repo="file://$REPO" \ + --root="$ROOT" \ + --link="link" \ + & + # With init-period=100ms, sync should happen quickly even though + # period=100s. If init-period were not working, this would time out. + wait_for_sync "${MAXWAIT}" + assert_link_exists "$ROOT/link" + assert_file_exists "$ROOT/link/file" + assert_file_eq "$ROOT/link/file" "${FUNCNAME[0]} 1" + assert_metric_eq "${METRIC_GOOD_SYNC_COUNT}" 1 + + # After initial sync, period should switch to the normal 100s. + # Make a new commit and verify it does NOT sync quickly (because + # we're now using the slow period). + echo "${FUNCNAME[0]} 2" > "$REPO/file" + git -C "$REPO" commit -qam "${FUNCNAME[0]} 2" + # Wait a bit - should NOT have synced since normal period is 100s + sleep 3 + assert_file_eq "$ROOT/link/file" "${FUNCNAME[0]} 1" + assert_metric_eq "${METRIC_GOOD_SYNC_COUNT}" 1 +} + +############################################## +# Test init-timeout exits on timeout +############################################## +function e2e::init_timeout_expires() { + assert_fail \ + GIT_SYNC \ + --git="/$SLOW_GIT_FETCH" \ + --period=100ms \ + --init-timeout=1s \ + --max-failures=-1 \ + --repo="file://$REPO" \ + --root="$ROOT" \ + --link="link" + assert_file_absent "$ROOT/link/file" +} + # # main # From e599c33630e026162e71bdb700015f6102a61849 Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Mon, 6 Apr 2026 12:35:43 +0200 Subject: [PATCH 3/6] move init flags to alphabetical order in man text --- main.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/main.go b/main.go index 6d6b623bf..2a8525f43 100644 --- a/main.go +++ b/main.go @@ -2603,6 +2603,18 @@ OPTIONS Enable the pprof debug endpoints on git-sync's HTTP endpoint at /debug/pprof. Requires --http-bind to be specified. + --init-period , $GITSYNC_INIT_PERIOD + How long to wait between sync attempts until the first successful + sync. Once the initial sync succeeds, --period is used instead. + This must be at least 10ms if set. If not specified, --period is + used for all sync attempts. + + --init-timeout , $GITSYNC_INIT_TIMEOUT + The maximum amount of time to keep retrying the initial sync + before failing. If not specified, git-sync retries forever (or + until --max-failures is reached). This must be at least 10ms if + set. + --link , $GITSYNC_LINK The path to at which to create a symlink which points to the current git directory, at the currently synced hash. This may be @@ -2636,18 +2648,6 @@ OPTIONS dynamic credentials from an external secrets system). See also $GITSYNC_PASSWORD. - --init-period , $GITSYNC_INIT_PERIOD - How long to wait between sync attempts until the first successful - sync. Once the initial sync succeeds, --period is used instead. - This must be at least 10ms if set. If not specified, --period is - used for all sync attempts. - - --init-timeout , $GITSYNC_INIT_TIMEOUT - The maximum amount of time to keep retrying the initial sync - before failing. If not specified, git-sync retries forever (or - until --max-failures is reached). This must be at least 10ms if - set. - --period , $GITSYNC_PERIOD How long to wait between sync attempts. This must be at least 10ms. This flag obsoletes --wait, but if --wait is specified, it From 5c9a247d29de833d60b8b2ac8894daf8db6f6cf7 Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Wed, 15 Apr 2026 01:18:35 +0200 Subject: [PATCH 4/6] fix init-timeout e2e test to actually trigger init timeout --- test_e2e.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test_e2e.sh b/test_e2e.sh index 9d5a3caaf..8d71fe779 100755 --- a/test_e2e.sh +++ b/test_e2e.sh @@ -3723,11 +3723,10 @@ function e2e::init_period_faster_initial_sync() { function e2e::init_timeout_expires() { assert_fail \ GIT_SYNC \ - --git="/$SLOW_GIT_FETCH" \ --period=100ms \ --init-timeout=1s \ --max-failures=-1 \ - --repo="file://$REPO" \ + --repo="file:///does/not/exist" \ --root="$ROOT" \ --link="link" assert_file_absent "$ROOT/link/file" From 94c27c0387dcc068f24d656a66e9a863daf99271 Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Wed, 22 Apr 2026 16:19:28 +0200 Subject: [PATCH 5/6] switch init-timeout to init-max-failures per review - rename flag and helper to count-based (int, not duration) - skip --max-failures during init when init-max-failures is set so main-max does not override init tolerance - add SYNC PHASES section to --man and README describing init vs steady-state - update e2e test to use a bad server path, as suggested in review --- README.md | 36 ++++++++++++++++++++++------- main.go | 65 ++++++++++++++++++++++++++++++++++------------------ main_test.go | 45 ++++++++++++++++++++++-------------- test_e2e.sh | 6 ++--- 4 files changed, 102 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 9d6639bef..943955cd0 100644 --- a/README.md +++ b/README.md @@ -206,6 +206,24 @@ CONTRACT use as little disk space as possible (see the --depth and --git-gc flags), but this is not part of the contract. +SYNC PHASES + + git-sync operates in two phases: + + Initial sync: + git-sync retries until its first successful sync with the remote + repo. During this phase, the retry interval is controlled by + --init-period (falling back to --period if unset) and the failure + limit is controlled by --init-max-failures (falling back to + --max-failures when set to 0). This phase is useful for tolerating + transient connectivity issues at startup while still giving up + eventually. + + Steady state: + Once the first sync succeeds, git-sync polls the remote at the + --period interval and tolerates failures up to --max-failures before + aborting. --init-period and --init-max-failures no longer apply. + OPTIONS Many options can be specified as either a commandline flag or an environment @@ -365,16 +383,18 @@ OPTIONS Enable the pprof debug endpoints on git-sync's HTTP endpoint at /debug/pprof. Requires --http-bind to be specified. + --init-max-failures , $GITSYNC_INIT_MAX_FAILURES + The number of consecutive failures allowed before aborting during + the initial sync phase (before the first successful sync). Once + the initial sync succeeds, --max-failures applies instead. Set + to 0 (the default) to disable this separate limit and fall + through to --max-failures for the entire run. + --init-period , $GITSYNC_INIT_PERIOD How long to wait between sync attempts until the first successful - sync. This must be at least 10ms if set. If not specified, the - value of --period is used for initial sync attempts as well. - - --init-timeout , $GITSYNC_INIT_TIMEOUT - The maximum time allowed for the initial sync to succeed. If the - first successful sync does not occur within this duration, git-sync - will exit with an error. If not specified, there is no timeout and - git-sync will retry until --max-failures is reached. + sync. Once the initial sync succeeds, --period is used instead. + This must be at least 10ms if set. If not specified, --period is + used for all sync attempts. --link , $GITSYNC_LINK The path to at which to create a symlink which points to the diff --git a/main.go b/main.go index 2a8525f43..11164c58c 100644 --- a/main.go +++ b/main.go @@ -183,12 +183,12 @@ func main() { flErrorFile := pflag.String("error-file", envString("", "GITSYNC_ERROR_FILE", "GIT_SYNC_ERROR_FILE"), "the path (absolute or relative to --root) to an optional file into which errors will be written (defaults to disabled)") + flInitMaxFailures := pflag.Int("init-max-failures", + envInt(0, "GITSYNC_INIT_MAX_FAILURES"), + "the number of consecutive failures allowed before aborting during the initial sync phase; 0 disables the check (falls through to --max-failures)") flInitPeriod := pflag.Duration("init-period", envDuration(0, "GITSYNC_INIT_PERIOD"), "how long to wait between sync attempts until the first success, must be >= 10ms if set; if unset, --period is used") - flInitTimeout := pflag.Duration("init-timeout", - envDuration(0, "GITSYNC_INIT_TIMEOUT"), - "the max time allowed for the initial sync to succeed; if unset, there is no timeout (retries forever until --max-failures)") flPeriod := pflag.Duration("period", envDuration(10*time.Second, "GITSYNC_PERIOD", "GIT_SYNC_PERIOD"), "how long to wait between syncs, must be >= 10ms; --wait overrides this") @@ -478,8 +478,8 @@ func main() { if *flInitPeriod != 0 && *flInitPeriod < 10*time.Millisecond { fatalConfigErrorf(log, true, "invalid flag: --init-period must be at least 10ms") } - if *flInitTimeout != 0 && *flInitTimeout < 10*time.Millisecond { - fatalConfigErrorf(log, true, "invalid flag: --init-timeout must be at least 10ms") + if *flInitMaxFailures < 0 { + fatalConfigErrorf(log, true, "invalid flag: --init-max-failures must not be negative") } if *flDeprecatedChmod != 0 { @@ -925,10 +925,6 @@ func main() { failCount := 0 syncCount := uint64(0) initialSyncDone := false - var initStart time.Time - if *flInitPeriod != 0 || *flInitTimeout != 0 { - initStart = time.Now() - } for { start := time.Now() ctx, cancel := context.WithTimeout(context.Background(), *flSyncTimeout) @@ -936,11 +932,15 @@ func main() { if changed, hash, err := git.SyncRepo(ctx, refreshCreds); err != nil { failCount++ updateSyncMetrics(metricKeyError, start) - if isInitTimedOut(*flInitTimeout, initialSyncDone, initStart) { - log.Error(err, "initial sync timed out", "initTimeout", flInitTimeout.String(), "failCount", failCount) + if isInitFailuresExceeded(*flInitMaxFailures, initialSyncDone, failCount) { + log.Error(err, "too many initial sync failures, aborting", "initMaxFailures", *flInitMaxFailures, "failCount", failCount) os.Exit(1) } - if *flMaxFailures >= 0 && failCount >= *flMaxFailures { + // During the initial sync phase, --init-max-failures (if set) is + // the authoritative limit; --max-failures applies once init is + // done, or when --init-max-failures is not set. + useMainLimit := initialSyncDone || *flInitMaxFailures == 0 + if useMainLimit && *flMaxFailures >= 0 && failCount >= *flMaxFailures { // Exit after too many retries, maybe the error is not recoverable. log.Error(err, "too many failures, aborting", "failCount", failCount) os.Exit(1) @@ -1183,12 +1183,14 @@ func chooseWaitTime(period, initPeriod time.Duration, initialSyncDone bool) time return period } -// isInitTimedOut returns true if the initial sync has exceeded the init-timeout. -func isInitTimedOut(initTimeout time.Duration, initialSyncDone bool, initStart time.Time) bool { - if initTimeout == 0 || initialSyncDone { +// isInitFailuresExceeded returns true if the initial sync has failed more than +// initMaxFailures consecutive times. A non-positive initMaxFailures disables +// the check. +func isInitFailuresExceeded(initMaxFailures int, initialSyncDone bool, failCount int) bool { + if initMaxFailures <= 0 || initialSyncDone { return false } - return time.Since(initStart) >= initTimeout + return failCount >= initMaxFailures } // Do no work, but don't do something that triggers go's runtime into thinking @@ -2444,6 +2446,24 @@ CONTRACT use as little disk space as possible (see the --depth and --git-gc flags), but this is not part of the contract. +SYNC PHASES + + git-sync operates in two phases: + + Initial sync: + git-sync retries until its first successful sync with the remote + repo. During this phase, the retry interval is controlled by + --init-period (falling back to --period if unset) and the failure + limit is controlled by --init-max-failures (falling back to + --max-failures when set to 0). This phase is useful for tolerating + transient connectivity issues at startup while still giving up + eventually. + + Steady state: + Once the first sync succeeds, git-sync polls the remote at the + --period interval and tolerates failures up to --max-failures before + aborting. --init-period and --init-max-failures no longer apply. + OPTIONS Many options can be specified as either a commandline flag or an environment @@ -2603,18 +2623,19 @@ OPTIONS Enable the pprof debug endpoints on git-sync's HTTP endpoint at /debug/pprof. Requires --http-bind to be specified. + --init-max-failures , $GITSYNC_INIT_MAX_FAILURES + The number of consecutive failures allowed before aborting during + the initial sync phase (before the first successful sync). Once + the initial sync succeeds, --max-failures applies instead. Set + to 0 (the default) to disable this separate limit and fall + through to --max-failures for the entire run. + --init-period , $GITSYNC_INIT_PERIOD How long to wait between sync attempts until the first successful sync. Once the initial sync succeeds, --period is used instead. This must be at least 10ms if set. If not specified, --period is used for all sync attempts. - --init-timeout , $GITSYNC_INIT_TIMEOUT - The maximum amount of time to keep retrying the initial sync - before failing. If not specified, git-sync retries forever (or - until --max-failures is reached). This must be at least 10ms if - set. - --link , $GITSYNC_LINK The path to at which to create a symlink which points to the current git directory, at the currently synced hash. This may be diff --git a/main_test.go b/main_test.go index 49c20049a..4033cd8c5 100644 --- a/main_test.go +++ b/main_test.go @@ -471,43 +471,54 @@ func TestChooseWaitTime(t *testing.T) { } } -func TestIsInitTimedOut(t *testing.T) { +func TestIsInitFailuresExceeded(t *testing.T) { cases := []struct { name string - initTimeout time.Duration + initMaxFailures int initialSyncDone bool - elapsed time.Duration + failCount int expected bool }{{ - name: "no timeout set", - initTimeout: 0, + name: "disabled (zero)", + initMaxFailures: 0, initialSyncDone: false, - elapsed: time.Hour, + failCount: 100, expected: false, }, { - name: "timeout set, already done", - initTimeout: time.Second, + name: "disabled (negative)", + initMaxFailures: -1, + initialSyncDone: false, + failCount: 100, + expected: false, + }, { + name: "limit set, already done", + initMaxFailures: 3, initialSyncDone: true, - elapsed: time.Hour, + failCount: 100, expected: false, }, { - name: "timeout set, not done, not expired", - initTimeout: 10 * time.Second, + name: "limit set, not done, under limit", + initMaxFailures: 5, initialSyncDone: false, - elapsed: time.Second, + failCount: 3, expected: false, }, { - name: "timeout set, not done, expired", - initTimeout: time.Second, + name: "limit set, not done, at limit", + initMaxFailures: 3, + initialSyncDone: false, + failCount: 3, + expected: true, + }, { + name: "limit set, not done, over limit", + initMaxFailures: 3, initialSyncDone: false, - elapsed: 2 * time.Second, + failCount: 5, expected: true, }} for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - initStart := time.Now().Add(-tc.elapsed) - got := isInitTimedOut(tc.initTimeout, tc.initialSyncDone, initStart) + got := isInitFailuresExceeded(tc.initMaxFailures, tc.initialSyncDone, tc.failCount) if got != tc.expected { t.Errorf("expected %v, got %v", tc.expected, got) } diff --git a/test_e2e.sh b/test_e2e.sh index 8d71fe779..91d133ef8 100755 --- a/test_e2e.sh +++ b/test_e2e.sh @@ -3718,13 +3718,13 @@ function e2e::init_period_faster_initial_sync() { } ############################################## -# Test init-timeout exits on timeout +# Test init-max-failures aborts after N failed attempts ############################################## -function e2e::init_timeout_expires() { +function e2e::init_max_failures_exceeded() { assert_fail \ GIT_SYNC \ --period=100ms \ - --init-timeout=1s \ + --init-max-failures=3 \ --max-failures=-1 \ --repo="file:///does/not/exist" \ --root="$ROOT" \ From b34572c7549fa9aa2f02998046bfeab9338ef3a3 Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Fri, 24 Apr 2026 03:05:00 +0200 Subject: [PATCH 6/6] address review: simplify closure, use pflag.Changed for init-max-failures - move flInitMaxFailures next to flMaxFailures - default flInitPeriod to flPeriod to kill conditional in hot path - replace isInitFailuresExceeded + useMainLimit with getMaxFailures closure - track waitTime as a loop-outer variable, flip on init-done - detect --init-max-failures via pflag.Changed || GITSYNC_INIT_MAX_FAILURES env; -1 = unlimited, 0 = abort immediately, unset = fall through to --max-failures - drop now-stale unit tests for removed helpers, update --man and README --- README.md | 9 +++-- main.go | 75 +++++++++++++++----------------------- main_test.go | 101 --------------------------------------------------- 3 files changed, 35 insertions(+), 150 deletions(-) diff --git a/README.md b/README.md index 943955cd0..2b56d8575 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ SYNC PHASES repo. During this phase, the retry interval is controlled by --init-period (falling back to --period if unset) and the failure limit is controlled by --init-max-failures (falling back to - --max-failures when set to 0). This phase is useful for tolerating + --max-failures when unset). This phase is useful for tolerating transient connectivity issues at startup while still giving up eventually. @@ -386,9 +386,10 @@ OPTIONS --init-max-failures , $GITSYNC_INIT_MAX_FAILURES The number of consecutive failures allowed before aborting during the initial sync phase (before the first successful sync). Once - the initial sync succeeds, --max-failures applies instead. Set - to 0 (the default) to disable this separate limit and fall - through to --max-failures for the entire run. + the initial sync succeeds, --max-failures applies instead. + Setting this to a negative value will retry forever during the + initial sync. If this flag is not set, --max-failures applies + to the initial sync phase as well. --init-period , $GITSYNC_INIT_PERIOD How long to wait between sync attempts until the first successful diff --git a/main.go b/main.go index 11164c58c..346f56755 100644 --- a/main.go +++ b/main.go @@ -183,9 +183,6 @@ func main() { flErrorFile := pflag.String("error-file", envString("", "GITSYNC_ERROR_FILE", "GIT_SYNC_ERROR_FILE"), "the path (absolute or relative to --root) to an optional file into which errors will be written (defaults to disabled)") - flInitMaxFailures := pflag.Int("init-max-failures", - envInt(0, "GITSYNC_INIT_MAX_FAILURES"), - "the number of consecutive failures allowed before aborting during the initial sync phase; 0 disables the check (falls through to --max-failures)") flInitPeriod := pflag.Duration("init-period", envDuration(0, "GITSYNC_INIT_PERIOD"), "how long to wait between sync attempts until the first success, must be >= 10ms if set; if unset, --period is used") @@ -204,6 +201,9 @@ func main() { flMaxFailures := pflag.Int("max-failures", envInt(0, "GITSYNC_MAX_FAILURES", "GIT_SYNC_MAX_FAILURES"), "the number of consecutive failures allowed before aborting (-1 will retry forever") + flInitMaxFailures := pflag.Int("init-max-failures", + envInt(0, "GITSYNC_INIT_MAX_FAILURES"), + "the number of consecutive failures allowed before aborting during the initial sync phase; a negative value retries forever; if unset, --max-failures applies instead") flTouchFile := pflag.String("touch-file", envString("", "GITSYNC_TOUCH_FILE", "GIT_SYNC_TOUCH_FILE"), "the path (absolute or relative to --root) to an optional file which will be touched whenever a sync completes (defaults to disabled)") @@ -382,6 +382,11 @@ func main() { pflag.Parse() + // Was --init-max-failures explicitly set (CLI or env)? envInt applies + // the env value as the pflag default, so pflag.Changed alone misses it. + _, initMaxFailuresEnv := os.LookupEnv("GITSYNC_INIT_MAX_FAILURES") + initMaxFailuresSet := initMaxFailuresEnv || pflag.Lookup("init-max-failures").Changed + // Handle print-and-exit cases. if *flVersion { fmt.Fprintln(os.Stdout, version.VERSION) @@ -475,12 +480,11 @@ func main() { if *flPeriod < 10*time.Millisecond { fatalConfigErrorf(log, true, "invalid flag: --period must be at least 10ms") } - if *flInitPeriod != 0 && *flInitPeriod < 10*time.Millisecond { + if *flInitPeriod == 0 { + *flInitPeriod = *flPeriod + } else if *flInitPeriod < 10*time.Millisecond { fatalConfigErrorf(log, true, "invalid flag: --init-period must be at least 10ms") } - if *flInitMaxFailures < 0 { - fatalConfigErrorf(log, true, "invalid flag: --init-max-failures must not be negative") - } if *flDeprecatedChmod != 0 { fatalConfigErrorf(log, true, "deprecated flag: --change-permissions is no longer supported") @@ -925,6 +929,16 @@ func main() { failCount := 0 syncCount := uint64(0) initialSyncDone := false + waitTime := *flInitPeriod + // getMaxFailures returns the effective max-failure limit for the current + // phase. During the initial sync phase, --init-max-failures (if set) + // overrides --max-failures; otherwise --max-failures applies. + getMaxFailures := func() int { + if !initialSyncDone && initMaxFailuresSet { + return *flInitMaxFailures + } + return *flMaxFailures + } for { start := time.Now() ctx, cancel := context.WithTimeout(context.Background(), *flSyncTimeout) @@ -932,24 +946,16 @@ func main() { if changed, hash, err := git.SyncRepo(ctx, refreshCreds); err != nil { failCount++ updateSyncMetrics(metricKeyError, start) - if isInitFailuresExceeded(*flInitMaxFailures, initialSyncDone, failCount) { - log.Error(err, "too many initial sync failures, aborting", "initMaxFailures", *flInitMaxFailures, "failCount", failCount) - os.Exit(1) - } - // During the initial sync phase, --init-max-failures (if set) is - // the authoritative limit; --max-failures applies once init is - // done, or when --init-max-failures is not set. - useMainLimit := initialSyncDone || *flInitMaxFailures == 0 - if useMainLimit && *flMaxFailures >= 0 && failCount >= *flMaxFailures { - // Exit after too many retries, maybe the error is not recoverable. - log.Error(err, "too many failures, aborting", "failCount", failCount) + if maxFails := getMaxFailures(); maxFails >= 0 && failCount >= maxFails { + log.Error(err, "too many failures, aborting", "failCount", failCount, "maxFailures", maxFails) os.Exit(1) } log.Error(err, "error syncing repo, will retry", "failCount", failCount) } else { if !initialSyncDone { initialSyncDone = true - if *flInitPeriod != 0 { + waitTime = *flPeriod + if *flInitPeriod != *flPeriod { log.V(0).Info("initial sync complete, switching to normal period", "initPeriod", flInitPeriod.String(), "period", flPeriod.String()) } } @@ -1016,8 +1022,6 @@ func main() { log.DeleteErrorFile() } - // Use init-period for retries before the first successful sync. - waitTime := chooseWaitTime(*flPeriod, *flInitPeriod, initialSyncDone) log.V(3).Info("next sync", "waitTime", waitTime.String(), "syncCount", syncCount) cancel() @@ -1173,26 +1177,6 @@ func setRepoReady() { repoReady = true } -// chooseWaitTime returns the appropriate wait duration based on whether the -// initial sync has completed. If initPeriod is non-zero and the initial sync -// is not yet done, it returns initPeriod. Otherwise it returns period. -func chooseWaitTime(period, initPeriod time.Duration, initialSyncDone bool) time.Duration { - if !initialSyncDone && initPeriod != 0 { - return initPeriod - } - return period -} - -// isInitFailuresExceeded returns true if the initial sync has failed more than -// initMaxFailures consecutive times. A non-positive initMaxFailures disables -// the check. -func isInitFailuresExceeded(initMaxFailures int, initialSyncDone bool, failCount int) bool { - if initMaxFailures <= 0 || initialSyncDone { - return false - } - return failCount >= initMaxFailures -} - // Do no work, but don't do something that triggers go's runtime into thinking // it is deadlocked. func sleepForever() { @@ -2455,7 +2439,7 @@ SYNC PHASES repo. During this phase, the retry interval is controlled by --init-period (falling back to --period if unset) and the failure limit is controlled by --init-max-failures (falling back to - --max-failures when set to 0). This phase is useful for tolerating + --max-failures when unset). This phase is useful for tolerating transient connectivity issues at startup while still giving up eventually. @@ -2626,9 +2610,10 @@ OPTIONS --init-max-failures , $GITSYNC_INIT_MAX_FAILURES The number of consecutive failures allowed before aborting during the initial sync phase (before the first successful sync). Once - the initial sync succeeds, --max-failures applies instead. Set - to 0 (the default) to disable this separate limit and fall - through to --max-failures for the entire run. + the initial sync succeeds, --max-failures applies instead. + Setting this to a negative value will retry forever during the + initial sync. If this flag is not set, --max-failures applies + to the initial sync phase as well. --init-period , $GITSYNC_INIT_PERIOD How long to wait between sync attempts until the first successful diff --git a/main_test.go b/main_test.go index 4033cd8c5..13de2a4e9 100644 --- a/main_test.go +++ b/main_test.go @@ -425,107 +425,6 @@ func TestTouch(t *testing.T) { } } -func TestChooseWaitTime(t *testing.T) { - period := 10 * time.Second - initPeriod := 500 * time.Millisecond - - cases := []struct { - name string - period time.Duration - initPeriod time.Duration - initialSyncDone bool - expected time.Duration - }{{ - name: "no init-period, not done", - period: period, - initPeriod: 0, - initialSyncDone: false, - expected: period, - }, { - name: "no init-period, done", - period: period, - initPeriod: 0, - initialSyncDone: true, - expected: period, - }, { - name: "init-period set, not done", - period: period, - initPeriod: initPeriod, - initialSyncDone: false, - expected: initPeriod, - }, { - name: "init-period set, done", - period: period, - initPeriod: initPeriod, - initialSyncDone: true, - expected: period, - }} - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - got := chooseWaitTime(tc.period, tc.initPeriod, tc.initialSyncDone) - if got != tc.expected { - t.Errorf("expected %v, got %v", tc.expected, got) - } - }) - } -} - -func TestIsInitFailuresExceeded(t *testing.T) { - cases := []struct { - name string - initMaxFailures int - initialSyncDone bool - failCount int - expected bool - }{{ - name: "disabled (zero)", - initMaxFailures: 0, - initialSyncDone: false, - failCount: 100, - expected: false, - }, { - name: "disabled (negative)", - initMaxFailures: -1, - initialSyncDone: false, - failCount: 100, - expected: false, - }, { - name: "limit set, already done", - initMaxFailures: 3, - initialSyncDone: true, - failCount: 100, - expected: false, - }, { - name: "limit set, not done, under limit", - initMaxFailures: 5, - initialSyncDone: false, - failCount: 3, - expected: false, - }, { - name: "limit set, not done, at limit", - initMaxFailures: 3, - initialSyncDone: false, - failCount: 3, - expected: true, - }, { - name: "limit set, not done, over limit", - initMaxFailures: 3, - initialSyncDone: false, - failCount: 5, - expected: true, - }} - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - got := isInitFailuresExceeded(tc.initMaxFailures, tc.initialSyncDone, tc.failCount) - if got != tc.expected { - t.Errorf("expected %v, got %v", tc.expected, got) - } - }) - } -} - func TestHasGitLockFile(t *testing.T) { testCases := map[string]struct { inputFilePath []string