From 9f010a59fbf5e4fe0e44106a07184ee7132e601f Mon Sep 17 00:00:00 2001 From: Bryce Lelbach Date: Sat, 18 Apr 2026 07:17:01 +0000 Subject: [PATCH 1/2] fix(auth): refresh on 401 and stop writing bogus refresh tokens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users report being logged out of brev-cli multiple times per hour, forcing repeated `brev login --token`. Two bugs in the auth flow caused this: 1. `LoginWithToken` stored the literal string "auto-login" as the RefreshToken whenever it was handed a valid JWT access token (pkg/auth/auth.go). When that short-lived access token later expired, the refresh path in `GetFreshAccessTokenOrNil` tried to exchange "auto-login" with the IdP, which always failed, so the user was prompted to re-login every time the access token aged out — roughly hourly with typical NVIDIA KAS token lifetimes. 2. The auth-failure retry handler in pkg/store/http.go only fired on HTTP 403 Forbidden. APIs returning 401 Unauthorized (the more standard response for expired credentials) bypassed the refresh hook entirely and surfaced as a hard failure on the first such response, even when the refresh token was still good. This change: - Stores an empty RefreshToken when LoginWithToken receives a JWT, so the refresh path correctly recognizes there is nothing to refresh and falls through without a failed IdP round-trip. Emits a one-line notice on stderr so the user knows this session cannot be auto-renewed. - Normalizes the legacy "auto-login" sentinel to "" on read, so users with existing credentials.json files from older CLI versions benefit from the fix without re-running `brev login --token`. - Changes `GetFreshAccessTokenOrNil` to return "" (rather than an expired AccessToken) when there is no way to refresh, so callers prompt for re-login cleanly instead of shipping an expired JWT to the API. - Extends the refresh-and-retry handler in AuthHTTPStore to fire on both 401 and 403. Longer-term the website at brev.nvidia.com/settings/cli should hand out a real refresh token (or a short-lived exchange code) rather than a raw access JWT so `brev login --token` can produce a refreshable session. This PR is the CLI-only stop-the-bleeding fix. Co-Authored-By: Claude Opus 4.7 --- pkg/auth/auth.go | 38 +++++++++++++++++++++++++++++++++----- pkg/store/http.go | 12 ++++++++++-- 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/pkg/auth/auth.go b/pkg/auth/auth.go index 3138fd034..7c9846309 100644 --- a/pkg/auth/auth.go +++ b/pkg/auth/auth.go @@ -146,6 +146,13 @@ func (t Auth) GetFreshAccessTokenOrNil() (string, error) { return "", nil } + // Older CLI versions stored the literal string "auto-login" when + // `brev login --token` had no real refresh token to save. Treat it as + // absent so we do not attempt to exchange it with the IdP and fail. + if tokens.RefreshToken == autoLoginSentinel { + tokens.RefreshToken = "" + } + // should always at least have access token? if tokens.AccessToken == "" { breverrors.GetDefaultErrorReporter().ReportMessage("access token is an empty string but shouldn't be") @@ -154,7 +161,13 @@ func (t Auth) GetFreshAccessTokenOrNil() (string, error) { if err != nil { return "", breverrors.WrapAndTrace(err) } - if !isAccessTokenValid && tokens.RefreshToken != "" { + if !isAccessTokenValid { + if tokens.RefreshToken == "" { + // Access token is expired and we have no refresh token. Returning + // the expired token here would just cause a 401 on the next API + // call; return empty so callers can prompt for re-login instead. + return "", nil + } tokens, err = t.getNewTokensWithRefreshOrNil(tokens.RefreshToken) if err != nil { return "", breverrors.WrapAndTrace(err) @@ -162,8 +175,6 @@ func (t Auth) GetFreshAccessTokenOrNil() (string, error) { if tokens == nil { return "", nil } - } else if tokens.RefreshToken == "" && tokens.AccessToken == "" { - return "", nil } return tokens.AccessToken, nil } @@ -197,22 +208,39 @@ func shouldLogin() (bool, error) { return trimmed == "y" || trimmed == "", nil } +// autoLoginSentinel is a legacy value older CLI versions stored in place of +// a real token when `brev login --token` was used. It is not a valid token of +// any kind; treat it as "absent" on read. +const autoLoginSentinel = "auto-login" + func (t Auth) LoginWithToken(token string) error { valid, err := isAccessTokenValid(token) if err != nil { return breverrors.WrapAndTrace(err) } if valid { + // The token is a self-contained JWT access token with no accompanying + // refresh token. Previously we stored the string "auto-login" in the + // RefreshToken slot; when the access token expired the refresh path + // then attempted to exchange that sentinel with the IdP, which always + // failed, logging the user out every time the short-lived access + // token aged out. Store an empty RefreshToken instead so the refresh + // path correctly recognizes there is nothing to refresh and prompts + // for a fresh login exactly once. + fmt.Fprintln(os.Stderr, "Note: tokens from --token cannot be refreshed; re-run `brev login` when the session expires.") err := t.authStore.SaveAuthTokens(entity.AuthTokens{ AccessToken: token, - RefreshToken: "auto-login", + RefreshToken: "", }) if err != nil { return breverrors.WrapAndTrace(err) } } else { + // The token is not a JWT, assume it is a refresh token. The access + // token slot is filled with the sentinel so the first API call + // triggers a refresh to populate a real access token. err := t.authStore.SaveAuthTokens(entity.AuthTokens{ - AccessToken: "auto-login", + AccessToken: autoLoginSentinel, RefreshToken: token, }) if err != nil { diff --git a/pkg/store/http.go b/pkg/store/http.go index 60884f810..1bb264971 100644 --- a/pkg/store/http.go +++ b/pkg/store/http.go @@ -104,7 +104,7 @@ func (s *AuthHTTPStore) SetForbiddenStatusRetryHandler(handler func() error) err } attemptsThresh := 1 s.authHTTPClient.restyClient.OnAfterResponse(func(c *resty.Client, r *resty.Response) error { - if r.StatusCode() == http.StatusForbidden && r.Request.Attempt < attemptsThresh+1 { + if isAuthFailure(r.StatusCode()) && r.Request.Attempt < attemptsThresh+1 { err := handler() if err != nil { return breverrors.WrapAndTrace(err) @@ -117,7 +117,7 @@ func (s *AuthHTTPStore) SetForbiddenStatusRetryHandler(handler func() error) err if e != nil { return false } - return r.StatusCode() == http.StatusForbidden + return isAuthFailure(r.StatusCode()) }) s.authHTTPClient.restyClient.SetRetryCount(attemptsThresh) @@ -125,6 +125,14 @@ func (s *AuthHTTPStore) SetForbiddenStatusRetryHandler(handler func() error) err return nil } +// isAuthFailure reports whether an HTTP status code indicates the caller's +// credentials are missing, invalid, or expired. Both 401 Unauthorized and +// 403 Forbidden can signal an expired access token from Brev's APIs, so we +// treat both as triggers for the refresh-and-retry path. +func isAuthFailure(code int) bool { + return code == http.StatusUnauthorized || code == http.StatusForbidden +} + type AuthHTTPClient struct { restyClient *resty.Client auth Auth From f4d1c7bdd4421ddca19605e12650b4d97619cb2e Mon Sep 17 00:00:00 2001 From: Bryce Lelbach Date: Sat, 18 Apr 2026 07:30:43 +0000 Subject: [PATCH 2/2] feat(auth): refresh access token proactively before it expires The previous behavior waited until an API call returned 401 to trigger the refresh path. That keeps one unnecessary round-trip on the critical path of every call that races the token's exp boundary, and it surfaces transient auth-provider network failures as hard failures rather than retries. This change lets the CLI refresh ahead of expiry: - `AuthTokens` gains optional `AccessTokenExp` and `IssuedAt` fields (omitempty). They are populated from the access JWT's `exp` / `iat` claims at save time, and fall back to live JWT parsing for credential files written by older CLI versions. No migration needed. - `GetFreshAccessTokenOrNil` refreshes when the access token is invalid OR when it is valid but within `refreshBeforeExpiry` (5 min) of exp. The proactive branch is tolerant of refresh failure: if the IdP is briefly unreachable, the CLI keeps using the still-valid access token and retries on the next call rather than logging the user out. - `getNewTokensWithRefreshOrNil` classifies refresh errors. Network- level failures (timeouts, connection refused, DNS) are wrapped with a clearer message; authoritative IdP rejection produces a one-line stderr prompt ("re-run `brev login`") rather than a stack-trace dump. Builds on the reactive 401/403 retry from the prior auth fix. --- pkg/auth/auth.go | 144 ++++++++++++++++++++++++++++++++++++++----- pkg/entity/entity.go | 8 +++ 2 files changed, 138 insertions(+), 14 deletions(-) diff --git a/pkg/auth/auth.go b/pkg/auth/auth.go index 7c9846309..e0f564552 100644 --- a/pkg/auth/auth.go +++ b/pkg/auth/auth.go @@ -4,10 +4,13 @@ import ( "bufio" "errors" "fmt" + "net" + "net/url" "os" "os/exec" "runtime" "strings" + "time" "github.com/brevdev/brev-cli/pkg/config" "github.com/brevdev/brev-cli/pkg/entity" @@ -17,6 +20,12 @@ import ( "github.com/pkg/browser" ) +// refreshBeforeExpiry is how far in advance of access-token expiration the +// CLI refreshes. Using a window larger than typical request RTTs avoids 401 +// round-trips at the tail of a token's life, at the cost of refreshing a +// small number of still-valid tokens. +const refreshBeforeExpiry = 5 * time.Minute + type LoginAuth struct { Auth } @@ -161,24 +170,123 @@ func (t Auth) GetFreshAccessTokenOrNil() (string, error) { if err != nil { return "", breverrors.WrapAndTrace(err) } - if !isAccessTokenValid { + + // Trigger a refresh when the token is invalid OR when it is still valid + // but close enough to expiry that the next API call is likely to race + // the exp boundary. The proactive branch is tolerant of refresh failure: + // if the IdP is briefly unreachable, fall back to the (still-valid) + // current access token rather than logging the user out. + expiringSoon := isAccessTokenValid && tokens.RefreshToken != "" && accessTokenExpiresSoon(tokens) + if !isAccessTokenValid || expiringSoon { if tokens.RefreshToken == "" { // Access token is expired and we have no refresh token. Returning // the expired token here would just cause a 401 on the next API // call; return empty so callers can prompt for re-login instead. return "", nil } - tokens, err = t.getNewTokensWithRefreshOrNil(tokens.RefreshToken) - if err != nil { - return "", breverrors.WrapAndTrace(err) + newTokens, refreshErr := t.getNewTokensWithRefreshOrNil(tokens.RefreshToken) + if refreshErr != nil { + if expiringSoon { + // Current token still validates; swallow the transient + // failure and try again on the next call. + return tokens.AccessToken, nil + } + return "", breverrors.WrapAndTrace(refreshErr) } - if tokens == nil { + if newTokens == nil { return "", nil } + tokens = newTokens } return tokens.AccessToken, nil } +// accessTokenExpiresSoon reports whether the stored access token's +// expiration is within refreshBeforeExpiry of now. It prefers the persisted +// AccessTokenExp field (written by populateTokenTimestamps on save) and +// falls back to decoding the access JWT for files written by older CLI +// versions that never persisted the claim. +func accessTokenExpiresSoon(tokens *entity.AuthTokens) bool { + var exp time.Time + if tokens.AccessTokenExp != nil { + exp = *tokens.AccessTokenExp + } else { + exp, _ = accessTokenClaims(tokens.AccessToken) + } + if exp.IsZero() { + return false + } + return time.Until(exp) < refreshBeforeExpiry +} + +// accessTokenClaims parses the access JWT without signature verification +// and returns its exp and iat claims. Missing or malformed claims are +// returned as the zero time.Time; the caller is responsible for guarding +// with IsZero(). +func accessTokenClaims(token string) (exp, iat time.Time) { + if token == "" { + return time.Time{}, time.Time{} + } + parser := jwt.Parser{} + ptoken, _, err := parser.ParseUnverified(token, jwt.MapClaims{}) + if err != nil { + return time.Time{}, time.Time{} + } + claims, ok := ptoken.Claims.(jwt.MapClaims) + if !ok { + return time.Time{}, time.Time{} + } + if v, ok := claims["exp"].(float64); ok { + exp = time.Unix(int64(v), 0) + } + if v, ok := claims["iat"].(float64); ok { + iat = time.Unix(int64(v), 0) + } + return exp, iat +} + +// populateTokenTimestamps fills in AccessTokenExp and IssuedAt from the +// access JWT when they are not already set. Safe to call on any AuthTokens +// value; missing or non-JWT access tokens leave the fields nil. +func populateTokenTimestamps(tokens *entity.AuthTokens) { + if tokens == nil || tokens.AccessToken == "" { + return + } + exp, iat := accessTokenClaims(tokens.AccessToken) + if tokens.AccessTokenExp == nil && !exp.IsZero() { + tokens.AccessTokenExp = &exp + } + if tokens.IssuedAt == nil && !iat.IsZero() { + tokens.IssuedAt = &iat + } +} + +// isTransientRefreshError reports whether an error from the OAuth refresh +// call is a transient network condition (timeout, connection refused, +// DNS failure, etc.) as opposed to an authoritative rejection of the +// refresh token by the IdP. Transient errors should not force the user to +// re-login. +func isTransientRefreshError(err error) bool { + if err == nil { + return false + } + var urlErr *url.Error + if errors.As(err, &urlErr) { + if urlErr.Timeout() { + return true + } + } + var netErr net.Error + if errors.As(err, &netErr) && netErr.Timeout() { + return true + } + // DNS / connection-refused / TLS handshake errors surface as url.Error + // wrapping an *net.OpError. Treat connection-level failures as + // transient: the refresh token is probably fine, the network isn't. + var opErr *net.OpError + return errors.As(err, &opErr) +} + // Prompts for login and returns tokens, and saves to store func (t Auth) PromptForLogin() (*LoginTokens, error) { shouldLogin, err := t.shouldLogin() @@ -228,22 +336,22 @@ func (t Auth) LoginWithToken(token string) error { // path correctly recognizes there is nothing to refresh and prompts // for a fresh login exactly once. fmt.Fprintln(os.Stderr, "Note: tokens from --token cannot be refreshed; re-run `brev login` when the session expires.") - err := t.authStore.SaveAuthTokens(entity.AuthTokens{ + tokens := entity.AuthTokens{ AccessToken: token, RefreshToken: "", - }) - if err != nil { + } + populateTokenTimestamps(&tokens) + if err := t.authStore.SaveAuthTokens(tokens); err != nil { return breverrors.WrapAndTrace(err) } } else { // The token is not a JWT, assume it is a refresh token. The access // token slot is filled with the sentinel so the first API call // triggers a refresh to populate a real access token. - err := t.authStore.SaveAuthTokens(entity.AuthTokens{ + if err := t.authStore.SaveAuthTokens(entity.AuthTokens{ AccessToken: autoLoginSentinel, RefreshToken: token, - }) - if err != nil { + }); err != nil { return breverrors.WrapAndTrace(err) } } @@ -350,13 +458,20 @@ func (t Auth) getSavedTokensOrNil() (*entity.AuthTokens, error) { // gets new access and refresh token or returns nil if refresh token expired, and updates store func (t Auth) getNewTokensWithRefreshOrNil(refreshToken string) (*entity.AuthTokens, error) { tokens, err := t.oauth.GetNewAuthTokensWithRefresh(refreshToken) - // TODO 2 handle if 403 invalid grant - // https://stackoverflow.com/questions/57383523/how-to-detect-when-an-oauth2-refresh-token-expired if err != nil { if strings.Contains(err.Error(), "not implemented") { return nil, nil } - return nil, breverrors.WrapAndTrace(err) + if isTransientRefreshError(err) { + // Network hiccup; do not clear the user's session. Surface the + // error so the caller can decide whether to swallow it (when + // the current access token is still valid) or propagate it. + return nil, breverrors.WrapAndTrace(fmt.Errorf("could not reach auth provider to refresh session: %w", err)) + } + // Definitive rejection from the IdP. Tell the user in plain + // language rather than burying it in a stack trace. + fmt.Fprintln(os.Stderr, "Your brev session could not be refreshed; re-run `brev login`.") + return nil, nil } if tokens == nil { return nil, nil @@ -364,6 +479,7 @@ func (t Auth) getNewTokensWithRefreshOrNil(refreshToken string) (*entity.AuthTok if tokens.RefreshToken == "" { tokens.RefreshToken = refreshToken } + populateTokenTimestamps(tokens) err = t.authStore.SaveAuthTokens(*tokens) if err != nil { diff --git a/pkg/entity/entity.go b/pkg/entity/entity.go index 868f1fee9..d359fb7a0 100644 --- a/pkg/entity/entity.go +++ b/pkg/entity/entity.go @@ -27,6 +27,14 @@ var LegacyWorkspaceGroups = map[string]bool{ type AuthTokens struct { AccessToken string `json:"access_token"` RefreshToken string `json:"refresh_token"` + // AccessTokenExp and IssuedAt are populated from the access JWT's `exp` + // and `iat` claims when available. They let the CLI refresh proactively + // before the access token expires, and let UX surfaces like `brev + // status` display session lifetime without re-parsing the JWT. Both are + // optional: files written by older CLI versions lack these fields, and + // tokens whose JWTs do not carry the claims will leave them nil. + AccessTokenExp *time.Time `json:"access_token_exp,omitempty"` + IssuedAt *time.Time `json:"issued_at,omitempty"` } type IDEConfig struct {