From 443dc80191cf7b623af6da156a8efa1846590ce9 Mon Sep 17 00:00:00 2001 From: Dmitrii Creed Date: Sat, 27 Jun 2026 20:10:30 +0400 Subject: [PATCH 1/6] feat(secrets): fail-closed version guard on the secrets store Adds a `version` field to secrets.yaml and a CurrentSecretsFileVersion this build understands (0 = the original format; absent version is treated as 0). unmarshalSecretsFile now REFUSES to read any store whose version exceeds what this build supports, instead of partially parsing it and then silently dropping the unknown fields on the next write (which would corrupt the store). This is the prerequisite for any future secrets-format change: this fail-closed reader must ship and roll out fleet-wide BEFORE a higher-versioned store is ever written, so older binaries refuse rather than corrupt. `version,omitempty` keeps existing stores byte-identical on rewrite (no churn, full back-compat). Tests: a version-99 store is refused with a clear upgrade message; a version-less (current) store still reads. Signed-off-by: Dmitrii Creed --- pkg/api/secrets/cryptor.go | 12 ++++++++ pkg/api/secrets/management.go | 8 +++++ pkg/api/secrets/version_guard_test.go | 44 +++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 pkg/api/secrets/version_guard_test.go diff --git a/pkg/api/secrets/cryptor.go b/pkg/api/secrets/cryptor.go index b1bc4cc5..5ef7f9bd 100644 --- a/pkg/api/secrets/cryptor.go +++ b/pkg/api/secrets/cryptor.go @@ -14,6 +14,15 @@ import ( const EncryptedSecretFilesDataFileName = "secrets.yaml" +// CurrentSecretsFileVersion is the highest secrets.yaml schema version this build +// understands. A store with no `version` field is treated as version 0 (the +// original, current format). When a future format bumps this, OLDER binaries +// (which carry a lower CurrentSecretsFileVersion) refuse to read it — see the +// guard in unmarshalSecretsFile — instead of silently dropping the new fields on +// the next write. This reader must therefore ship and roll out fleet-wide BEFORE +// any higher-versioned store is ever written. +const CurrentSecretsFileVersion = 0 + type Cryptor interface { GenerateKeyPairWithProfile(projectName, profile string) error GenerateEd25519KeyPairWithProfile(projectName, profile string) error @@ -76,6 +85,9 @@ func (c *cryptor) PrivateKey() string { } type EncryptedSecretFiles struct { + // Version is the secrets.yaml schema version. Absent/0 = the original format. + // A reader refuses any version above CurrentSecretsFileVersion (fail-closed). + Version int `json:"version,omitempty" yaml:"version,omitempty"` Registry Registry `json:"registry" yaml:"registry"` Secrets map[string]EncryptedSecrets `json:"secrets" yaml:"secrets"` } diff --git a/pkg/api/secrets/management.go b/pkg/api/secrets/management.go index ebdac915..f27a4cc9 100644 --- a/pkg/api/secrets/management.go +++ b/pkg/api/secrets/management.go @@ -164,6 +164,14 @@ func (c *cryptor) unmarshalSecretsFile() error { } if res, err := api.UnmarshalDescriptor[EncryptedSecretFiles](secretsFileData); err != nil || res == nil { return errors.Wrapf(err, "failed to unmarshal secrets file: %q", secretsFilePath) + } else if res.Version > CurrentSecretsFileVersion { + // Fail closed: a newer schema this build doesn't understand. Reading and + // then writing would silently drop the fields we can't model and corrupt + // the store, so refuse outright. + return errors.Errorf( + "secrets file %q is version %d, but this sc build supports up to version %d; upgrade sc (refusing to read to avoid data loss)", + secretsFilePath, res.Version, CurrentSecretsFileVersion, + ) } else { c.secrets = *res // Normalize all public keys to ensure consistency (remove aliases/comments) diff --git a/pkg/api/secrets/version_guard_test.go b/pkg/api/secrets/version_guard_test.go new file mode 100644 index 00000000..c6f875a2 --- /dev/null +++ b/pkg/api/secrets/version_guard_test.go @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) Simple Container + +package secrets + +import ( + "os" + "path" + "testing" + + . "github.com/onsi/gomega" + + "github.com/simple-container-com/api/pkg/api" +) + +// TestUnmarshalSecretsFile_RejectsNewerVersion is the fail-closed guard: a store +// written by a future schema version must be refused, not partially read (which +// would silently drop the fields this build can't model and corrupt the store). +func TestUnmarshalSecretsFile_RejectsNewerVersion(t *testing.T) { + RegisterTestingT(t) + c, wd, cleanup := newTestCryptor(t) + defer cleanup() + + secretsPath := path.Join(wd, api.ScConfigDirectory, EncryptedSecretFilesDataFileName) + Expect(os.WriteFile(secretsPath, []byte("version: 99\nregistry:\n files: []\nsecrets: {}\n"), 0o600)).To(Succeed()) + + err := c.ReadSecretFiles() + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("version 99")) + Expect(err.Error()).To(ContainSubstring("refusing to read")) +} + +// TestUnmarshalSecretsFile_AcceptsCurrentVersion confirms back-compat: a store +// with no version field (version 0, the original format) reads normally. +func TestUnmarshalSecretsFile_AcceptsCurrentVersion(t *testing.T) { + RegisterTestingT(t) + c, wd, cleanup := newTestCryptor(t) + defer cleanup() + + secretsPath := path.Join(wd, api.ScConfigDirectory, EncryptedSecretFilesDataFileName) + Expect(os.WriteFile(secretsPath, []byte("registry:\n files: []\nsecrets: {}\n"), 0o600)).To(Succeed()) + + Expect(c.ReadSecretFiles()).To(Succeed()) +} From 58bfe74d22553d131d902ad0080e4b29abeed3f3 Mon Sep 17 00:00:00 2001 From: Dmitrii Creed Date: Sat, 27 Jun 2026 15:22:08 +0400 Subject: [PATCH 2/6] docs(secrets): RFC for keyless secret backend (KMS recipients + OIDC) Proposes pluggable recipient types whose private key never leaves a remote authority (cloud KMS / Vault) plus a keyless CI mode via OIDC federation, so the store private key (SIMPLE_CONTAINER_CONFIG) no longer has to be materialized on CI runners. Cloud-agnostic and backward-compatible: the local recipient stays the default; migration is additive, per-repo, and reversible. Covers the v2 envelope prerequisite (single AEAD DEK + per-recipient wrapping + AAD binding), the OIDC trust/authorization model, format versioning with a fail-closed guard, revocation reality (value rotation), operational concerns, and a phased migration plan. Signed-off-by: Dmitrii Creed --- docs/design/keyless-secrets/README.md | 235 ++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 docs/design/keyless-secrets/README.md diff --git a/docs/design/keyless-secrets/README.md b/docs/design/keyless-secrets/README.md new file mode 100644 index 00000000..49bc9f60 --- /dev/null +++ b/docs/design/keyless-secrets/README.md @@ -0,0 +1,235 @@ +# RFC: Keyless secret backend (KMS-wrapped recipients + OIDC federation) + +**Status:** Draft / request for comments +**Area:** `pkg/api/secrets` (encryption envelope, recipients, CLI `secrets` commands) +**Relates to:** [`docs/SECRETS-POLICY.md`](../../SECRETS-POLICY.md), [`docs/SECURITY.md`](../../SECURITY.md) + +## Summary + +Today, decrypting an SC secret store in CI requires materializing the store's +**private key** (`SIMPLE_CONTAINER_CONFIG`) in plaintext on the runner — typically +as a CI secret. That value is long-lived, all-powerful (decrypts the entire store), +not rotated in practice, not audited, and not revocable after a leak. + +This RFC proposes adding **pluggable recipient types whose private key never leaves a +remote authority** (a cloud KMS or Vault), and a **keyless CI mode** where the runner +proves its identity via OIDC federation and obtains a short-lived `Decrypt` permission +instead of holding any long-lived key. It is **cloud-agnostic** (AWS/GCP/Azure/Vault, +or none) and **backward-compatible** (the existing local-key recipient stays the +default; migration is additive, per-repo, and reversible). + +The crypto primitives are not reinvented; the change is the **key-custody model** plus +a **v2 envelope format** required to support per-recipient key wrapping cleanly. + +## Motivation + +The SC envelope is already **multi-recipient** (`AddPublicKey`/`RemovePublicKey`, +CLI `secrets allow`/`disallow`). The limitation is that there is exactly **one +recipient type**: a raw asymmetric private key. Because decryption requires possessing +that private key, the key must be placed on every machine that decrypts — including +CI. There is no way to delegate the unwrap to an external authority. + +Consequences of the single long-lived key in CI: + +- **Exfiltration surface.** Any code that runs in the job (including a malicious + transitive dependency on an untrusted PR build) can read the key and decrypt the + whole store. +- **No rotation in practice.** Rotating means re-keying and redistributing the key to + every consumer. +- **No audit.** Use of a local private key is not logged anywhere. +- **No revocation.** A leaked private key decrypts forever. + +## Goals / non-goals + +**Goals** +- Allow a store to be decrypted in CI with **no long-lived secret present** on the runner. +- Keep it **cloud-agnostic** and keep the **simple local-key path as the default**. +- Make access **scoped, audited, and revocable**. +- Provide an **additive, reversible, per-repo** migration. + +**Non-goals** +- Reinventing cryptographic primitives. +- Replacing the on-disk format with SOPS/age (we keep SC's format + `allow`/`disallow` UX). +- Putting an external SaaS vault in the deploy hot path by default. +- Moving application runtime secrets out of the store (separate concern). + +## Design + +### 1. v2 envelope (prerequisite) + +The current format encrypts each file independently for each recipient. To wrap keys +per recipient cleanly — and to keep KMS calls O(1) per deploy rather than O(files) — +introduce a **versioned v2 envelope**: + +- A single random **data-encryption key (DEK)** encrypts the payload once with an + AEAD (ChaCha20-Poly1305), per file-set (or per environment). +- The DEK is **wrapped once per recipient**. +- The AEAD includes **associated data (AAD)** binding the ciphertext to + `{format-version, path, environment, recipient-id}` so a wrapped DEK or ciphertext + cannot be transplanted into another context. +- Recipient wrapping standardizes on a **vetted public-key scheme** (an X25519-based + KEM for asymmetric recipients) and **KMS Encrypt/Decrypt** for KMS recipients. Key + material and algorithm identifiers are carried in **authenticated** headers. + +`secrets.yaml` gains an explicit `version` and a typed `recipients[]` list. + +### 2. Typed recipients + `KeyProvider` + +```go +type KeyProvider interface { + Wrap(ctx context.Context, dataKey []byte) (wrapped []byte, err error) + Unwrap(ctx context.Context, wrapped []byte) (dataKey []byte, err error) + Recipients() []RecipientRef // metadata only, no network, no decrypt +} +``` + +Recipient types: + +| Type | Private key location | CI auth | +|---|---|---| +| `local` (default) | a local private key (today's `SIMPLE_CONTAINER_CONFIG`) | the key itself | +| `aws-kms://` | AWS KMS | OIDC → STS → `kms:Decrypt` | +| `gcp-kms://` | GCP KMS | OIDC → Workload Identity Federation → `decrypt` | +| `azure-kv://` | Azure Key Vault | OIDC → federated credential | +| `vault://transit/` | HashiCorp Vault | OIDC/JWT auth → transit decrypt | + +`DecryptAll` selects the recipient by **authenticated recipient-id** (not "first that +succeeds") and unwraps via the matching provider. Any one recipient suffices; never all. + +KMS recipients reference **immutable key identifiers** (not mutable aliases) and bind a +KMS **encryption context** matching the AAD above. + +### 3. Keyless CI via OIDC + +When a KMS recipient is configured and the platform exposes an OIDC token, SC exchanges +the token for short-lived credentials scoped to `Decrypt` on the recipient key. On +GitHub Actions the runner only needs `id-token: write`; no long-lived secret is stored. +Every unwrap is recorded in the cloud audit log under the federated identity. + +To avoid handing the OIDC mint capability to arbitrary job steps, credential +acquisition should happen in **one isolated step** that passes only the resulting +short-lived credentials onward. + +### 4. Federated deploy credentials + +Independently of store decryption, the cloud credentials used by `sc deploy` can be +sourced from the same OIDC federation (`auth: { provider: oidc }`) instead of static +keys stored inside the secret store. This requires **first-class auth provider types** +that consume ambient federated credentials rather than deserializing static key +material. Static keys remain supported as a fallback. + +### 5. Environment-scoped recipients + +A recipient may be granted to a single environment. Combined with per-environment DEKs, +an untrusted/preview context can be limited to decrypting only its own environment and +never production. This is enforced by encrypting each environment under a distinct DEK +and never wrapping production's DEK to a preview-reachable recipient. + +### 6. Decouple repository checkout from the encryption key + +Where the store private key is currently reused as an SSH key to clone a private +parent/stacks repository, keyless mode needs a separate mechanism (e.g. a narrowly +scoped GitHub App installation token, or an existing deploy key). This is **orthogonal** +to encryption and is a **prerequisite** before the local key can be removed. + +## Authorization model (OIDC trust) + +The trust policy — not the runner — is the control. Hard-won requirements: + +- **Per-stack / per-environment roles**, not one shared role. A single role shared by + many consumers, trusted by `job_workflow_ref` alone, lets *any* repo that calls the + shared workflow assume it. +- **Pin the concrete caller**: immutable repository id **and** `job_workflow_ref` (to a + pinned ref), plus `aud`. **No wildcards** in the subject. +- **`ref` and `environment` subjects are mutually exclusive.** A job using a GitHub + Environment emits `repo:ORG/REPO:environment:NAME` with **no** `ref` component; a job + without one emits `...:ref:...`. You cannot pin both in one subject — choose per role. +- **Production = protected Environment with required reviewers.** OIDC trust alone is + not sufficient to gate production. +- **Never `pull_request_target` with PR-head checkout** in any workflow that can mint an + id-token or reach a recipient. +- **Attribution:** set a session name carrying repo + run id; enable cloud data-access + logging where it is off by default (e.g. GCP KMS), so a shared role does not erase + per-run attribution. + +## Backward compatibility & format versioning + +- The `local` recipient + existing config remain the **default**; nothing changes for + current users until they opt in. +- **Forward-compat guard (ship first):** older clients ignore unknown YAML fields and + rewrite only the fields they know, which would **silently drop** new recipients on the + next `allow`/`disallow`. A version-aware client that **fails closed** on an unknown + `version` must be released and rolled out **before** any store is written in v2. +- Migration uses existing verbs: `secrets allow --kms ` (add a KMS recipient), + verify keyless decrypt, then `secrets disallow `. Multi-recipient means + both work during overlap; per-repo; reversible. +- A `require-kms` (or equivalent) per-store setting disables the legacy path once a store + is fully migrated, to prevent silent downgrade back to the local key. +- Cloud-agnostic: customers on no cloud keep the `local` recipient permanently — it is + not a deprecation target. + +## Revocation reality + +Removing a recipient or rotating a KMS key does **not** revoke access to ciphertext +already committed to version control — history retains blobs decryptable by previously +valid recipients. Therefore: + +- **Removing a recipient MUST be paired with rotating the underlying secret values** + (and the upstream credential they represent), not just dropping a wrapper. +- The recipient list is effectively an **append-only audit surface**; changes to it + should be review-gated (code owners). + +## Operational considerations + +- **Availability:** decryption now depends on cloud IAM/KMS. Decrypt **all** required + material **up front**, before any infrastructure mutation, so a mid-deploy KMS error + cannot leave a half-applied stack. +- **Throughput/cost:** one DEK per file-set ⇒ one `Decrypt` per deploy, not one per + file. Use bounded retries with backoff + jitter; cache the unwrapped DEK in-process + for the run only. +- **Locality:** pin the KMS region to the deploy region; document cross-account key + policies. +- **Dependencies:** the cloud KMS SDKs become direct dependencies (today they are + transitive), expanding the SCA/dependency-update surface. + +## Threat model (summary) + +**Mitigates:** leak of a long-lived master key from CI; secret theft by untrusted +PR-build code (via scoped trust + environment-scoped recipients); lateral movement +after a single job compromise (scope + short TTL); long-lived static cloud credentials; +absence of audit; departed-operator access on the CI path; replay of a stolen token in +another repo (pinned `aud`/subject). + +**Does not fully mitigate (residual):** malicious code inside a *legitimate* +production job (gets short-lived scoped credentials during the run — bounded by scope, +TTL, branch protection, audit, and, for the highest-value targets, isolated runners); +application secrets being decrypted into the workload at deploy time; exfiltration over +otherwise-allowed network egress; compromise of the cloud account / IAM / KMS policy +itself; supply-chain compromise of the CLI or CI actions (mitigated by signed releases ++ digest pinning); phishing of operator cloud credentials. + +## Migration phases + +0. **Design + review** (this document). +1. **v2 envelope + version-aware fail-closed client**, released and rolled out before + any v2 write. Modernize the asymmetric recipient scheme as part of v2. +2. **`KeyProvider` + first KMS provider + OIDC acquisition**, behind a feature flag. +3. **Canary** on one low-risk staging stack. Gate: `sc deploy` obtains cloud + credentials from the federated environment, not the store; measured KMS latency; + verified rollback runbook. +4. **Migration UX** (`allow --kms`, `secrets doctor`/recipient listing, `require-kms`), + environment-scoped recipients, repo-checkout decoupling, docs. +5. **Federated deploy credentials** (`auth: { provider: oidc }`); drop static keys from + the store. +6. **Staging bake**, then **production** behind protected Environments, stack by stack. +7. **Decommission**: remove the local recipient, **rotate secret values**, delete static + credentials after a zero-usage soak. Add other cloud providers (GCP/Azure/Vault). + +## Open questions + +- Per-file-set vs per-environment DEK granularity (audit/scoping vs rewrap cost). +- How aggressively to enforce a minimum recipient-strength / forbidden-type policy. +- Whether `require-kms` is per-store, per-environment, or both. +- Provider parity: each KMS/Vault has distinct federation, audit, and key-versioning + semantics; ship one provider first behind an explicit contract, then add others with + per-provider review. From 0d7f6fff032054ae0eae400a795eab30c1c308fe Mon Sep 17 00:00:00 2001 From: Dmitrii Creed Date: Sat, 27 Jun 2026 19:53:40 +0400 Subject: [PATCH 3/6] =?UTF-8?q?docs(secrets):=20record=20decision=20?= =?UTF-8?q?=E2=80=94=20inline=20values=20+=20per-scope=20keys=20via=20SOPS?= =?UTF-8?q?,=20file-per-scope?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Folds the multi-model design review outcome into the RFC: adopt SOPS for the inline-value crypto/format, one readable file per scope (not a multiplexed single file — avoids the partial-MAC paradox), per-scope isolated keys, the non-negotiables (hard-fail on missing required secret, MAC+AAD, CODEOWNERS-gated recipients, plaintext lint, rotate-not-just-remove, OIDC->KMS provider keys), the strict-backcompat constraint (separate files + fail-closed version reader first), and a minimal v1. Resolves the format/native-vs-SOPS open questions. Signed-off-by: Dmitrii Creed --- docs/design/keyless-secrets/README.md | 59 +++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/docs/design/keyless-secrets/README.md b/docs/design/keyless-secrets/README.md index 49bc9f60..3279d65e 100644 --- a/docs/design/keyless-secrets/README.md +++ b/docs/design/keyless-secrets/README.md @@ -21,6 +21,63 @@ default; migration is additive, per-repo, and reversible). The crypto primitives are not reinvented; the change is the **key-custody model** plus a **v2 envelope format** required to support per-recipient key wrapping cleanly. +## Decision (2026-06-27): inline values + per-scope keys, via SOPS, file-per-scope + +After a multi-model design review, the concrete shape for the convenience-and-isolation +feature ("encrypt individual values inline, decryptable per scope, like ansible-vault") +is settled: + +- **Adopt SOPS (`getsops/sops`) as the inline-value crypto + format layer** — do **not** + hand-roll inline encryption, a file MAC, and per-backend KMS clients. SOPS already + provides inline value encryption (structure stays readable, only leaves opaque), a + whole-file MAC, partial decryption, and age / AWS-KMS / GCP-KMS / Azure-KV / Vault + recipients. (A just-fixed in-house ed25519 scheme that had zero confidentiality is the + decisive reason not to grow more bespoke crypto here.) +- **One readable file per scope** (`.sc/secrets..yaml`), **not** one multiplexed + file holding every scope. A single multi-scope file forces a "partial-MAC paradox" — a + holder of only the preview key cannot recompute a whole-file MAC over prod values it + cannot read — and would require hand-rolled per-value crypto. File-per-scope delivers + per-scope isolation with SOPS's standard whole-file MAC, in weeks rather than months. +- **A key decrypts only the scope file(s) it is a recipient of.** This replaces the + single all-powerful master key with per-scope, isolated, revocable keys (an age key, an + OIDC→KMS grant, or an isolated secret), so a preview deploy gets only the preview key + and cannot read prod. +- SC owns the thin layer SOPS does not: the scope→recipient config, deploy-time decrypt + with **hard-fail on a missing required secret**, a plaintext-leak lint, and provider + key delivery. + +A multiplexed single-file format and native (non-SOPS) crypto were considered and +rejected (months of work + crypto risk + the partial-MAC paradox). + +### Non-negotiables (must hold before merge) + +1. **Hard-fail on a missing required secret** at deploy — never substitute empty / + placeholder / ciphertext, never deploy partially. +2. **Mandatory MAC + AAD** binding path/scope/version; encrypt-then-MAC; always-random nonce. +3. **Scope→recipient mapping is CODEOWNERS-gated**, out of PR control — a MAC does not + stop re-encryption to an attacker-added recipient. +4. **Plaintext-leak lint** (`encrypted_regex` + CI gate), shipped *with* the feature. +5. **Rotation ≠ removing a recipient**: git history retains old ciphertext, so removing a + recipient requires rotating the underlying secret values. +6. **Provider keys via OIDC→KMS unwrap**, not handing a private key to CI (otherwise it is + just a smaller master key); a provider failure is a hard-fail; the OIDC trust policy is + the real perimeter (IaC + CODEOWNERS). + +### Strict backward compatibility (maintainer requirement) + +The existing whole-file `.sc/secrets.yaml` keeps working unchanged; the feature is +**additive and lives in separate files** old binaries never open. A **fail-closed version +reader must ship and bake fleet-wide first** — old binaries silently drop unknown YAML +fields on rewrite, so the new format must never live in `secrets.yaml`. A given path is +either whole-file (mode A) or inline (mode B), never both. + +### Minimal v1 + +SOPS, **file-per-scope**, age recipients, `sc secrets set` / `edit` + transparent decrypt +at `sc deploy` + the plaintext lint + hard-fail-on-missing. **No KMS/OIDC and no +multi-scope multiplex file in v1** — those are v2 (providers + governance) and a possible +later multiplex format only if practice demands it. + ## Motivation The SC envelope is already **multi-recipient** (`AddPublicKey`/`RemovePublicKey`, @@ -227,6 +284,8 @@ itself; supply-chain compromise of the CLI or CI actions (mitigated by signed re ## Open questions +- **Resolved:** multiplex-single-file vs file-per-scope → **file-per-scope** (see Decision); + native crypto vs SOPS → **SOPS** for the inline-value layer. - Per-file-set vs per-environment DEK granularity (audit/scoping vs rewrap cost). - How aggressively to enforce a minimum recipient-strength / forbidden-type policy. - Whether `require-kms` is per-store, per-environment, or both. From e81e689c6183bfaaad74662af2ec393c1cc1baca Mon Sep 17 00:00:00 2001 From: Dmitrii Creed Date: Sun, 28 Jun 2026 13:29:41 +0400 Subject: [PATCH 4/6] fix(secrets): make the store version guard fail-closed on the CLI too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The guard returned an error from unmarshalSecretsFile, but the root PersistentPreRunE inits with IgnoreAllErrors, so the CLI swallowed it and read a too-new store as empty — a subsequent write would then clobber the newer file (the exact data loss the guard prevents). It only fired on the GitHub-Actions path, which checks the error. Make the too-new-version error a sentinel (ErrSecretsStoreVersionUnsupported) and special-case it in root_cmd.Init so it stays fatal regardless of IgnoreConfigDirError. Found via the branch preview build (sc secrets list on a version:99 store returned empty, exit 0). Signed-off-by: Dmitrii Creed --- pkg/api/secrets/cryptor.go | 8 ++++++++ pkg/api/secrets/management.go | 11 +++++++---- pkg/api/secrets/version_guard_test.go | 5 +++++ pkg/cmd/root_cmd/root.go | 15 +++++++++++++-- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/pkg/api/secrets/cryptor.go b/pkg/api/secrets/cryptor.go index 5ef7f9bd..a6f8ae3b 100644 --- a/pkg/api/secrets/cryptor.go +++ b/pkg/api/secrets/cryptor.go @@ -4,6 +4,7 @@ package secrets import ( + "errors" "sync" "github.com/samber/lo" @@ -23,6 +24,13 @@ const EncryptedSecretFilesDataFileName = "secrets.yaml" // any higher-versioned store is ever written. const CurrentSecretsFileVersion = 0 +// ErrSecretsStoreVersionUnsupported is returned when the on-disk store declares a +// schema version newer than CurrentSecretsFileVersion. It MUST stay fatal on every +// read path — including ones that otherwise tolerate a missing/uninitialized store +// (root_cmd's IgnoreConfigDirError) — because reading a too-new store as empty and +// then writing would clobber it. Detect it with errors.Is. +var ErrSecretsStoreVersionUnsupported = errors.New("unsupported secrets store version") + type Cryptor interface { GenerateKeyPairWithProfile(projectName, profile string) error GenerateEd25519KeyPairWithProfile(projectName, profile string) error diff --git a/pkg/api/secrets/management.go b/pkg/api/secrets/management.go index f27a4cc9..f813583d 100644 --- a/pkg/api/secrets/management.go +++ b/pkg/api/secrets/management.go @@ -6,6 +6,7 @@ package secrets import ( "crypto/rsa" "encoding/asn1" + "fmt" "io" "io/fs" "os" @@ -167,10 +168,12 @@ func (c *cryptor) unmarshalSecretsFile() error { } else if res.Version > CurrentSecretsFileVersion { // Fail closed: a newer schema this build doesn't understand. Reading and // then writing would silently drop the fields we can't model and corrupt - // the store, so refuse outright. - return errors.Errorf( - "secrets file %q is version %d, but this sc build supports up to version %d; upgrade sc (refusing to read to avoid data loss)", - secretsFilePath, res.Version, CurrentSecretsFileVersion, + // the store, so refuse outright. Wrap the sentinel so callers that + // otherwise tolerate read errors (root_cmd's IgnoreConfigDirError) can + // still detect this one with errors.Is and keep it fatal. + return fmt.Errorf( + "secrets file %q is version %d, but this sc build supports up to version %d; upgrade sc (refusing to read to avoid data loss): %w", + secretsFilePath, res.Version, CurrentSecretsFileVersion, ErrSecretsStoreVersionUnsupported, ) } else { c.secrets = *res diff --git a/pkg/api/secrets/version_guard_test.go b/pkg/api/secrets/version_guard_test.go index c6f875a2..5bfdc367 100644 --- a/pkg/api/secrets/version_guard_test.go +++ b/pkg/api/secrets/version_guard_test.go @@ -4,6 +4,7 @@ package secrets import ( + "errors" "os" "path" "testing" @@ -28,6 +29,10 @@ func TestUnmarshalSecretsFile_RejectsNewerVersion(t *testing.T) { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("version 99")) Expect(err.Error()).To(ContainSubstring("refusing to read")) + // Must be detectable as the sentinel: root_cmd relies on errors.Is to keep + // this fatal even on the IgnoreConfigDirError CLI path (else a too-new store + // reads as empty and the next write clobbers it). + Expect(errors.Is(err, ErrSecretsStoreVersionUnsupported)).To(BeTrue()) } // TestUnmarshalSecretsFile_AcceptsCurrentVersion confirms back-compat: a store diff --git a/pkg/cmd/root_cmd/root.go b/pkg/cmd/root_cmd/root.go index a435f564..ef166ffc 100644 --- a/pkg/cmd/root_cmd/root.go +++ b/pkg/cmd/root_cmd/root.go @@ -5,6 +5,7 @@ package root_cmd import ( "context" + stderrors "errors" "os" "path" "path/filepath" @@ -18,6 +19,7 @@ import ( "github.com/simple-container-com/api/pkg/api" "github.com/simple-container-com/api/pkg/api/git" "github.com/simple-container-com/api/pkg/api/logger" + "github.com/simple-container-com/api/pkg/api/secrets" "github.com/simple-container-com/api/pkg/provisioner" ) @@ -108,8 +110,17 @@ func (c *RootCmd) Init(opts InitOpts) error { if err := c.Provisioner.Cryptor().ReadProfileConfig(); err != nil && !opts.IgnoreConfigDirError { return errors.Wrapf(err, "failed to read profile config, did you run `init`?") } - if err := c.Provisioner.Cryptor().ReadSecretFiles(); err != nil && !opts.IgnoreConfigDirError { - return errors.Wrapf(err, "failed to read secrets file, did you run `init`?") + if err := c.Provisioner.Cryptor().ReadSecretFiles(); err != nil { + // A store newer than this build understands is ALWAYS fatal — even when + // config-dir errors are ignored (e.g. IgnoreAllErrors from the root + // PersistentPreRunE). Otherwise the store reads as empty and the next + // write clobbers the newer file, which is exactly what the guard prevents. + if stderrors.Is(err, secrets.ErrSecretsStoreVersionUnsupported) { + return err + } + if !opts.IgnoreConfigDirError { + return errors.Wrapf(err, "failed to read secrets file, did you run `init`?") + } } return nil From a624dafba40fbeade4af2801ea1ea4f56294e2f3 Mon Sep 17 00:00:00 2001 From: Dmitrii Creed Date: Sun, 28 Jun 2026 15:28:59 +0400 Subject: [PATCH 5/6] fix(secrets): fail-closed version guard on the GitHub Actions read paths Review (Codex) found the guard was only half-wired: besides the CLI, the GitHub Actions deploy/reveal paths swallowed ReadSecretFiles errors as 'no client secrets -> use parent' (operation_executor revealSecrets x2, parent_repo revealParentRepositorySecrets, and the revealCurrentRepositorySecrets wrapper). A too-new store was silently treated as empty there too. Add secrets.IsUnsupportedStoreVersion(err) helper and special-case it at every tolerant read-path swallow site so the too-new-version error is ALWAYS fatal. Refactor root_cmd.Init to use the helper. Cover the helper contract with a unit test; document the version field + fail-closed behaviour in the secrets guide. Signed-off-by: Dmitrii Creed --- docs/docs/guides/secrets-management.md | 21 +++++++++++++++++++ pkg/api/secrets/cryptor.go | 10 +++++++++ pkg/api/secrets/version_guard_test.go | 17 +++++++++++++++ pkg/cmd/root_cmd/root.go | 3 +-- .../actions/operation_executor.go | 7 +++++++ pkg/githubactions/actions/parent_repo.go | 6 ++++++ 6 files changed, 62 insertions(+), 2 deletions(-) diff --git a/docs/docs/guides/secrets-management.md b/docs/docs/guides/secrets-management.md index 131c8917..146abfb3 100644 --- a/docs/docs/guides/secrets-management.md +++ b/docs/docs/guides/secrets-management.md @@ -31,6 +31,27 @@ Each recipient public key gets its own encryption of the secret, using a scheme - **`ssh-rsa` recipients** — RSA-OAEP (SHA-256). - **`ssh-ed25519` recipients** — an ephemeral-static **X25519 ECDH** sealed box (HKDF-SHA256 + ChaCha20-Poly1305): the content key is derived from the ECDH shared secret, so only the holder of the ed25519 private key can decrypt. +### Store format & version compatibility + +The encrypted store `.sc/secrets.yaml` carries an optional `version` field that +identifies its schema. The current format is **version 0** — written without an +explicit `version:` field, so existing stores are unchanged. + +`sc` is **fail-closed** on this version: a build refuses to read a store whose +`version` is newer than it understands, rather than silently dropping fields it +cannot model and corrupting the store on the next write. The error looks like: + +``` +secrets file ".sc/secrets.yaml" is version N, but this sc build supports up to +version M; upgrade sc (refusing to read to avoid data loss) +``` + +This guarantee holds on **every** path — the local CLI and the GitHub Actions +deploy/reveal flows (a too-new store halts the run instead of falling back to +"no secrets"). The practical implication: **keep `sc` up to date across your team +and CI** before any newer store format is adopted, so that no older binary is +left unable to read — or, worse, able to overwrite — the new on-disk schema. + ## Prerequisites Before working with secrets, ensure you have: diff --git a/pkg/api/secrets/cryptor.go b/pkg/api/secrets/cryptor.go index a6f8ae3b..8706bd17 100644 --- a/pkg/api/secrets/cryptor.go +++ b/pkg/api/secrets/cryptor.go @@ -31,6 +31,16 @@ const CurrentSecretsFileVersion = 0 // then writing would clobber it. Detect it with errors.Is. var ErrSecretsStoreVersionUnsupported = errors.New("unsupported secrets store version") +// IsUnsupportedStoreVersion reports whether err indicates the on-disk secrets +// store declares a newer schema version than this build understands. Read paths +// that otherwise tolerate a missing/uninitialized store (the CLI's +// IgnoreConfigDirError, the GitHub Actions "no client secrets -> use parent" +// fallbacks) MUST treat a true result as FATAL and never swallow it as "no +// secrets" — ignoring a too-new store risks a later write clobbering it. +func IsUnsupportedStoreVersion(err error) bool { + return errors.Is(err, ErrSecretsStoreVersionUnsupported) +} + type Cryptor interface { GenerateKeyPairWithProfile(projectName, profile string) error GenerateEd25519KeyPairWithProfile(projectName, profile string) error diff --git a/pkg/api/secrets/version_guard_test.go b/pkg/api/secrets/version_guard_test.go index 5bfdc367..780095fa 100644 --- a/pkg/api/secrets/version_guard_test.go +++ b/pkg/api/secrets/version_guard_test.go @@ -47,3 +47,20 @@ func TestUnmarshalSecretsFile_AcceptsCurrentVersion(t *testing.T) { Expect(c.ReadSecretFiles()).To(Succeed()) } + +// TestIsUnsupportedStoreVersion guards the contract every tolerant read-path +// caller relies on (root_cmd.Init + the GitHub Actions reveal fallbacks): the +// too-new-store error must be detectable via the helper so those callers keep it +// fatal instead of swallowing it as "no secrets". +func TestIsUnsupportedStoreVersion(t *testing.T) { + RegisterTestingT(t) + c, wd, cleanup := newTestCryptor(t) + defer cleanup() + + secretsPath := path.Join(wd, api.ScConfigDirectory, EncryptedSecretFilesDataFileName) + Expect(os.WriteFile(secretsPath, []byte("version: 99\nregistry:\n files: []\nsecrets: {}\n"), 0o600)).To(Succeed()) + + Expect(IsUnsupportedStoreVersion(c.ReadSecretFiles())).To(BeTrue()) + Expect(IsUnsupportedStoreVersion(nil)).To(BeFalse()) + Expect(IsUnsupportedStoreVersion(errors.New("some other read error"))).To(BeFalse()) +} diff --git a/pkg/cmd/root_cmd/root.go b/pkg/cmd/root_cmd/root.go index ef166ffc..b31012ef 100644 --- a/pkg/cmd/root_cmd/root.go +++ b/pkg/cmd/root_cmd/root.go @@ -5,7 +5,6 @@ package root_cmd import ( "context" - stderrors "errors" "os" "path" "path/filepath" @@ -115,7 +114,7 @@ func (c *RootCmd) Init(opts InitOpts) error { // config-dir errors are ignored (e.g. IgnoreAllErrors from the root // PersistentPreRunE). Otherwise the store reads as empty and the next // write clobbers the newer file, which is exactly what the guard prevents. - if stderrors.Is(err, secrets.ErrSecretsStoreVersionUnsupported) { + if secrets.IsUnsupportedStoreVersion(err) { return err } if !opts.IgnoreConfigDirError { diff --git a/pkg/githubactions/actions/operation_executor.go b/pkg/githubactions/actions/operation_executor.go index 3d137c09..1b1b4658 100644 --- a/pkg/githubactions/actions/operation_executor.go +++ b/pkg/githubactions/actions/operation_executor.go @@ -11,6 +11,7 @@ import ( "time" "github.com/simple-container-com/api/pkg/api" + "github.com/simple-container-com/api/pkg/api/secrets" ) // OperationType defines the type of operation @@ -178,6 +179,9 @@ func (e *Executor) revealSecrets(ctx context.Context, config OperationConfig) er // First, load the secrets.yaml file into the cryptor e.logger.Debug(ctx, "🔧 Loading secrets.yaml file into cryptor...") if err := e.provisioner.Cryptor().ReadSecretFiles(); err != nil { + if secrets.IsUnsupportedStoreVersion(err) { + return err // fail closed: never treat a too-new store as "no secrets" + } e.logger.Info(ctx, "ℹ️ No client secrets found - using parent repository secrets") return nil // No secrets to reveal, will use parent secrets } @@ -203,6 +207,9 @@ func (e *Executor) revealSecrets(ctx context.Context, config OperationConfig) er // First, load the secrets.yaml file into the cryptor e.logger.Debug(ctx, "🔧 Loading secrets.yaml file into cryptor...") if err := e.provisioner.Cryptor().ReadSecretFiles(); err != nil { + if secrets.IsUnsupportedStoreVersion(err) { + return err // fail closed: never treat a too-new store as "no secrets" + } e.logger.Warn(ctx, "Failed to read secrets file: %v", err) e.logger.Info(ctx, "🔍 This is expected if parent repository has no secrets") return nil // No secrets to reveal diff --git a/pkg/githubactions/actions/parent_repo.go b/pkg/githubactions/actions/parent_repo.go index 40a5729c..893fb2f9 100644 --- a/pkg/githubactions/actions/parent_repo.go +++ b/pkg/githubactions/actions/parent_repo.go @@ -281,6 +281,9 @@ func (e *Executor) cloneParentRepository(ctx context.Context) error { if !isParentOperation && scConfig.ParentRepository != "" { e.logger.Info(ctx, "🔑 Client stack with parent repository - revealing secrets in current workspace...") if err := e.revealCurrentRepositorySecrets(ctx, scConfig); err != nil { + if secrets.IsUnsupportedStoreVersion(err) { + return err // fail closed: a too-new store must halt, not fall back to parent + } e.logger.Warn(ctx, "Failed to reveal client secrets (may use parent secrets): %v", err) // Don't fail the entire deployment - parent secrets might be sufficient } else { @@ -391,6 +394,9 @@ func (e *Executor) setupParentRepositorySecrets(ctx context.Context, scConfig *a e.logger.Info(ctx, "🔧 Loading secrets.yaml file into cryptor...") if err := parentCryptor.ReadSecretFiles(); err != nil { + if secrets.IsUnsupportedStoreVersion(err) { + return false, err // fail closed: never treat a too-new store as "no secrets" + } e.logger.Warn(ctx, "Failed to read secrets file: %v", err) e.logger.Info(ctx, "🔍 This is expected if parent repository has no secrets") return false, nil // No secrets loaded, but not an error From 997acbbe4cbfb5acd00b5f2088883fd9c08a6a9f Mon Sep 17 00:00:00 2001 From: Dmitrii Creed Date: Sun, 28 Jun 2026 18:35:37 +0400 Subject: [PATCH 6/6] refactor(secrets): rename version -> schemaVersion (review feedback) Rename the secrets.yaml schema field 'version' to 'schemaVersion' (clearer than a bare 'version', which reads like a data/app version) per dev review. Renames the on-disk key + Go field (SchemaVersion) + const (CurrentSecretsSchemaVersion) and updates the guard error wording, tests (keyed on schemaVersion), and the secrets guide. Helper/sentinel names unchanged. No behaviour change; back-compat preserved (absent = 0). Signed-off-by: Dmitrii Creed --- docs/docs/guides/secrets-management.md | 16 ++++++++-------- pkg/api/secrets/cryptor.go | 20 ++++++++++---------- pkg/api/secrets/management.go | 6 +++--- pkg/api/secrets/version_guard_test.go | 4 ++-- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/docs/guides/secrets-management.md b/docs/docs/guides/secrets-management.md index 146abfb3..59462e44 100644 --- a/docs/docs/guides/secrets-management.md +++ b/docs/docs/guides/secrets-management.md @@ -33,17 +33,17 @@ Each recipient public key gets its own encryption of the secret, using a scheme ### Store format & version compatibility -The encrypted store `.sc/secrets.yaml` carries an optional `version` field that -identifies its schema. The current format is **version 0** — written without an -explicit `version:` field, so existing stores are unchanged. +The encrypted store `.sc/secrets.yaml` carries an optional `schemaVersion` field +that identifies its schema. The current format is **schema version 0** — written +without an explicit `schemaVersion:` field, so existing stores are unchanged. -`sc` is **fail-closed** on this version: a build refuses to read a store whose -`version` is newer than it understands, rather than silently dropping fields it -cannot model and corrupting the store on the next write. The error looks like: +`sc` is **fail-closed** on this: a build refuses to read a store whose +`schemaVersion` is newer than it understands, rather than silently dropping fields +it cannot model and corrupting the store on the next write. The error looks like: ``` -secrets file ".sc/secrets.yaml" is version N, but this sc build supports up to -version M; upgrade sc (refusing to read to avoid data loss) +secrets file ".sc/secrets.yaml" is schema version N, but this sc build supports up +to schema version M; upgrade sc (refusing to read to avoid data loss) ``` This guarantee holds on **every** path — the local CLI and the GitHub Actions diff --git a/pkg/api/secrets/cryptor.go b/pkg/api/secrets/cryptor.go index 8706bd17..9eaa363a 100644 --- a/pkg/api/secrets/cryptor.go +++ b/pkg/api/secrets/cryptor.go @@ -15,17 +15,17 @@ import ( const EncryptedSecretFilesDataFileName = "secrets.yaml" -// CurrentSecretsFileVersion is the highest secrets.yaml schema version this build -// understands. A store with no `version` field is treated as version 0 (the +// CurrentSecretsSchemaVersion is the highest secrets.yaml schema version this build +// understands. A store with no `schemaVersion` field is treated as version 0 (the // original, current format). When a future format bumps this, OLDER binaries -// (which carry a lower CurrentSecretsFileVersion) refuse to read it — see the +// (which carry a lower CurrentSecretsSchemaVersion) refuse to read it — see the // guard in unmarshalSecretsFile — instead of silently dropping the new fields on // the next write. This reader must therefore ship and roll out fleet-wide BEFORE // any higher-versioned store is ever written. -const CurrentSecretsFileVersion = 0 +const CurrentSecretsSchemaVersion = 0 // ErrSecretsStoreVersionUnsupported is returned when the on-disk store declares a -// schema version newer than CurrentSecretsFileVersion. It MUST stay fatal on every +// schema version newer than CurrentSecretsSchemaVersion. It MUST stay fatal on every // read path — including ones that otherwise tolerate a missing/uninitialized store // (root_cmd's IgnoreConfigDirError) — because reading a too-new store as empty and // then writing would clobber it. Detect it with errors.Is. @@ -103,11 +103,11 @@ func (c *cryptor) PrivateKey() string { } type EncryptedSecretFiles struct { - // Version is the secrets.yaml schema version. Absent/0 = the original format. - // A reader refuses any version above CurrentSecretsFileVersion (fail-closed). - Version int `json:"version,omitempty" yaml:"version,omitempty"` - Registry Registry `json:"registry" yaml:"registry"` - Secrets map[string]EncryptedSecrets `json:"secrets" yaml:"secrets"` + // SchemaVersion is the secrets.yaml schema version. Absent/0 = the original + // format. A reader refuses any value above CurrentSecretsSchemaVersion (fail-closed). + SchemaVersion int `json:"schemaVersion,omitempty" yaml:"schemaVersion,omitempty"` + Registry Registry `json:"registry" yaml:"registry"` + Secrets map[string]EncryptedSecrets `json:"secrets" yaml:"secrets"` } type EncryptedSecrets struct { diff --git a/pkg/api/secrets/management.go b/pkg/api/secrets/management.go index f813583d..ca0fa4e0 100644 --- a/pkg/api/secrets/management.go +++ b/pkg/api/secrets/management.go @@ -165,15 +165,15 @@ func (c *cryptor) unmarshalSecretsFile() error { } if res, err := api.UnmarshalDescriptor[EncryptedSecretFiles](secretsFileData); err != nil || res == nil { return errors.Wrapf(err, "failed to unmarshal secrets file: %q", secretsFilePath) - } else if res.Version > CurrentSecretsFileVersion { + } else if res.SchemaVersion > CurrentSecretsSchemaVersion { // Fail closed: a newer schema this build doesn't understand. Reading and // then writing would silently drop the fields we can't model and corrupt // the store, so refuse outright. Wrap the sentinel so callers that // otherwise tolerate read errors (root_cmd's IgnoreConfigDirError) can // still detect this one with errors.Is and keep it fatal. return fmt.Errorf( - "secrets file %q is version %d, but this sc build supports up to version %d; upgrade sc (refusing to read to avoid data loss): %w", - secretsFilePath, res.Version, CurrentSecretsFileVersion, ErrSecretsStoreVersionUnsupported, + "secrets file %q is schema version %d, but this sc build supports up to schema version %d; upgrade sc (refusing to read to avoid data loss): %w", + secretsFilePath, res.SchemaVersion, CurrentSecretsSchemaVersion, ErrSecretsStoreVersionUnsupported, ) } else { c.secrets = *res diff --git a/pkg/api/secrets/version_guard_test.go b/pkg/api/secrets/version_guard_test.go index 780095fa..0e061c10 100644 --- a/pkg/api/secrets/version_guard_test.go +++ b/pkg/api/secrets/version_guard_test.go @@ -23,7 +23,7 @@ func TestUnmarshalSecretsFile_RejectsNewerVersion(t *testing.T) { defer cleanup() secretsPath := path.Join(wd, api.ScConfigDirectory, EncryptedSecretFilesDataFileName) - Expect(os.WriteFile(secretsPath, []byte("version: 99\nregistry:\n files: []\nsecrets: {}\n"), 0o600)).To(Succeed()) + Expect(os.WriteFile(secretsPath, []byte("schemaVersion: 99\nregistry:\n files: []\nsecrets: {}\n"), 0o600)).To(Succeed()) err := c.ReadSecretFiles() Expect(err).To(HaveOccurred()) @@ -58,7 +58,7 @@ func TestIsUnsupportedStoreVersion(t *testing.T) { defer cleanup() secretsPath := path.Join(wd, api.ScConfigDirectory, EncryptedSecretFilesDataFileName) - Expect(os.WriteFile(secretsPath, []byte("version: 99\nregistry:\n files: []\nsecrets: {}\n"), 0o600)).To(Succeed()) + Expect(os.WriteFile(secretsPath, []byte("schemaVersion: 99\nregistry:\n files: []\nsecrets: {}\n"), 0o600)).To(Succeed()) Expect(IsUnsupportedStoreVersion(c.ReadSecretFiles())).To(BeTrue()) Expect(IsUnsupportedStoreVersion(nil)).To(BeFalse())