From 5275617a860f4abbbf0bdc7d9b063eb6952cf984 Mon Sep 17 00:00:00 2001 From: Yiftach Cohen Date: Thu, 4 Jun 2026 16:05:35 +0200 Subject: [PATCH 1/2] =?UTF-8?q?[PPSC-879]=20feat(supply-chain):=20DX=20pol?= =?UTF-8?q?ish=20=E2=80=94=20wrap=20summary,=20error=20UX,=20CI=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 65 +++ docs/CHANGELOG.md | 17 + .../github-actions-supply-chain.yml | 49 ++ internal/cmd/supply_chain.go | 34 ++ internal/cmd/supply_chain_check.go | 67 ++- internal/cmd/supply_chain_check_test.go | 169 +++++++ internal/cmd/supply_chain_init.go | 21 + internal/cmd/supply_chain_init_test.go | 20 + internal/cmd/supply_chain_test.go | 72 +++ internal/cmd/supply_chain_wrap.go | 445 +++++++++++++++--- internal/cmd/supply_chain_wrap_pm_test.go | 20 +- .../cmd/supply_chain_wrap_summary_test.go | 301 ++++++++++++ internal/cmd/supply_chain_wrap_test.go | 283 +++++++++++ internal/output/human.go | 8 +- internal/output/human_format_test.go | 26 + internal/supplychain/check/check.go | 20 +- internal/supplychain/check/check_test.go | 254 +++++++++- internal/supplychain/config.go | 95 ++++ internal/supplychain/config_test.go | 100 +++- internal/supplychain/proxy.go | 222 ++++++++- internal/supplychain/proxy_test.go | 347 ++++++++++++++ internal/supplychain/registry/pypi_test.go | 73 +++ internal/supplychain/shell_test.go | 45 ++ internal/supplychain/supplychain.go | 5 +- internal/supplychain/supplychain_test.go | 5 + 25 files changed, 2673 insertions(+), 90 deletions(-) create mode 100644 docs/ci-examples/github-actions-supply-chain.yml create mode 100644 internal/cmd/supply_chain_test.go create mode 100644 internal/cmd/supply_chain_wrap_summary_test.go diff --git a/README.md b/README.md index 952e4252..8db03ca6 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ Enterprise-grade CLI for static application security scanning with Armis Cloud. - [Verification](#verification) - [Quick Start](#quick-start) - [Usage](#usage) +- [Supply Chain Protection](#supply-chain-protection) - [Output Formats](#output-formats) - [CI/CD Integration](#cicd-integration) - [Environment Variables](#environment-variables) @@ -47,6 +48,7 @@ Enterprise-grade CLI for static application security scanning with Armis Cloud. - Multiple output formats: human, JSON, SARIF, JUnit XML - **SBOM generation**: Generate CycloneDX Software Bill of Materials - **VEX generation**: Generate Vulnerability Exploitability eXchange documents +- **Supply chain protection**: Block packages published too recently (typosquatting, compromised maintainers, dependency confusion) across npm, Python, and Java — no Armis Cloud auth required - CI/CD ready: GitHub Actions, Jenkins, GitLab, Azure, Bitbucket, CircleCI - Configurable exit codes and fail-on severity - Secure authentication, size limits, and best practices @@ -456,6 +458,69 @@ armis-cli scan image nginx:latest --pull=never --- +## Supply Chain Protection + +The `supply-chain` command enforces a minimum **release age** on your dependencies. Packages published more recently than the threshold (default 72h) are flagged or blocked — a cheap, effective defense against typosquatting, compromised maintainer accounts, and dependency-confusion attacks, which almost always rely on a freshly published malicious version. + +No Armis Cloud authentication is required: `supply-chain` queries public registries (npm, PyPI, Maven Central) directly. + +**Supported ecosystems:** npm, pnpm, bun, yarn (Node); pip, uv, poetry, pipenv, pdm (Python); Maven, Gradle (Java). + +### Audit a lockfile (CI) + +```bash +# Audit the lockfile in the current directory (auto-detected) +armis-cli supply-chain check + +# Custom threshold, exclude your own scoped packages, fail the build on findings +armis-cli supply-chain check --min-age 7d --exclude "@myorg/*" --fail-on medium + +# Machine-readable output for CI +armis-cli supply-chain check --format sarif --fail-on high +``` + +By default `check` only reports packages that are **new** versus the base branch lockfile (auto-detected from `origin/main`). Use `--all` to audit every package, and `--fail-open` to pass when the registry is unreachable. + +> **Fail the build:** `check` reports findings as MEDIUM/HIGH severity. To gate CI, pass `--fail-on medium` (or `high`) — the default `--fail-on` is CRITICAL, which supply-chain findings never reach. + +### Enforce locally during installs + +```bash +# Wrap your package managers in ~/.bashrc / ~/.zshrc (interactive) +armis-cli supply-chain init + +# Preview changes without writing +armis-cli supply-chain init --dry-run + +# Generate a committable policy file (.armis-supply-chain.yaml) +armis-cli supply-chain init --mode config + +# Show the active policy, detected ecosystems, and shell status +armis-cli supply-chain status + +# Remove the shell wrappers +armis-cli supply-chain uninit +``` + +### Configuration + +Commit a `.armis-supply-chain.yaml` to share policy with your team: + +```yaml +version: 1 +min-age: 72h +exclusions: + - "@myorg/*" +# ecosystems: # optional: restrict to specific ecosystems (default: all detected) +# - npm +# - pip +fail-open: false +``` + +Bypass for a single command with `ARMIS_SUPPLY_CHAIN_SKIP=`; disable enforcement entirely with `ARMIS_SUPPLY_CHAIN=off`. + +--- + ## Output Formats ### Human-Readable (Default) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index ba2c0df6..f275c14d 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -9,6 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `supply-chain` command for enforcing package release-age policies, defending against supply-chain attacks (typosquatting, compromised maintainers, dependency confusion) by flagging or blocking packages published more recently than a configurable threshold (default 72h). No Armis Cloud authentication required — queries public registries directly. (#206, #210, #211) + - Supports 11 package managers across three ecosystems: npm, pnpm, bun, yarn (Node); pip, uv, poetry, pipenv, pdm (Python); Maven, Gradle (Java). + - Node package managers and pip/uv use a transparent registry proxy that filters out too-young versions during install; poetry, pipenv, pdm, Maven, and Gradle use a pre-install lockfile audit that blocks the build before execution. + - `supply-chain check` audits lockfiles in CI; `supply-chain init`/`uninit` set up local shell enforcement; `supply-chain status` reports the active policy and detected ecosystems. + - Configurable via `.armis-supply-chain.yaml` (`min-age`, `exclusions`, `ecosystems`, `fail-open`); per-invocation bypass via `ARMIS_SUPPLY_CHAIN_SKIP`; master kill switch via `ARMIS_SUPPLY_CHAIN=off`. + - Gradle lockfile staleness detection (warns when `build.gradle` is newer than `gradle.lockfile`), Maven `pom.xml` partial-coverage notice (direct dependencies only), and a warning for unrecognized ecosystem names in the config. + - The `ecosystems` config field accepts both `pipenv` (the tool name shown in `--help`) and `pipfile` (the internal name) so either spelling works. + - The install summary reports each filtered package on one line showing the too-new version, its age, and the older version installed in its place (e.g. `axios 1.17.0 (1 day old) → 1.16.1 installed`). When every package resolves to a safe version it reads as a success; packages with no older safe version are called out individually. If the package manager itself does not complete (for example a dependency pins a version that only the filtered release satisfies), the summary reports the safe version as "available" rather than claiming it was installed, and explains how to relax or exclude the constraint. A one-time explanation of why fresh releases are withheld is shown on the first filtered install in an interactive terminal (suppressed thereafter and in CI). + ### Changed ### Deprecated @@ -17,8 +26,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- `supply-chain check`: `--fail-on` now accepts lowercase severities (e.g. `--fail-on medium`) and validates the value, matching `scan repo`/`scan image`. Previously a lowercase or invalid value was silently ignored, so the CI gate never fired and a real violation exited 0. +- `supply-chain`: an unknown subcommand (e.g. a typo like `chekc`) now exits non-zero with a "Did you mean" suggestion instead of printing help and exiting 0. +- `supply-chain check`: `--min-age` parse errors no longer print the duration twice; the message now suggests valid formats (`72h`, `3d`, `1w`). Output reads "1 package" (not "1 packages"), and the empty "Scan ID:" line is omitted for the local audit. +- `supply-chain check`: base-lockfile auto-detection now bounds its `git` subprocesses with a timeout (and honors cancellation), so a wedged or misconfigured `git` invocation can no longer hang the command indefinitely. +- `supply-chain`: the config `ecosystems` field now actually scopes enforcement. Previously it was parsed and typo-checked but ignored, so `ecosystems: [npm]` still enforced every ecosystem. `check` now skips an out-of-scope lockfile, `wrap` passes an out-of-scope package manager straight through, and `init` only wraps in-scope package managers. The gate fails safe: an empty list (or a list of only unrecognized names) enforces everything, so a typo cannot silently disable the control. + ### Security +- `supply-chain wrap` (pip/uv): age enforcement now actually filters. The local-enforcement proxy previously only understood the npm registry format, so pip and uv installs were pointed at the proxy but their PyPI Simple API requests passed through unfiltered — young packages installed silently. The proxy now speaks the PyPI Simple API (PEP 691/700 JSON), removing distribution files published more recently than the policy threshold; a file with no upload timestamp is removed (fail-closed) rather than allowed. + --- ## [1.10.2] - 2026-05-28 diff --git a/docs/ci-examples/github-actions-supply-chain.yml b/docs/ci-examples/github-actions-supply-chain.yml new file mode 100644 index 00000000..c27e7e15 --- /dev/null +++ b/docs/ci-examples/github-actions-supply-chain.yml @@ -0,0 +1,49 @@ +--- +# GitHub Actions - Supply Chain Release-Age Check +# +# Audits your lockfile for packages published too recently — a cheap, effective +# defense against typosquatting, compromised maintainer accounts, and +# dependency-confusion attacks, which almost always ship a freshly published +# malicious version. +# +# No Armis Cloud authentication is required: supply-chain queries public +# registries (npm, PyPI, Maven Central) directly, so this workflow needs no +# secrets. +# +# Notes: +# - By default `check` reports only packages that are NEW versus the base +# branch lockfile. On PRs this focuses the result on what the PR introduces. +# - Findings are MEDIUM/HIGH severity, so --fail-on medium is required to gate +# the build (the default --fail-on is CRITICAL, which these never reach). +# - Add --fail-open if you would rather pass than block when the registry is +# temporarily unreachable. + +name: Supply Chain Check + +on: + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + supply-chain: + name: Package Release-Age Audit + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + # Fetch the base branch so `check` can diff against origin/main and + # report only newly introduced packages. + fetch-depth: 0 + + - name: Install armis-cli + run: curl -sSL https://raw.githubusercontent.com/ArmisSecurity/armis-cli/main/scripts/install.sh | bash + + - name: Audit dependencies for recently-published packages + run: | + armis-cli supply-chain check \ + --min-age 72h \ + --fail-on medium diff --git a/internal/cmd/supply_chain.go b/internal/cmd/supply_chain.go index e46cc754..9d347e41 100644 --- a/internal/cmd/supply_chain.go +++ b/internal/cmd/supply_chain.go @@ -1,6 +1,10 @@ package cmd import ( + "fmt" + "os" + "strings" + "github.com/ArmisSecurity/armis-cli/internal/supplychain" "github.com/spf13/cobra" ) @@ -19,6 +23,15 @@ func loadConfigUpward(dir string) (*supplychain.Config, string, error) { if err != nil { return nil, configDir, err } + if cfg != nil { + // Surface typos in the config's "ecosystems" list once, here, so every + // command that loads the config (check/status/wrap) reports them + // consistently rather than silently ignoring an unrecognized name. + if unknown := cfg.UnknownEcosystems(); len(unknown) > 0 { + fmt.Fprintf(os.Stderr, "Warning: unknown ecosystem(s) %s in %s — supported: %s\n", + strings.Join(unknown, ", "), supplychain.ConfigFileName, supplychain.KnownEcosystemsHint()) + } + } return cfg, configDir, nil } @@ -50,6 +63,27 @@ No Armis Cloud authentication is required — supply-chain queries public regist # Check what supply-chain init would do armis-cli supply-chain init --dry-run`, + // A parent command with no RunE prints help and exits 0 on an unknown + // subcommand, so `supply-chain chekc` silently "succeeds" in CI. Reject + // unknown args with a non-zero exit and a "did you mean" suggestion; with no + // args, fall back to the usual help text. + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) == 0 { + return cmd.Help() + } + // SuggestionsMinimumDistance is 0 here because cobra only sets its default + // (2) deeper in its own execute path, which we bypass by calling + // SuggestionsFor directly. Set it so close typos like "chekc"→"check" are + // offered. + if cmd.SuggestionsMinimumDistance <= 0 { + cmd.SuggestionsMinimumDistance = 2 + } + err := fmt.Errorf("unknown subcommand %q for %q", args[0], cmd.CommandPath()) + if suggestions := cmd.SuggestionsFor(args[0]); len(suggestions) > 0 { + err = fmt.Errorf("%w\n\nDid you mean this?\n\t%s", err, strings.Join(suggestions, "\n\t")) + } + return err + }, } func init() { diff --git a/internal/cmd/supply_chain_check.go b/internal/cmd/supply_chain_check.go index 23f1cab6..c9d9591a 100644 --- a/internal/cmd/supply_chain_check.go +++ b/internal/cmd/supply_chain_check.go @@ -1,11 +1,13 @@ package cmd import ( + "context" "fmt" "os" "os/exec" "path/filepath" "strings" + "time" "github.com/ArmisSecurity/armis-cli/internal/cli" "github.com/ArmisSecurity/armis-cli/internal/model" @@ -15,6 +17,13 @@ import ( "github.com/spf13/cobra" ) +// baseDetectGitTimeout bounds the git subprocesses used to fetch the base +// lockfile. git base detection only reads local objects, but a misconfigured +// remote or filesystem can wedge a git invocation indefinitely; this ceiling +// keeps `supply-chain check` from hanging on it. The parent command context is +// still honored, so SIGINT cancels sooner than this. +const baseDetectGitTimeout = 15 * time.Second + var ( scMinAge string scExclude []string @@ -93,13 +102,32 @@ func runSupplyChainCheck(cmd *cobra.Command, args []string) error { return fmt.Errorf("lockfile not found: %s", lockfilePath) } + // Respect the config's "ecosystems" scope: if it restricts enforcement and + // this lockfile's ecosystem is excluded, skip the audit and report a clean + // pass rather than checking an out-of-scope ecosystem. loadConfigUpward + // returns nil (enforce-all) when no config is present, and EnforcesEcosystem + // fails safe on an all-typo list. + cfg, _, err := loadConfigUpward(dir) + if err != nil { + return err + } + eco := check.DetectEcosystemFromPath(lockfilePath) + // armis:ignore cwe:476 reason:EnforcesEcosystem has an explicit nil-receiver guard (returns true when c==nil), so calling it on a nil cfg is safe by design + if !cfg.EnforcesEcosystem(eco) { + s := output.GetStyles() + fmt.Fprintf(os.Stderr, "%s %s\n", + s.MutedText.Render("[armis]"), + s.MutedText.Render(fmt.Sprintf("supply-chain: %s not in configured ecosystems, skipping", eco))) + return nil + } + var baseLockfile string var autoDetectedBase bool if !scAll { if scBaseLockfile != "" { baseLockfile = scBaseLockfile } else { - baseLockfile = detectBaseLockfile(lockfilePath) + baseLockfile = detectBaseLockfile(cmd.Context(), lockfilePath) autoDetectedBase = baseLockfile != "" } } @@ -129,8 +157,9 @@ func runSupplyChainCheck(cmd *cobra.Command, args []string) error { s := output.GetStyles() fmt.Fprintf(os.Stderr, "%s %s\n", s.MutedText.Render("[armis]"), - s.MutedText.Render(fmt.Sprintf("supply-chain: checked %d packages, %d skipped, %d violations (%s policy)", - result.Checked, result.Skipped, len(result.Violations), policy.MinReleaseAge))) + s.MutedText.Render(fmt.Sprintf("supply-chain: checked %s, %d skipped, %s (%s policy)", + countNoun(result.Checked, "package"), result.Skipped, + countNoun(len(result.Violations), "violation"), policy.MinReleaseAge))) findings := make([]model.Finding, 0, len(result.Violations)) for _, v := range result.Violations { @@ -161,7 +190,24 @@ func runSupplyChainCheck(cmd *cobra.Command, args []string) error { return fmt.Errorf("formatting output: %w", err) } - return output.CheckExit(scanResult, failOn, exitCode) + // Use getFailOn() (not the raw failOn global) so --fail-on is validated and + // case-normalized to uppercase. ShouldFail matches severities exactly, so a + // lowercase "medium" would otherwise never match a "MEDIUM" finding and the + // CI gate would silently pass. The scan commands already route through here. + failOnSeverities, err := getFailOn() + if err != nil { + return err + } + return output.CheckExit(scanResult, failOnSeverities, exitCode) +} + +// countNoun formats a count with its noun, pluralizing with a trailing "s" when +// the count is not exactly 1 (e.g. "1 package", "2 packages", "0 violations"). +func countNoun(n int, noun string) string { + if n == 1 { + return fmt.Sprintf("%d %s", n, noun) + } + return fmt.Sprintf("%d %ss", n, noun) } func buildSummary(findings []model.Finding) model.Summary { @@ -181,11 +227,16 @@ func buildSummary(findings []model.Finding) model.Summary { return summary } -func detectBaseLockfile(lockfilePath string) string { +func detectBaseLockfile(ctx context.Context, lockfilePath string) string { if _, err := exec.LookPath("git"); err != nil { return "" } + // Bound every git subprocess so a wedged invocation cannot hang the check. + // Derived from the command context, so SIGINT still cancels earlier. + ctx, cancel := context.WithTimeout(ctx, baseDetectGitTimeout) + defer cancel() + // Anchor every git invocation to the directory that contains the lockfile, // not the process's cwd. Otherwise `armis-cli supply-chain check // /other/repo` (or a --lockfile outside cwd) resolves base detection @@ -197,13 +248,13 @@ func detectBaseLockfile(lockfilePath string) string { } gitWorkDir := filepath.Dir(absLockfile) - gitDir := exec.Command("git", "rev-parse", "--git-dir") //nolint:gosec // detecting git repo + gitDir := exec.CommandContext(ctx, "git", "rev-parse", "--git-dir") //nolint:gosec // detecting git repo gitDir.Dir = gitWorkDir if err := gitDir.Run(); err != nil { return "" } - showTopLevel := exec.Command("git", "rev-parse", "--show-toplevel") //nolint:gosec + showTopLevel := exec.CommandContext(ctx, "git", "rev-parse", "--show-toplevel") //nolint:gosec showTopLevel.Dir = gitWorkDir topLevel, err := showTopLevel.Output() if err != nil { @@ -233,7 +284,7 @@ func detectBaseLockfile(lockfilePath string) string { for _, base := range []string{"origin/main", "origin/master"} { // armis:ignore cwe:22 reason:relPath is confined to the repo tree by the traversal guard above and git resolves the pathspec within the repo; base is one of two hardcoded refs - showBase := exec.Command("git", "show", base+":"+relPath) //nolint:gosec // user's git repo + showBase := exec.CommandContext(ctx, "git", "show", base+":"+relPath) //nolint:gosec // user's git repo showBase.Dir = gitWorkDir content, err := showBase.Output() if err != nil { diff --git a/internal/cmd/supply_chain_check_test.go b/internal/cmd/supply_chain_check_test.go index 02fb5a14..0a0b2833 100644 --- a/internal/cmd/supply_chain_check_test.go +++ b/internal/cmd/supply_chain_check_test.go @@ -1,9 +1,12 @@ package cmd import ( + "context" "os" + "os/exec" "path/filepath" "testing" + "time" "github.com/ArmisSecurity/armis-cli/internal/supplychain" "github.com/spf13/cobra" @@ -83,3 +86,169 @@ func TestResolvePolicy_DefaultNoFailOpen(t *testing.T) { t.Error("policy.FailOpen should default to false with no config and no flag") } } + +// initGitRepoWithOriginMain creates a bare "origin" repo and a working clone in +// which lockfileName has been committed and pushed to origin/main. It returns +// the clone's working directory so a test can call detectBaseLockfile against a +// path inside it. The helper skips the test if git is unavailable. +func initGitRepoWithOriginMain(t *testing.T, lockfileName, content string) string { + t.Helper() + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not available on PATH") + } + + root := t.TempDir() + clone := filepath.Join(root, "repo") + + mustGit := func(dir string, args ...string) { + t.Helper() + cmd := exec.Command("git", args...) // #nosec G204 -- test helper, controlled args + cmd.Dir = dir + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v in %s: %v\n%s", args, dir, err, out) + } + } + + // Build a working repo, commit the lockfile on main, then point an "origin" + // remote at the repo itself and fetch so that the origin/main remote-tracking + // ref detectBaseLockfile reads resolves to the committed content. Using the + // repo as its own origin avoids a separate bare repo and a network push. + mustGit(root, "init", "-b", "main", clone) + mustGit(clone, "config", "user.email", "test@example.com") + mustGit(clone, "config", "user.name", "Test") + + if err := os.WriteFile(filepath.Join(clone, lockfileName), []byte(content), 0o600); err != nil { + t.Fatalf("write lockfile: %v", err) + } + mustGit(clone, "add", lockfileName) + mustGit(clone, "commit", "-m", "add lockfile") + mustGit(clone, "remote", "add", "origin", clone) + mustGit(clone, "fetch", "origin") + + // Resolve symlinks so the returned path matches what `git rev-parse + // --show-toplevel` reports. On macOS t.TempDir() lives under /var which is a + // symlink to /private/var; without this, detectBaseLockfile's filepath.Rel + // would yield a "../"-prefixed path and the traversal guard would reject it. + resolved, err := filepath.EvalSymlinks(clone) + if err != nil { + t.Fatalf("eval symlinks: %v", err) + } + return resolved +} + +func TestDetectBaseLockfile_NotAGitRepo(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not available on PATH") + } + dir := t.TempDir() + if got := detectBaseLockfile(context.Background(), filepath.Join(dir, "package-lock.json")); got != "" { + t.Errorf("detectBaseLockfile in non-repo = %q, want empty", got) + } +} + +func TestDetectBaseLockfile_NoOriginMain(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not available on PATH") + } + dir := t.TempDir() + if err := runTestGitCmd(dir, "init", "-b", "main"); err != nil { + t.Fatalf("git init: %v", err) + } + // A repo with no origin remote: neither origin/main nor origin/master + // resolves, so detection yields no base file. + if got := detectBaseLockfile(context.Background(), filepath.Join(dir, "package-lock.json")); got != "" { + t.Errorf("detectBaseLockfile with no origin = %q, want empty", got) + } +} + +func TestDetectBaseLockfile_FromOriginMain(t *testing.T) { + const content = `{"lockfileVersion":3,"packages":{}}` + clone := initGitRepoWithOriginMain(t, "package-lock.json", content) + + base := detectBaseLockfile(context.Background(), filepath.Join(clone, "package-lock.json")) + if base == "" { + t.Fatal("expected a base lockfile temp path, got empty") + } + t.Cleanup(func() { _ = os.Remove(base) }) + + got, err := os.ReadFile(base) //nolint:gosec // test-controlled temp path + if err != nil { + t.Fatalf("read base lockfile: %v", err) + } + if string(got) != content { + t.Errorf("base lockfile content = %q, want %q", got, content) + } + // The temp file should carry the lockfile's extension so downstream + // ecosystem detection (which is suffix-based) classifies it correctly. + if filepath.Ext(base) != ".json" { + t.Errorf("base temp file ext = %q, want .json", filepath.Ext(base)) + } +} + +func TestDetectBaseLockfile_LockfileNotInOriginMain(t *testing.T) { + // origin/main has a package-lock.json, but the caller asks about a different + // lockfile that was never committed. `git show origin/main:` fails for + // it, so both ref candidates fall through and detection returns empty rather + // than fabricating a base. + clone := initGitRepoWithOriginMain(t, "package-lock.json", `{"packages":{}}`) + + uncommitted := filepath.Join(clone, "pnpm-lock.yaml") + if err := os.WriteFile(uncommitted, []byte("lockfileVersion: '9.0'\n"), 0o600); err != nil { + t.Fatal(err) + } + if got := detectBaseLockfile(context.Background(), uncommitted); got != "" { + _ = os.Remove(got) + t.Errorf("detectBaseLockfile for lockfile absent from origin/main = %q, want empty", got) + } +} + +func TestDetectBaseLockfile_CanceledContext(t *testing.T) { + // An already-canceled context must short-circuit the git subprocesses (each + // runs under exec.CommandContext), so detection returns empty promptly rather + // than running git to completion. This guards the timeout wiring. + clone := initGitRepoWithOriginMain(t, "package-lock.json", `{"packages":{}}`) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + start := time.Now() + got := detectBaseLockfile(ctx, filepath.Join(clone, "package-lock.json")) + if got != "" { + _ = os.Remove(got) + t.Errorf("detectBaseLockfile with canceled context = %q, want empty", got) + } + if elapsed := time.Since(start); elapsed > baseDetectGitTimeout { + t.Errorf("detection took %v, expected to short-circuit well under the %v timeout", elapsed, baseDetectGitTimeout) + } +} + +func TestRunSupplyChainCheck_EcosystemScopeSkips(t *testing.T) { + // Config scopes enforcement to pip only; a check against a package-lock.json + // (npm) must skip the audit and return cleanly without querying any registry. + // If the gate did not fire, RunCheck would attempt npm registry lookups. + dir := chdirTemp(t) + writeConfig(t, dir, "version: 1\necosystems:\n - pip\n") + if err := os.WriteFile(filepath.Join(dir, "package-lock.json"), + []byte(`{"lockfileVersion":3,"packages":{"node_modules/x":{"version":"1.0.0","resolved":"https://registry.npmjs.org/x/-/x-1.0.0.tgz"}}}`), 0o600); err != nil { + t.Fatal(err) + } + + // Save/restore the package-level flag vars the check command reads. + origLockfile, origAll, origMinAge, origFormat := scLockfile, scAll, scMinAge, format + t.Cleanup(func() { + scLockfile, scAll, scMinAge, format = origLockfile, origAll, origMinAge, origFormat + }) + scLockfile = "package-lock.json" + scAll = true // skip base-lockfile git detection + scMinAge = "72h" + format = ohFormatJSON + + cmd := newWrapTestCmd() // a command with a live context + cmd.Flags().StringVar(&scMinAge, "min-age", "72h", "") + cmd.Flags().StringSliceVar(&scExclude, "exclude", nil, "") + cmd.Flags().BoolVar(&scFailOpen, "fail-open", false, "") + + if err := runSupplyChainCheck(cmd, []string{"."}); err != nil { + t.Fatalf("expected clean skip, got error: %v", err) + } +} diff --git a/internal/cmd/supply_chain_init.go b/internal/cmd/supply_chain_init.go index 6d5c2df1..a6eafcd3 100644 --- a/internal/cmd/supply_chain_init.go +++ b/internal/cmd/supply_chain_init.go @@ -94,6 +94,14 @@ func detectWrappablePMs() []string { return []string{pmNPM} } + // Honor the config's "ecosystems" scope so init only wraps the package + // managers the user opted to enforce. Loaded best-effort: a nil config (none + // found, or load error) enforces everything, matching the check/wrap gates. + var cfg *supplychain.Config + if dir := supplychain.FindConfigDir("."); dir != "" { + cfg, _ = supplychain.LoadConfig(dir) + } + seen := make(map[string]bool) var pms []string @@ -106,6 +114,10 @@ func detectWrappablePMs() []string { } for _, e := range ecosystems { + // armis:ignore cwe:476 reason:EnforcesEcosystem has an explicit nil-receiver guard (returns true when c==nil), so calling it on a nil cfg is safe by design + if !cfg.EnforcesEcosystem(e.Ecosystem) { + continue + } pm := ecosystemToPM(e.Ecosystem) // A bare requirements.txt is installed with pip, which can be present // under several names (pip, pip3, pip3.12). Wrap every variant on PATH @@ -120,6 +132,9 @@ func detectWrappablePMs() []string { } if len(pms) == 0 { + // Every detected ecosystem was excluded by config, or none mapped to a PM. + // Fall back to npm so the generated wrapper still protects the most common + // package manager rather than silently wrapping nothing. return []string{pmNPM} } return pms @@ -359,6 +374,12 @@ min-age: 72h # Packages matching these glob patterns bypass age checks %s +# Restrict enforcement to specific ecosystems (default: all detected). +# Supported: npm, pnpm, bun, yarn, pip, poetry, pipenv, pdm, uv, maven, gradle +# ecosystems: +# - npm +# - pip + # If true, allow installs when the registry is unreachable fail-open: false `, exclusionsBlock) diff --git a/internal/cmd/supply_chain_init_test.go b/internal/cmd/supply_chain_init_test.go index d36d0ad6..fe4bcec7 100644 --- a/internal/cmd/supply_chain_init_test.go +++ b/internal/cmd/supply_chain_init_test.go @@ -101,6 +101,26 @@ func TestDetectWrappablePMs_DefaultsToNpm(t *testing.T) { } } +func TestDetectWrappablePMs_HonorsEcosystemScope(t *testing.T) { + // Two lockfiles are present (npm + pnpm) but the config scopes enforcement to + // pnpm only, so init must wrap only pnpm. + dir := chdirTemp(t) + for _, f := range []string{"package-lock.json", "pnpm-lock.yaml"} { + if err := os.WriteFile(filepath.Join(dir, f), []byte("{}"), 0o600); err != nil { + t.Fatal(err) + } + } + if err := os.WriteFile(filepath.Join(dir, supplychain.ConfigFileName), + []byte("version: 1\necosystems:\n - pnpm\n"), 0o600); err != nil { + t.Fatal(err) + } + + pms := detectWrappablePMs() + if len(pms) != 1 || pms[0] != "pnpm" { + t.Errorf("detectWrappablePMs() = %v, want [pnpm] (npm excluded by config scope)", pms) + } +} + func TestExtractScope(t *testing.T) { tests := []struct { name string diff --git a/internal/cmd/supply_chain_test.go b/internal/cmd/supply_chain_test.go new file mode 100644 index 00000000..4637ea00 --- /dev/null +++ b/internal/cmd/supply_chain_test.go @@ -0,0 +1,72 @@ +package cmd + +import ( + "strings" + "testing" + + "github.com/ArmisSecurity/armis-cli/internal/model" + "github.com/ArmisSecurity/armis-cli/internal/output" +) + +// TestSupplyChainUnknownSubcommand guards the parent command's RunE: an unknown +// subcommand must error (non-zero exit) instead of silently printing help and +// exiting 0, which would let a typo like `supply-chain chekc` "pass" in CI. +func TestSupplyChainUnknownSubcommand(t *testing.T) { + t.Run("unknown subcommand returns an error", func(t *testing.T) { + err := supplyChainCmd.RunE(supplyChainCmd, []string{"chekc"}) + if err == nil { + t.Fatal("expected an error for an unknown subcommand, got nil") + } + if !strings.Contains(err.Error(), "unknown subcommand") { + t.Errorf("error should mention the unknown subcommand, got: %v", err) + } + }) + + t.Run("close typo offers a suggestion", func(t *testing.T) { + err := supplyChainCmd.RunE(supplyChainCmd, []string{"chekc"}) + if err == nil { + t.Fatal("expected an error, got nil") + } + if !strings.Contains(err.Error(), "Did you mean") || !strings.Contains(err.Error(), "check") { + t.Errorf("expected a 'Did you mean ... check' suggestion, got: %v", err) + } + }) + + t.Run("no args falls back to help without error", func(t *testing.T) { + if err := supplyChainCmd.RunE(supplyChainCmd, []string{}); err != nil { + t.Errorf("no-arg invocation should print help and return nil, got: %v", err) + } + }) +} + +// TestSupplyChainCheckFailOnCaseInsensitive is the regression test for the +// CI-gate bypass: `supply-chain check` routes --fail-on through getFailOn(), +// which uppercases and validates it. A lowercase "medium" must therefore trip +// the gate on a MEDIUM finding (ShouldFail matches severities exactly), and an +// invalid value must be rejected rather than silently ignored. +func TestSupplyChainCheckFailOnCaseInsensitive(t *testing.T) { + medium := &model.ScanResult{ + Findings: []model.Finding{{Severity: model.SeverityMedium}}, + } + + t.Run("lowercase fail-on still fails the gate", func(t *testing.T) { + failOn = []string{"medium"} + normalized, err := getFailOn() + if err != nil { + t.Fatalf("getFailOn rejected a valid lowercase severity: %v", err) + } + if !output.ShouldFail(medium, normalized) { + t.Error("lowercase --fail-on medium should fail on a MEDIUM finding after normalization") + } + }) + + t.Run("invalid fail-on is rejected", func(t *testing.T) { + failOn = []string{"banana"} + if _, err := getFailOn(); err == nil { + t.Error("getFailOn should reject an invalid severity, not silently ignore it") + } + }) + + // Reset the shared package global so this test can't leak state into others. + failOn = []string{"CRITICAL"} +} diff --git a/internal/cmd/supply_chain_wrap.go b/internal/cmd/supply_chain_wrap.go index eb35e4ba..fe335f8a 100644 --- a/internal/cmd/supply_chain_wrap.go +++ b/internal/cmd/supply_chain_wrap.go @@ -9,10 +9,12 @@ import ( "strings" "time" + "github.com/ArmisSecurity/armis-cli/internal/cli" "github.com/ArmisSecurity/armis-cli/internal/model" "github.com/ArmisSecurity/armis-cli/internal/output" "github.com/ArmisSecurity/armis-cli/internal/supplychain" "github.com/ArmisSecurity/armis-cli/internal/supplychain/check" + "github.com/ArmisSecurity/armis-cli/internal/util" "github.com/spf13/cobra" ) @@ -62,6 +64,13 @@ var allowedPMs = map[string]bool{ pmMaven: true, pmGradle: true, } +// execPMFunc is the indirection used to run a package manager. Production code +// leaves it pointing at execPM; tests replace it (restoring via t.Cleanup) to +// capture the resolved name/args/env and exit code without spawning a real +// process. Routing every call site through this var is the only seam that makes +// runProxyWrap/runPreInstallBlock unit-testable. +var execPMFunc = execPM + func runSupplyChainWrap(cmd *cobra.Command, args []string) error { // pmName is the exact command the user invoked (e.g. "pip3.12"); execName is // the binary actually run. canonicalPM collapses pip variants (pip3, pip3.12) @@ -77,12 +86,19 @@ func runSupplyChainWrap(cmd *cobra.Command, args []string) error { } if os.Getenv(envSCActive) == "1" { - return exitWithCode(execPM(pmName, pmArgs, nil)) + return exitWithCode(execPMFunc(pmName, pmArgs, nil)) } if strings.EqualFold(os.Getenv(envSCOff), "off") { fmt.Fprintf(os.Stderr, "[armis] supply-chain disabled via %s=off\n", envSCOff) - return exitWithCode(execPM(pmName, pmArgs, nil)) + return exitWithCode(execPMFunc(pmName, pmArgs, nil)) + } + + // Respect the config's "ecosystems" scope: when it lists ecosystems and this + // PM's ecosystem is not among them, pass straight through to the real PM + // without enforcement. The gate fails safe (enforces) on any config error. + if !wrapEcosystemEnforced(canonical) { + return exitWithCode(execPMFunc(pmName, pmArgs, nil)) } // poetry, pipenv, and pdm resolve dependencies through their own lockfiles @@ -110,15 +126,25 @@ func canonicalPM(pm string) string { func runProxyWrap(cmd *cobra.Command, pmName string, pmArgs []string) error { policy := resolveWrapPolicy() + // pip and uv resolve from the PyPI Simple API, a different protocol from the + // npm registry, so the proxy must run in PyPI mode (PEP 691/700 JSON file + // filtering). All other proxied PMs (npm/pnpm/bun/yarn) speak the npm registry. + mode := supplychain.ModeNPM + switch canonicalPM(pmName) { + case pmPip, pmUV: + mode = supplychain.ModePyPI + } + cfg := supplychain.ProxyConfig{ Policy: policy, + Mode: mode, SkipPackages: parseSkipPackages(os.Getenv(envSCSkip)), } proxy, err := supplychain.NewProxy(cfg) if err != nil { fmt.Fprintf(os.Stderr, "[armis] supply-chain: proxy setup failed, falling through: %v\n", err) - return exitWithCode(execPM(pmName, pmArgs, nil)) + return exitWithCode(execPMFunc(pmName, pmArgs, nil)) } ctx, cancel := context.WithTimeout(cmd.Context(), 30*time.Minute) @@ -127,7 +153,7 @@ func runProxyWrap(cmd *cobra.Command, pmName string, pmArgs []string) error { addr, err := proxy.Start(ctx) if err != nil { fmt.Fprintf(os.Stderr, "[armis] supply-chain: proxy start failed, falling through: %v\n", err) - return exitWithCode(execPM(pmName, pmArgs, nil)) + return exitWithCode(execPMFunc(pmName, pmArgs, nil)) } defer proxy.Close() //nolint:errcheck @@ -137,9 +163,15 @@ func runProxyWrap(cmd *cobra.Command, pmName string, pmArgs []string) error { extraEnv := registryEnvForPM(canonicalPM(pmName), registryURL) extraEnv = append(extraEnv, fmt.Sprintf("%s=1", envSCActive)) - exitCode, err := execPM(pmName, pmArgs, extraEnv) + exitCode, err := execPMFunc(pmName, pmArgs, extraEnv) - printBlockSummary(proxy.Blocked(), proxy.Allowed(), proxy.Checked(), policy, pmName) + // installOK reflects what the package manager actually did, not what the proxy + // offered: proxy.Allowed() only records the version "latest" was repointed to, + // so without the exit code the summary would claim a package was "installed" + // even when the PM rejected it (e.g. a pin like ^1.17.0 that only the filtered + // version satisfies). Report the observed outcome, not the proxy's intent. + installOK := err == nil && exitCode == 0 + printBlockSummary(proxy.Blocked(), proxy.Allowed(), proxy.Checked(), policy, pmName, installOK) if err != nil { return err @@ -231,7 +263,20 @@ func exitWithCode(code int, err error) error { const maxBlockedDisplay = 5 -func printBlockSummary(blocked []supplychain.BlockedPackage, allowed []supplychain.InstalledPackage, checked int, policy supplychain.Policy, pmName string) { +// pkgFilterResult is the per-package view of the proxy's filtering decision. It +// collapses the possibly-several blocked versions of one package into a single +// line: the youngest blocked version (the one the PM would have installed as +// "latest") paired with the older version the proxy resolved to instead — or an +// empty NewVersion when no safe fallback existed. +type pkgFilterResult struct { + Name string + OldVersion string // youngest blocked version (what the PM wanted) + OldAge time.Duration // age of that version at install time + Severity model.Severity // severity classification of that age + NewVersion string // resolved fallback version, "" if none was available +} + +func printBlockSummary(blocked []supplychain.BlockedPackage, allowed []supplychain.InstalledPackage, checked int, policy supplychain.Policy, pmName string, installOK bool) { s := output.GetStyles() if len(blocked) == 0 { @@ -239,7 +284,7 @@ func printBlockSummary(blocked []supplychain.BlockedPackage, allowed []supplycha fmt.Fprintf(os.Stderr, "%s %s %s %s\n", s.MutedText.Render(scPrefix), s.SuccessText.Render(output.IconSuccess), - s.SuccessText.Render(fmt.Sprintf("supply-chain: %d packages checked, all pass", checked)), + s.SuccessText.Render(fmt.Sprintf("supply-chain: %s checked, all pass", countNoun(checked, "package"))), s.MutedText.Render(fmt.Sprintf("(%s minimum age)", formatDurationShort(policy.MinReleaseAge)))) } return @@ -250,53 +295,268 @@ func printBlockSummary(blocked []supplychain.BlockedPackage, allowed []supplycha allowedVersions[pkg.Name] = pkg.Version } - relevant := filterRelevantBlocked(blocked) - - sort.Slice(relevant, func(i, j int) bool { - return relevant[i].Age < relevant[j].Age - }) - - fmt.Fprintf(os.Stderr, "\n%s %s\n", - s.MutedText.Render(scPrefix), - s.WarningText.Render(fmt.Sprintf("supply-chain: filtered %d version(s) younger than %s", len(relevant), formatDurationShort(policy.MinReleaseAge)))) + results := groupBlockedByPackage(filterRelevantBlocked(blocked), allowedVersions, policy.MinReleaseAge) + if len(results) == 0 { + return + } - blockedPkgNames := blockedNamesUnique(relevant) - resolvedCount := 0 - for _, name := range blockedPkgNames { - if _, ok := allowedVersions[name]; ok { - resolvedCount++ + allResolved := true + for _, r := range results { + if r.NewVersion == "" { + allResolved = false + break } } - if resolvedCount > 0 { - fmt.Fprintf(os.Stderr, " %s %s\n", + + policyShort := formatPolicyShort(policy.MinReleaseAge) + verbose := len(results) > maxBlockedDisplay + + // "Installed" wording is only truthful when the package manager actually + // completed. A repointed "latest" is what the proxy offered, not proof of an + // install — a pin that only the filtered version satisfies still fails the PM. + // success drives the green header and the per-line "installed" wording; when + // the PM exited non-zero we report the filter as a fact and stay neutral. + success := allResolved && installOK + + // Header. When every filtered package resolved to a safe older version AND the + // install completed, the user was both protected and unblocked — frame it as + // success (green). Otherwise stay neutral (muted): either a package had no safe + // fallback, or the PM did not complete; the per-line glyph and the footnote + // carry the detail. + switch { + case success: + versionWord := "version" + if len(results) > 1 { + versionWord = "versions" + } + fmt.Fprintf(os.Stderr, "\n%s %s %s\n", + s.MutedText.Render(scPrefix), s.SuccessText.Render(output.IconSuccess), - s.SuccessText.Render(fmt.Sprintf("resolved %d package(s) to older safe versions", resolvedCount))) + s.SuccessText.Render(fmt.Sprintf("supply-chain: filtered %s → installed safe %s (%s policy)", + countNoun(len(results), "too-new release"), versionWord, policyShort))) + case !installOK: + fmt.Fprintf(os.Stderr, "\n%s %s\n", + s.MutedText.Render(scPrefix), + s.WarningText.Render(fmt.Sprintf("supply-chain: filtered %s; install did not complete (%s policy)", + countNoun(len(results), "too-new release"), policyShort))) + default: + fmt.Fprintf(os.Stderr, "\n%s %s\n", + s.MutedText.Render(scPrefix), + s.MutedText.Render(fmt.Sprintf("supply-chain: filtered %s (%s policy)", + countNoun(len(results), "too-new release"), policyShort))) } - displayCount := len(relevant) + displayCount := len(results) if displayCount > maxBlockedDisplay { displayCount = maxBlockedDisplay } - fmt.Fprintf(os.Stderr, " %s\n", s.MutedText.Render("Filtered out:")) - for _, b := range relevant[:displayCount] { - age := formatDurationShort(b.Age) - sev := supplychain.ClassifySeverity(b.Age, policy.MinReleaseAge) - dot := severityDot(s, sev) - fmt.Fprintf(os.Stderr, " %s %s %s\n", - dot, - s.Bold.Render(fmt.Sprintf("%s@%s", b.Name, b.Version)), - s.MutedText.Render(fmt.Sprintf("(published %s ago)", age))) + // Pad the name, version, and age columns to a common width so the "→ installed" + // outcomes line up vertically across rows. Widths are computed over the rows + // actually shown (not the full result set) so a truncated list still aligns. + var cols colWidths + for _, r := range results[:displayCount] { + if n := len(r.Name); n > cols.name { + cols.name = n + } + if n := len(r.OldVersion); n > cols.version { + cols.version = n + } + if n := len(ageToken(r.OldAge)); n > cols.age { + cols.age = n + } } - if remaining := len(relevant) - displayCount; remaining > 0 { + for _, r := range results[:displayCount] { + printPkgFilterLine(s, r, !allResolved, installOK, cols) + } + if remaining := len(results) - displayCount; remaining > 0 { fmt.Fprintf(os.Stderr, " %s\n", s.MutedText.Render(fmt.Sprintf("… and %d more", remaining))) } - fmt.Fprintf(os.Stderr, "\n %s\n", s.MutedText.Render(strings.Repeat("─", scSepLen))) - fmt.Fprintf(os.Stderr, " %s %s\n\n", - s.MutedText.Render("Disable:"), - s.Bold.Render(fmt.Sprintf("%s=off %s install", envSCOff, pmName))) + // When the package manager did not complete, explain the likely link to the + // filter without asserting it: the failure could be unrelated (a typo, a + // network error), but a pin that only the filtered version satisfies is the + // common cause, so point at the actionable fix. + if !installOK { + fmt.Fprintf(os.Stderr, "\n %s %s\n", + s.MutedText.Render("Note:"), + s.MutedText.Render("the install did not complete. If a dependency pins a version newer than the policy window, relax the constraint or exclude the package.")) + } + + // One-time rationale: the first time a user sees a filter on an interactive + // terminal, explain why brand-new releases are withheld. Suppressed on every + // subsequent install and in CI/piped output so it never becomes noise. + if shouldShowRationale() { + fmt.Fprintf(os.Stderr, "\n %s %s\n", + s.MutedText.Render("Why:"), + s.MutedText.Render("brand-new releases are a common supply-chain attack vector; Armis installs the newest version older than the policy window.")) + markRationaleShown() + } + + // Footer. A long list earns the full divider and copy-paste disable command; + // the common short case gets a single terse hint instead of heavy chrome. + if verbose { + fmt.Fprintf(os.Stderr, "\n %s\n", s.MutedText.Render(strings.Repeat("─", scSepLen))) + fmt.Fprintf(os.Stderr, " %s %s\n\n", + s.MutedText.Render("Disable:"), + s.Bold.Render(fmt.Sprintf("%s=off %s install", envSCOff, pmName))) + } else { + fmt.Fprintf(os.Stderr, " %s %s\n", + s.MutedText.Render("Disable:"), + s.Bold.Render(fmt.Sprintf("%s=off", envSCOff))) + } +} + +// colWidths holds the maximum plain-text width of each aligned column. Padding +// is computed on the unstyled strings: len() on a lipgloss-rendered value counts +// invisible ANSI escape bytes, so columns must be padded before .Render(). +type colWidths struct { + name int + version int + age int +} + +// ageToken formats a blocked version's age as it appears on the line, e.g. +// "(1 day old)". Centralized so the column-width measurement and the rendered +// output cannot drift apart. +func ageToken(age time.Duration) string { + return fmt.Sprintf("(%s old)", formatDurationShort(age)) +} + +// rightPad appends spaces so s occupies at least width columns. It pads the +// plain string before styling, keeping alignment correct with colors on or off. +func rightPad(s string, width int) string { + if pad := width - len(s); pad > 0 { + return s + strings.Repeat(" ", pad) + } + return s +} + +// printPkgFilterLine renders one package's filter outcome on a single line: +// glyph, name, the blocked version and its age, an arrow, and the resolved +// version (or an inline warning when none existed). The leading glyph depends on +// context: when every package resolved (mixed == false) the header already +// signals success, so the line shows the severity dot to convey how fresh the +// blocked version was; in the mixed case the header is neutral, so a per-line +// ✓/⚠ carries the resolved-vs-unresolved tone. The resolved-version wording is +// "installed" only when the PM completed (installOK); otherwise it reads +// "available" — the safe version exists, but we cannot claim it was installed. +// cols pads each column so the arrows and outcomes line up across rows. +func printPkgFilterLine(s *output.Styles, r pkgFilterResult, mixed, installOK bool, cols colWidths) { + resolvedWord := "installed" + if !installOK { + resolvedWord = "available" + } + + var glyph, outcome string + switch { + case r.NewVersion == "": + glyph = s.WarningText.Render("⚠") + outcome = s.WarningText.Render("no older safe version (install may fail)") + case mixed: + glyph = s.SuccessText.Render(output.IconSuccess) + outcome = fmt.Sprintf("%s %s", s.SuccessText.Render(r.NewVersion), s.MutedText.Render(resolvedWord)) + default: + glyph = severityDot(s, r.Severity) + outcome = fmt.Sprintf("%s %s", s.SuccessText.Render(r.NewVersion), s.MutedText.Render(resolvedWord)) + } + + fmt.Fprintf(os.Stderr, " %s %s %s %s %s %s\n", + glyph, + s.Bold.Render(rightPad(r.Name, cols.name)), + s.MutedText.Render(rightPad(r.OldVersion, cols.version)), + s.MutedText.Render(rightPad(ageToken(r.OldAge), cols.age)), + s.MutedText.Render("→"), + outcome) +} + +// groupBlockedByPackage collapses blocked versions into one result per package. +// For each package it keeps the youngest blocked version — the one the PM would +// have installed as "latest" — and pairs it with the resolved fallback from +// allowedVersions. Results are sorted youngest-first (ties broken by name) so +// the freshest, riskiest package leads the list and output is deterministic. +func groupBlockedByPackage(blocked []supplychain.BlockedPackage, allowedVersions map[string]string, threshold time.Duration) []pkgFilterResult { + byName := make(map[string]pkgFilterResult, len(blocked)) + for _, b := range blocked { + if existing, ok := byName[b.Name]; ok && existing.OldAge <= b.Age { + continue // already holding a younger (or equally young) version + } + byName[b.Name] = pkgFilterResult{ + Name: b.Name, + OldVersion: b.Version, + OldAge: b.Age, + Severity: supplychain.ClassifySeverity(b.Age, threshold), + NewVersion: allowedVersions[b.Name], + } + } + + results := make([]pkgFilterResult, 0, len(byName)) + for _, r := range byName { + results = append(results, r) + } + sort.Slice(results, func(i, j int) bool { + if results[i].OldAge != results[j].OldAge { + return results[i].OldAge < results[j].OldAge + } + return results[i].Name < results[j].Name + }) + return results +} + +// formatPolicyShort renders the policy window as a hyphenated adjective for use +// directly before a noun, e.g. "3-day" in "(3-day policy)". It mirrors the unit +// boundaries of formatDurationShort but always uses the singular unit form. +func formatPolicyShort(d time.Duration) string { + if d < time.Hour { + return fmt.Sprintf("%d-minute", int(d.Minutes())) + } + hours := int(d.Hours()) + if hours < 24 { + return fmt.Sprintf("%d-hour", hours) + } + return fmt.Sprintf("%d-day", hours/24) +} + +// rationaleMarkerFile is the cache-dir filename whose presence records that the +// one-time supply-chain "why" explainer has already been shown to this user. +const rationaleMarkerFile = "supply-chain-onboarded" + +// shouldShowRationale reports whether to print the one-time "why" explainer. It +// is gated on an interactive terminal (so CI logs and piped output stay terse) +// and on the absence of the marker file (so it appears only once). +func shouldShowRationale() bool { + return cli.IsInteractive() && !rationaleAlreadyShown() +} + +// rationaleAlreadyShown reports whether the onboarding marker file exists. A +// path that cannot be resolved is treated as "already shown" so a broken cache +// directory yields terse output rather than the explainer on every install. +func rationaleAlreadyShown() bool { + path := util.GetCacheFilePath(rationaleMarkerFile) + if path == "" { + return true + } + _, err := os.Stat(path) + return err == nil +} + +// markRationaleShown records that the explainer has been shown by creating the +// marker file. It is best-effort: any failure (e.g. an unwritable cache dir) is +// ignored so a marker-write error never blocks an install or surfaces an error. +func markRationaleShown() { + dir := util.GetCacheDir() + if dir == "" { + return + } + if err := os.MkdirAll(dir, 0o700); err != nil { + return + } + path := util.GetCacheFilePath(rationaleMarkerFile) + if path == "" { + return + } + _ = os.WriteFile(path, []byte("1"), 0o600) } func filterRelevantBlocked(blocked []supplychain.BlockedPackage) []supplychain.BlockedPackage { @@ -337,18 +597,6 @@ func formatDurationShort(d time.Duration) string { return fmt.Sprintf("%d days", days) } -func blockedNamesUnique(blocked []supplychain.BlockedPackage) []string { - seen := make(map[string]bool) - var names []string - for _, b := range blocked { - if !seen[b.Name] { - seen[b.Name] = true - names = append(names, b.Name) - } - } - return names -} - func registryEnvForPM(pm, registryURL string) []string { switch pm { case pmBun: @@ -420,6 +668,27 @@ func resolveWrapPolicy() supplychain.Policy { return supplychain.DefaultPolicy() } +// wrapEcosystemEnforced reports whether the config (searched upward from the +// current directory) scopes enforcement to exclude this package manager's +// ecosystem. It loads the config best-effort: any load error or absent config +// means "enforce" (fail safe), matching resolveWrapPolicy's posture. Pass the +// canonical PM name so versioned pip variants resolve correctly. +func wrapEcosystemEnforced(canonicalPMName string) bool { + eco := pmToEcosystem(canonicalPMName) + if eco == "" { + return true + } + dir := supplychain.FindConfigDir(".") + if dir == "" { + return true + } + cfg, err := supplychain.LoadConfig(dir) + if err != nil || cfg == nil { + return true + } + return cfg.EnforcesEcosystem(eco) +} + // requiresPreInstallBlock reports whether a package manager must be enforced via // a pre-install lockfile audit rather than the transparent registry proxy. // poetry, pipenv, and pdm resolve from their own lockfiles and do not honor an @@ -446,7 +715,22 @@ func runPreInstallBlock(cmd *cobra.Command, pmName string, pmArgs []string) erro if lockfilePath == "" { fmt.Fprintf(os.Stderr, "%s no lockfile found for %s, running without enforcement\n", scPrefix, pmName) - return exitWithCode(execPM(pmName, pmArgs, nil)) + return exitWithCode(execPMFunc(pmName, pmArgs, nil)) + } + + // Gradle records resolved versions in gradle.lockfile but does not regenerate + // it automatically: if build.gradle changed since the lock was written, the + // audit reflects stale versions. Warn so the user knows to re-lock. + if pmName == pmGradle { + checkGradleStaleness(lockfilePath) + } + + // A pom.xml lists only direct dependencies — Maven resolves transitives at + // build time, so they are not audited here. Flag the gap so the result is not + // mistaken for full coverage. + if pmName == pmMaven && strings.HasSuffix(lockfilePath, "pom.xml") { + fmt.Fprintf(os.Stderr, "%s note: pom.xml only covers direct dependencies. For full transitive\n", scPrefix) + fmt.Fprintf(os.Stderr, " coverage, consider a lockfile plugin (e.g., io.github.chains-project:maven-lockfile)\n") } ctx, cancel := context.WithTimeout(cmd.Context(), 5*time.Minute) @@ -461,7 +745,7 @@ func runPreInstallBlock(cmd *cobra.Command, pmName string, pmArgs []string) erro // would let a transient failure bypass the control entirely. if policy.FailOpen { fmt.Fprintf(os.Stderr, "%s supply-chain: check failed, allowing (fail-open): %v\n", scPrefix, err) - return exitWithCode(execPM(pmName, pmArgs, nil)) + return exitWithCode(execPMFunc(pmName, pmArgs, nil)) } fmt.Fprintf(os.Stderr, "%s supply-chain: check failed, blocking (fail-closed): %v\n", scPrefix, err) fmt.Fprintf(os.Stderr, " %s\n", "Set fail-open: true in .armis-supply-chain.yaml to allow installs when the check cannot run.") @@ -492,7 +776,7 @@ func runPreInstallBlock(cmd *cobra.Command, pmName string, pmArgs []string) erro s.SuccessText.Render(fmt.Sprintf("supply-chain: %d packages checked, all pass", result.Checked)), s.MutedText.Render(fmt.Sprintf("(%s minimum age)", formatDurationShort(policy.MinReleaseAge)))) } - return exitWithCode(execPM(pmName, pmArgs, nil)) + return exitWithCode(execPMFunc(pmName, pmArgs, nil)) } printPreInstallBlockSummary(violations, policy, pmName) @@ -509,7 +793,7 @@ func printPreInstallBlockSummary(violations []supplychain.Violation, policy supp fmt.Fprintf(os.Stderr, "\n%s %s\n", s.MutedText.Render(scPrefix), - s.WarningText.Render(fmt.Sprintf("supply-chain: BLOCKED — %d package(s) younger than %s", len(violations), formatDurationShort(policy.MinReleaseAge)))) + s.WarningText.Render(fmt.Sprintf("supply-chain: BLOCKED — %s younger than %s", countNoun(len(violations), "package"), formatDurationShort(policy.MinReleaseAge)))) fmt.Fprintf(os.Stderr, " %s\n", s.MutedText.Render("Build was stopped BEFORE execution to prevent supply chain attacks.")) @@ -525,7 +809,7 @@ func printPreInstallBlockSummary(violations []supplychain.Violation, policy supp fmt.Fprintf(os.Stderr, " %s %s %s\n", dot, s.Bold.Render(fmt.Sprintf("%s@%s", v.Name, v.Version)), - s.MutedText.Render(fmt.Sprintf("(published %s ago)", age))) + s.MutedText.Render(fmt.Sprintf("(%s old)", age))) } if remaining := len(violations) - displayCount; remaining > 0 { fmt.Fprintf(os.Stderr, " %s\n", @@ -560,8 +844,25 @@ func blockedViolationNames(violations []supplychain.Violation) []string { return names } +// pmToEcosystem maps a (canonical) package-manager name to its ecosystem. It +// covers every supported PM — both the pre-install ones (used by +// runPreInstallBlock to locate the lockfile) and the proxied ones (used by the +// ecosystems-config scoping gate). Pass the canonical name (canonicalPM) so a +// versioned pip variant resolves to EcosystemPip. func pmToEcosystem(pm string) supplychain.Ecosystem { switch pm { + case pmNPM: + return supplychain.EcosystemNPM + case pmPNPM: + return supplychain.EcosystemPNPM + case pmBun: + return supplychain.EcosystemBun + case pmYarn: + return supplychain.EcosystemYarn + case pmPip: + return supplychain.EcosystemPip + case pmUV: + return supplychain.EcosystemUV case pmPoetry: return supplychain.EcosystemPoetry case pmPipenv: @@ -576,3 +877,33 @@ func pmToEcosystem(pm string) supplychain.Ecosystem { return "" } } + +// checkGradleStaleness warns when build.gradle (or build.gradle.kts) has been +// modified more recently than gradle.lockfile, which means the lock — and the +// audit derived from it — may not reflect the current dependency declarations. +// It is advisory only: a stale lock is a correctness gap the user should resolve +// with "gradle dependencies --write-locks", not a reason to block the build. +func checkGradleStaleness(lockfilePath string) { + lockInfo, err := os.Stat(lockfilePath) + if err != nil { + return + } + + // gradle.lockfile sits beside build.gradle (or the Kotlin DSL build.gradle.kts); + // trimming the lockfile leaf yields the directory prefix to probe. + prefix := strings.TrimSuffix(lockfilePath, "gradle.lockfile") + buildInfo, err := os.Stat(prefix + "build.gradle") + if err != nil { + buildInfo, err = os.Stat(prefix + "build.gradle.kts") + if err != nil { + return + } + } + + if buildInfo.ModTime().After(lockInfo.ModTime()) { + s := output.GetStyles() + fmt.Fprintf(os.Stderr, "%s %s lockfile may be stale (build.gradle is newer). Run:\n", + s.MutedText.Render(scPrefix), s.WarningText.Render("⚠")) + fmt.Fprintf(os.Stderr, " %s\n\n", s.Bold.Render("gradle dependencies --write-locks")) + } +} diff --git a/internal/cmd/supply_chain_wrap_pm_test.go b/internal/cmd/supply_chain_wrap_pm_test.go index e0923ed1..f25370d4 100644 --- a/internal/cmd/supply_chain_wrap_pm_test.go +++ b/internal/cmd/supply_chain_wrap_pm_test.go @@ -51,6 +51,8 @@ func TestRequiresPreInstallBlock(t *testing.T) { {pmPoetry, true}, {pmPipenv, true}, {pmPDM, true}, + {pmMaven, true}, + {pmGradle, true}, {pmPip, false}, {pmUV, false}, {pmNPM, false}, @@ -76,11 +78,19 @@ func TestPmToEcosystem(t *testing.T) { {pmPoetry, supplychain.EcosystemPoetry}, {pmPipenv, supplychain.EcosystemPipfile}, {pmPDM, supplychain.EcosystemPDM}, - // pip/uv use the proxy path, not the pre-install block, so they have no - // pre-install ecosystem mapping. - {pmPip, ""}, - {pmUV, ""}, - {pmNPM, ""}, + {pmMaven, supplychain.EcosystemMaven}, + {pmGradle, supplychain.EcosystemGradle}, + // pmToEcosystem maps every supported PM to its ecosystem — the proxied + // ones (npm/pnpm/bun/yarn/pip/uv) as well as the pre-install ones — so the + // config "ecosystems" scoping gate can classify any wrapped PM. Pass the + // canonical name; a versioned pip variant resolves to pip via canonicalPM. + {pmNPM, supplychain.EcosystemNPM}, + {pmPNPM, supplychain.EcosystemPNPM}, + {pmBun, supplychain.EcosystemBun}, + {pmYarn, supplychain.EcosystemYarn}, + {pmPip, supplychain.EcosystemPip}, + {pmUV, supplychain.EcosystemUV}, + {"unknown-pm", ""}, } for _, tt := range tests { diff --git a/internal/cmd/supply_chain_wrap_summary_test.go b/internal/cmd/supply_chain_wrap_summary_test.go new file mode 100644 index 00000000..8c714350 --- /dev/null +++ b/internal/cmd/supply_chain_wrap_summary_test.go @@ -0,0 +1,301 @@ +package cmd + +import ( + "bytes" + "io" + "os" + "strings" + "testing" + "time" + + "github.com/ArmisSecurity/armis-cli/internal/cli" + "github.com/ArmisSecurity/armis-cli/internal/output" + "github.com/ArmisSecurity/armis-cli/internal/supplychain" +) + +// captureStderr swaps os.Stderr for a pipe, runs fn, and returns everything fn +// wrote to stderr. A goroutine drains the pipe so output larger than the pipe +// buffer cannot deadlock. The original stderr is always restored. +func captureStderr(t *testing.T, fn func()) string { + t.Helper() + orig := os.Stderr + r, w, err := os.Pipe() + if err != nil { + t.Fatalf("os.Pipe: %v", err) + } + os.Stderr = w + + done := make(chan string, 1) + go func() { + var buf bytes.Buffer + _, _ = io.Copy(&buf, r) + done <- buf.String() + }() + + fn() + _ = w.Close() + os.Stderr = orig + return <-done +} + +// forceNoColor pins styles to the plain (no-ANSI) set so output assertions can +// match literal substrings without escape codes. It restores nothing because +// every test that renders output calls it first. +func forceNoColor(t *testing.T) { + t.Helper() + cli.InitColors(cli.ColorModeNever) + output.SyncStylesWithColorMode() +} + +// testPolicy returns a policy with the default 3-day window for output tests. +func testPolicy() supplychain.Policy { + return supplychain.Policy{MinReleaseAge: 72 * time.Hour} +} + +func TestPrintBlockSummary_AllPass(t *testing.T) { + // Tier A: nothing filtered but packages were checked → single green line that + // uses countNoun (no "(s)"). + forceNoColor(t) + out := captureStderr(t, func() { + printBlockSummary(nil, nil, 12, testPolicy(), pmNPM, true) + }) + if !strings.Contains(out, "12 packages checked, all pass") { + t.Errorf("missing all-pass line; got:\n%s", out) + } + if strings.Contains(out, "package(s)") { + t.Errorf("output still uses package(s); got:\n%s", out) + } +} + +func TestPrintBlockSummary_AllPassZeroChecked(t *testing.T) { + // Nothing checked → nothing printed (e.g. a fully cached install). + forceNoColor(t) + out := captureStderr(t, func() { + printBlockSummary(nil, nil, 0, testPolicy(), pmNPM, true) + }) + if out != "" { + t.Errorf("expected no output when nothing was checked; got:\n%s", out) + } +} + +func TestPrintBlockSummary_SingleResolved(t *testing.T) { + // Tier B: one package filtered and resolved → success header, old→new line, + // terse Disable hint, and NO divider/heavy chrome. + forceNoColor(t) + blocked := []supplychain.BlockedPackage{{Name: "axios", Version: "1.17.0", Age: 24 * time.Hour}} + allowed := []supplychain.InstalledPackage{{Name: "axios", Version: "1.16.1"}} + + out := captureStderr(t, func() { + printBlockSummary(blocked, allowed, 5, testPolicy(), pmNPM, true) + }) + + wantSubstrings := []string{ + "filtered 1 too-new release → installed safe version (3-day policy)", + "axios", + "1.17.0", + "(1 day old)", + "→", + "1.16.1 installed", + "Disable: ARMIS_SUPPLY_CHAIN=off", + } + for _, want := range wantSubstrings { + if !strings.Contains(out, want) { + t.Errorf("missing %q; got:\n%s", want, out) + } + } + // Short list must not draw the divider or the full copy-paste incantation. + if strings.Contains(out, strings.Repeat("─", scSepLen)) { + t.Errorf("short list should not draw a divider; got:\n%s", out) + } + if strings.Contains(out, "ARMIS_SUPPLY_CHAIN=off npm install") { + t.Errorf("short list should use the terse disable hint; got:\n%s", out) + } +} + +func TestPrintBlockSummary_MixedUnresolved(t *testing.T) { + // Tier C: one package resolved, one with no safe fallback → neutral header + // (no "installed safe"), per-line warning for the unresolved package. + forceNoColor(t) + blocked := []supplychain.BlockedPackage{ + {Name: "axios", Version: "1.17.0", Age: 24 * time.Hour}, + {Name: "leftpad", Version: "2.0.0", Age: 2 * time.Hour}, + } + allowed := []supplychain.InstalledPackage{{Name: "axios", Version: "1.16.1"}} + + out := captureStderr(t, func() { + printBlockSummary(blocked, allowed, 7, testPolicy(), pmNPM, true) + }) + + if !strings.Contains(out, "filtered 2 too-new releases (3-day policy)") { + t.Errorf("missing neutral mixed header; got:\n%s", out) + } + if strings.Contains(out, "installed safe") { + t.Errorf("mixed header must not claim success; got:\n%s", out) + } + if !strings.Contains(out, "no older safe version (install may fail)") { + t.Errorf("missing unresolved warning; got:\n%s", out) + } + if !strings.Contains(out, "1.16.1 installed") { + t.Errorf("missing resolved line for axios; got:\n%s", out) + } +} + +func TestPrintBlockSummary_InstallFailed(t *testing.T) { + // The proxy resolved a safe version (so allowed is populated) but the package + // manager exited non-zero — e.g. a pin like ^1.17.0 that only the filtered + // version satisfies. The summary must NOT claim the package was "installed": + // the header warns the install did not complete, the per-line wording reads + // "available" (the version exists) not "installed", and a remediation Note is + // shown. + forceNoColor(t) + blocked := []supplychain.BlockedPackage{{Name: "axios", Version: "1.17.0", Age: 24 * time.Hour}} + allowed := []supplychain.InstalledPackage{{Name: "axios", Version: "1.16.1"}} + + out := captureStderr(t, func() { + printBlockSummary(blocked, allowed, 5, testPolicy(), pmNPM, false) + }) + + if !strings.Contains(out, "install did not complete") { + t.Errorf("missing failure header; got:\n%s", out) + } + if strings.Contains(out, "installed safe") { + t.Errorf("must not claim a successful install; got:\n%s", out) + } + if !strings.Contains(out, "1.16.1 available") { + t.Errorf("resolved line should read 'available' on a failed install; got:\n%s", out) + } + if strings.Contains(out, "1.16.1 installed") { + t.Errorf("must not say 'installed' when the PM did not complete; got:\n%s", out) + } + if !strings.Contains(out, "Note:") || !strings.Contains(out, "relax the constraint or exclude the package") { + t.Errorf("missing remediation note; got:\n%s", out) + } +} + +func TestPrintBlockSummary_LongListVerbose(t *testing.T) { + // Tier D: more than maxBlockedDisplay packages → capped list with "… and N + // more", the full divider, and the complete copy-paste disable command. + forceNoColor(t) + var blocked []supplychain.BlockedPackage + var allowed []supplychain.InstalledPackage + names := []string{"alpha", "bravo", "charlie", "delta", "echo", "foxtrot"} + for i, n := range names { + blocked = append(blocked, supplychain.BlockedPackage{ + Name: n, + Version: "2.0.0", + Age: time.Duration(i+1) * time.Hour, + }) + allowed = append(allowed, supplychain.InstalledPackage{Name: n, Version: "1.9.0"}) + } + + out := captureStderr(t, func() { + printBlockSummary(blocked, allowed, len(names), testPolicy(), pmNPM, true) + }) + + if !strings.Contains(out, "… and 1 more") { + t.Errorf("missing overflow line; got:\n%s", out) + } + if !strings.Contains(out, strings.Repeat("─", scSepLen)) { + t.Errorf("long list should draw the divider; got:\n%s", out) + } + if !strings.Contains(out, "ARMIS_SUPPLY_CHAIN=off npm install") { + t.Errorf("long list should show the full disable command; got:\n%s", out) + } +} + +func TestGroupBlockedByPackage_CollapsesToYoungest(t *testing.T) { + // Two blocked versions of one package collapse to a single result keyed on the + // youngest version — that is the one the PM would have installed as "latest". + blocked := []supplychain.BlockedPackage{ + {Name: "axios", Version: "1.17.0", Age: 48 * time.Hour}, + {Name: "axios", Version: "1.18.0", Age: 2 * time.Hour}, + } + allowed := map[string]string{"axios": "1.16.1"} + + got := groupBlockedByPackage(blocked, allowed, 72*time.Hour) + if len(got) != 1 { + t.Fatalf("expected 1 grouped result, got %d: %#v", len(got), got) + } + if got[0].OldVersion != "1.18.0" { + t.Errorf("OldVersion = %q, want the youngest 1.18.0", got[0].OldVersion) + } + if got[0].OldAge != 2*time.Hour { + t.Errorf("OldAge = %v, want 2h", got[0].OldAge) + } + if got[0].NewVersion != "1.16.1" { + t.Errorf("NewVersion = %q, want 1.16.1", got[0].NewVersion) + } +} + +func TestGroupBlockedByPackage_SortYoungestFirst(t *testing.T) { + // Results are ordered youngest-first so the freshest (riskiest) package leads. + blocked := []supplychain.BlockedPackage{ + {Name: "old", Version: "1.0.0", Age: 60 * time.Hour}, + {Name: "fresh", Version: "1.0.0", Age: 1 * time.Hour}, + {Name: "mid", Version: "1.0.0", Age: 12 * time.Hour}, + } + got := groupBlockedByPackage(blocked, map[string]string{}, 72*time.Hour) + wantOrder := []string{"fresh", "mid", "old"} + for i, w := range wantOrder { + if got[i].Name != w { + t.Errorf("position %d = %q, want %q (full: %#v)", i, got[i].Name, w, got) + } + } +} + +func TestFormatPolicyShort(t *testing.T) { + tests := []struct { + name string + d time.Duration + want string + }{ + {"days", 72 * time.Hour, "3-day"}, + {"one day", 24 * time.Hour, "1-day"}, + {"hours", 6 * time.Hour, "6-hour"}, + {"minutes", 30 * time.Minute, "30-minute"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := formatPolicyShort(tt.d); got != tt.want { + t.Errorf("formatPolicyShort(%v) = %q, want %q", tt.d, got, tt.want) + } + }) + } +} + +func TestRationaleMarker(t *testing.T) { + // Redirect the cache dir to a temp location. os.UserCacheDir honors HOME on + // darwin and XDG_CACHE_HOME on linux, so set both for portability. + tmp := t.TempDir() + t.Setenv("HOME", tmp) + t.Setenv("XDG_CACHE_HOME", tmp) + + if rationaleAlreadyShown() { + t.Fatal("marker should be absent in a fresh cache dir") + } + markRationaleShown() + if !rationaleAlreadyShown() { + t.Error("marker should be present after markRationaleShown") + } + // Idempotent: a second mark must not error or change the result. + markRationaleShown() + if !rationaleAlreadyShown() { + t.Error("marker should remain present after a second mark") + } +} + +func TestShouldShowRationale_SuppressedWhenNonInteractive(t *testing.T) { + // Under `go test` stdin/stderr are pipes, so cli.IsInteractive() is false and + // the rationale must stay suppressed even with no marker present — this is the + // CI / piped-output guarantee. + tmp := t.TempDir() + t.Setenv("HOME", tmp) + t.Setenv("XDG_CACHE_HOME", tmp) + + if cli.IsInteractive() { + t.Skip("test stderr/stdin are a TTY; cannot assert the non-interactive path") + } + if shouldShowRationale() { + t.Error("rationale must be suppressed on a non-interactive terminal") + } +} diff --git a/internal/cmd/supply_chain_wrap_test.go b/internal/cmd/supply_chain_wrap_test.go index ba97e534..1c9f0884 100644 --- a/internal/cmd/supply_chain_wrap_test.go +++ b/internal/cmd/supply_chain_wrap_test.go @@ -1,10 +1,241 @@ package cmd import ( + "context" + "os" + "path/filepath" "reflect" + "strings" "testing" + "time" + + "github.com/spf13/cobra" ) +// execPMCapture records the arguments execPMFunc was invoked with so tests can +// assert on the resolved PM name, forwarded args, and injected environment +// without spawning a real process. +type execPMCapture struct { + called bool + calls int + pm string + args []string + extraEnv []string +} + +// stubExecPM replaces the package-level execPMFunc with a capturing stub that +// returns the given exit code, restoring the real implementation on cleanup. It +// returns the capture so the test can inspect what would have been executed. +func stubExecPM(t *testing.T, exitCode int) *execPMCapture { + t.Helper() + cap := &execPMCapture{} + t.Cleanup(func() { execPMFunc = execPM }) + execPMFunc = func(pm string, args []string, extraEnv []string) (int, error) { + cap.called = true + cap.calls++ + cap.pm = pm + cap.args = args + cap.extraEnv = extraEnv + return exitCode, nil + } + return cap +} + +// envValue returns the value for key in a "KEY=value" environment slice, and +// whether it was present. +func envValue(env []string, key string) (string, bool) { + prefix := key + "=" + for _, e := range env { + if strings.HasPrefix(e, prefix) { + return strings.TrimPrefix(e, prefix), true + } + } + return "", false +} + +// newWrapTestCmd returns a cobra command with a live context, as the wrap +// runners expect (runProxyWrap/runPreInstallBlock derive a timeout from +// cmd.Context(), which panics on a nil context). +func newWrapTestCmd() *cobra.Command { + c := &cobra.Command{} + c.SetContext(context.Background()) + return c +} + +func TestRunSupplyChainWrap_SCActiveBypass(t *testing.T) { + // With ARMIS_SUPPLY_CHAIN_ACTIVE=1 the wrapper must pass straight through to + // the package manager (recursion guard) without starting a proxy. + t.Setenv(envSCActive, "1") + cap := stubExecPM(t, 0) + + err := runSupplyChainWrap(newWrapTestCmd(), []string{"npm", "install", "lodash"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !cap.called { + t.Fatal("expected execPMFunc to be called") + } + if cap.pm != "npm" { + t.Errorf("pm = %q, want npm", cap.pm) + } + if !reflect.DeepEqual(cap.args, []string{"install", "lodash"}) { + t.Errorf("args = %#v, want [install lodash]", cap.args) + } + // The passthrough must not inject the registry override. + if _, ok := envValue(cap.extraEnv, "npm_config_registry"); ok { + t.Error("passthrough should not set npm_config_registry") + } +} + +func TestRunSupplyChainWrap_Off(t *testing.T) { + // ARMIS_SUPPLY_CHAIN=off disables enforcement entirely. + t.Setenv(envSCActive, "") + t.Setenv(envSCOff, "off") + cap := stubExecPM(t, 0) + + err := runSupplyChainWrap(newWrapTestCmd(), []string{"npm", "install"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !cap.called { + t.Fatal("expected execPMFunc to be called for the off passthrough") + } +} + +func TestRunSupplyChainWrap_UnsupportedPM(t *testing.T) { + cap := stubExecPM(t, 0) + + err := runSupplyChainWrap(newWrapTestCmd(), []string{"cargo", "build"}) + if err == nil { + t.Fatal("expected an error for an unsupported package manager") + } + if cap.called { + t.Error("execPMFunc must not run for an unsupported package manager") + } +} + +func TestRunProxyWrap_InjectsNpmRegistryEnv(t *testing.T) { + // Run from an isolated dir so resolveWrapPolicy does not pick up a stray + // .armis-supply-chain.yaml from an ancestor of the repo checkout. + chdirTemp(t) + t.Setenv(envSCActive, "") + t.Setenv(envSCOff, "") + cap := stubExecPM(t, 0) + + if err := runProxyWrap(newWrapTestCmd(), pmNPM, []string{"install"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !cap.called { + t.Fatal("expected execPMFunc to be called") + } + reg, ok := envValue(cap.extraEnv, "npm_config_registry") + if !ok { + t.Fatalf("npm_config_registry not set; extraEnv=%v", cap.extraEnv) + } + if !strings.HasPrefix(reg, "http://127.0.0.1:") { + t.Errorf("npm_config_registry = %q, want http://127.0.0.1:/", reg) + } + // The recursion guard must be set for the child process. + if v, ok := envValue(cap.extraEnv, envSCActive); !ok || v != "1" { + t.Errorf("%s = %q (present=%v), want 1", envSCActive, v, ok) + } +} + +// TestRunProxyWrap_PipEnvPointsAtProxy asserts pip is routed through the proxy +// via PIP_INDEX_URL pointing at the local proxy's /simple/ endpoint. The actual +// PyPI Simple API age filtering the proxy performs in PyPI mode is covered by +// the proxy-layer tests (TestProxyFilterPyPISimple* in the supplychain package); +// this test pins the command-layer wiring that gets pip there. +func TestRunProxyWrap_PipEnvPointsAtProxy(t *testing.T) { + chdirTemp(t) + t.Setenv(envSCActive, "") + t.Setenv(envSCOff, "") + cap := stubExecPM(t, 0) + + if err := runProxyWrap(newWrapTestCmd(), pmPip, []string{"install", "requests"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + idx, ok := envValue(cap.extraEnv, "PIP_INDEX_URL") + if !ok { + t.Fatalf("PIP_INDEX_URL not set; extraEnv=%v", cap.extraEnv) + } + if !strings.HasPrefix(idx, "http://127.0.0.1:") || !strings.HasSuffix(idx, "/simple/") { + t.Errorf("PIP_INDEX_URL = %q, want http://127.0.0.1:/simple/", idx) + } +} + +func TestRunPreInstallBlock_AllPassRunsPM(t *testing.T) { + // A poetry.lock whose only entry is a git-sourced package: the parser drops + // it, so RunCheck has nothing to query (Checked == 0), no network is touched, + // and the build is allowed to run. + dir := chdirTemp(t) + t.Setenv(envSCActive, "") + t.Setenv(envSCOff, "") + t.Setenv(envSCSkip, "") + lock := `[[package]] +name = "my-git-dep" +version = "1.0.0" + +[package.source] +type = "git" +url = "https://github.com/user/repo.git" +` + if err := os.WriteFile(filepath.Join(dir, "poetry.lock"), []byte(lock), 0o600); err != nil { + t.Fatal(err) + } + cap := stubExecPM(t, 0) + + if err := runPreInstallBlock(newWrapTestCmd(), pmPoetry, []string{"install"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cap.calls != 1 { + t.Errorf("execPMFunc called %d times, want 1", cap.calls) + } + if cap.pm != pmPoetry { + t.Errorf("pm = %q, want %q", cap.pm, pmPoetry) + } +} + +func TestRunSupplyChainWrap_EcosystemScopeExcludesPM(t *testing.T) { + // Config scopes enforcement to npm only; a pip install must pass straight + // through to the real pip with no proxy started and no PIP_INDEX_URL injected. + dir := chdirTemp(t) + writeConfig(t, dir, "version: 1\necosystems:\n - npm\n") + t.Setenv(envSCActive, "") + t.Setenv(envSCOff, "") + cap := stubExecPM(t, 0) + + if err := runSupplyChainWrap(newWrapTestCmd(), []string{"pip", "install", "requests"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !cap.called { + t.Fatal("expected execPMFunc to be called (passthrough)") + } + if _, ok := envValue(cap.extraEnv, "PIP_INDEX_URL"); ok { + t.Error("out-of-scope pip must not be routed through the proxy (PIP_INDEX_URL set)") + } + if _, ok := envValue(cap.extraEnv, envSCActive); ok { + t.Error("passthrough must not set the recursion guard env") + } +} + +func TestRunSupplyChainWrap_EcosystemScopeIncludesPM(t *testing.T) { + // Config scopes to pip; a pip install is still enforced (routed through the + // proxy). This guards against the gate over-blocking an in-scope ecosystem. + dir := chdirTemp(t) + writeConfig(t, dir, "version: 1\necosystems:\n - pip\n") + t.Setenv(envSCActive, "") + t.Setenv(envSCOff, "") + cap := stubExecPM(t, 0) + + if err := runSupplyChainWrap(newWrapTestCmd(), []string{"pip", "install", "requests"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if _, ok := envValue(cap.extraEnv, "PIP_INDEX_URL"); !ok { + t.Errorf("in-scope pip should be routed through the proxy; extraEnv=%v", cap.extraEnv) + } +} + func TestParseSkipPackages(t *testing.T) { // Hoist the repeated package names into constants so the want slices below // don't trip goconst (which flags an identical string literal repeated 3+ @@ -43,3 +274,55 @@ func TestParseSkipPackages(t *testing.T) { }) } } + +func TestCheckGradleStaleness(t *testing.T) { + // checkGradleStaleness writes an advisory warning to stderr and returns + // nothing; these cases exercise each path condition (missing lockfile, + // missing build file, .kts fallback, stale vs fresh) to confirm it handles + // them without panicking and stats the right sibling file. + writeFile := func(t *testing.T, path string, mod time.Time) { + t.Helper() + if err := os.WriteFile(path, []byte("x"), 0o600); err != nil { + t.Fatal(err) + } + if !mod.IsZero() { + if err := os.Chtimes(path, mod, mod); err != nil { + t.Fatal(err) + } + } + } + + t.Run("missing lockfile is a no-op", func(t *testing.T) { + dir := t.TempDir() + checkGradleStaleness(filepath.Join(dir, "gradle.lockfile")) + }) + + t.Run("lockfile without build file is a no-op", func(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "gradle.lockfile"), time.Time{}) + checkGradleStaleness(filepath.Join(dir, "gradle.lockfile")) + }) + + t.Run("build.gradle newer than lockfile warns", func(t *testing.T) { + dir := t.TempDir() + old := time.Now().Add(-time.Hour) + writeFile(t, filepath.Join(dir, "gradle.lockfile"), old) + writeFile(t, filepath.Join(dir, "build.gradle"), time.Now()) + checkGradleStaleness(filepath.Join(dir, "gradle.lockfile")) + }) + + t.Run("build.gradle.kts fallback is detected", func(t *testing.T) { + dir := t.TempDir() + old := time.Now().Add(-time.Hour) + writeFile(t, filepath.Join(dir, "gradle.lockfile"), old) + writeFile(t, filepath.Join(dir, "build.gradle.kts"), time.Now()) + checkGradleStaleness(filepath.Join(dir, "gradle.lockfile")) + }) + + t.Run("fresh lockfile newer than build is a no-op", func(t *testing.T) { + dir := t.TempDir() + writeFile(t, filepath.Join(dir, "build.gradle"), time.Now().Add(-time.Hour)) + writeFile(t, filepath.Join(dir, "gradle.lockfile"), time.Now()) + checkGradleStaleness(filepath.Join(dir, "gradle.lockfile")) + }) +} diff --git a/internal/output/human.go b/internal/output/human.go index 49c0fc76..6db0015b 100644 --- a/internal/output/human.go +++ b/internal/output/human.go @@ -296,9 +296,13 @@ func (f *HumanFormatter) FormatWithOptions(result *model.ScanResult, w io.Writer // 1. Header banner (bold text, no box) ew.write("%s\n", s.HeaderBanner.Render("ARMIS SECURITY SCAN RESULTS")) - // 2. Scan ID & Status with styled labels and values + // 2. Scan ID & Status with styled labels and values. Skip the Scan ID line + // when there is no ID (e.g. the local `supply-chain check` audit, which has + // no cloud scan), so it doesn't render a dangling "Scan ID:" with no value. labelStyle := s.MutedText - ew.write("%s %s\n", labelStyle.Render("Scan ID:"), s.ScanID.Render(result.ScanID)) + if result.ScanID != "" { + ew.write("%s %s\n", labelStyle.Render("Scan ID:"), s.ScanID.Render(result.ScanID)) + } ew.write("%s %s\n", labelStyle.Render("Status:"), s.StatusComplete.Render(result.Status)) ew.write("\n") diff --git a/internal/output/human_format_test.go b/internal/output/human_format_test.go index cda1055f..5855af72 100644 --- a/internal/output/human_format_test.go +++ b/internal/output/human_format_test.go @@ -61,6 +61,32 @@ func TestHumanFormatter_Format(t *testing.T) { } } +// TestHumanFormatter_OmitsEmptyScanID confirms the "Scan ID:" label is not +// rendered when there is no scan ID — e.g. the local `supply-chain check` audit, +// which has no cloud scan. The Status line must still render. +func TestHumanFormatter_OmitsEmptyScanID(t *testing.T) { + formatter := &HumanFormatter{} + result := &model.ScanResult{ + ScanID: "", + Status: "completed", + Findings: []model.Finding{}, + Summary: model.Summary{Total: 0}, + } + + var buf bytes.Buffer + if err := formatter.Format(result, &buf); err != nil { + t.Fatalf("Format failed: %v", err) + } + + output := buf.String() + if strings.Contains(output, "Scan ID:") { + t.Error("expected no 'Scan ID:' label when ScanID is empty") + } + if !strings.Contains(output, "completed") { + t.Error("expected the Status line to still render") + } +} + func TestHumanFormatter_FormatWithOptions(t *testing.T) { formatter := &HumanFormatter{} diff --git a/internal/supplychain/check/check.go b/internal/supplychain/check/check.go index 84ae1742..4d0b0e43 100644 --- a/internal/supplychain/check/check.go +++ b/internal/supplychain/check/check.go @@ -19,7 +19,17 @@ type Result struct { Skipped int } +// registryFn resolves publish dates for a set of packages in an ecosystem. It +// is the seam queryRegistry satisfies in production; tests inject a closure that +// returns controlled timestamps (or errors) so the RunCheck pipeline can be +// exercised without real network calls. +type registryFn func(ctx context.Context, ecosystem supplychain.Ecosystem, packages []registry.PackageRequest) []registry.QueryResult + func RunCheck(ctx context.Context, policy supplychain.Policy, lockfilePath string, baseLockfilePath string) (*Result, error) { + return runCheck(ctx, policy, lockfilePath, baseLockfilePath, queryRegistry) +} + +func runCheck(ctx context.Context, policy supplychain.Policy, lockfilePath string, baseLockfilePath string, queryRegistryFn registryFn) (*Result, error) { ecosystem := detectEcosystemFromPath(lockfilePath) // armis:ignore cwe:22 cwe:23 cwe:73 reason:local CLI auditing the user's own project; lockfilePath comes from lockfile auto-detection or an explicit --lockfile flag the user controls, not untrusted network input (readLockfile also size-bounds the read) @@ -51,7 +61,7 @@ func RunCheck(ctx context.Context, policy supplychain.Policy, lockfilePath strin return &Result{Skipped: skipped}, nil } - results := queryRegistry(ctx, ecosystem, toCheck) + results := queryRegistryFn(ctx, ecosystem, toCheck) now := time.Now() var violations []supplychain.Violation @@ -127,6 +137,14 @@ func queryRegistry(ctx context.Context, ecosystem supplychain.Ecosystem, package } } +// DetectEcosystemFromPath classifies a lockfile path to its ecosystem using the +// same suffix rules RunCheck applies internally. Exported so callers outside the +// package (e.g. the check command's ecosystem-scoping gate) classify a lockfile +// exactly as the audit will, including the requirements*.txt special cases. +func DetectEcosystemFromPath(path string) supplychain.Ecosystem { + return detectEcosystemFromPath(path) +} + func detectEcosystemFromPath(path string) supplychain.Ecosystem { lower := strings.ToLower(path) switch { diff --git a/internal/supplychain/check/check_test.go b/internal/supplychain/check/check_test.go index d08ed8f0..f661cf35 100644 --- a/internal/supplychain/check/check_test.go +++ b/internal/supplychain/check/check_test.go @@ -1,9 +1,18 @@ package check import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "path/filepath" + "strings" "testing" + "time" + "github.com/ArmisSecurity/armis-cli/internal/model" "github.com/ArmisSecurity/armis-cli/internal/supplychain" + "github.com/ArmisSecurity/armis-cli/internal/supplychain/registry" ) // testPkgSQLAlchemy is the mixed-case package name used across the Python @@ -11,15 +20,19 @@ import ( // (lowercasing). Shared so the assertion reads the same in every parser test. const testPkgSQLAlchemy = "SQLAlchemy" +// pkgExpress is the npm package name used across the diff and RunCheck tests; +// hoisted to a constant so the repeated literal does not trip goconst. +const pkgExpress = "express" + func TestDiffEntries(t *testing.T) { t.Run("returns only new packages", func(t *testing.T) { current := []PackageEntry{ - {Name: "express", Version: "4.18.2"}, + {Name: pkgExpress, Version: "4.18.2"}, {Name: "lodash", Version: "4.17.21"}, {Name: "new-pkg", Version: "1.0.0"}, } base := []PackageEntry{ - {Name: "express", Version: "4.18.2"}, + {Name: pkgExpress, Version: "4.18.2"}, {Name: "lodash", Version: "4.17.21"}, } @@ -35,10 +48,10 @@ func TestDiffEntries(t *testing.T) { t.Run("version upgrade counts as new", func(t *testing.T) { current := []PackageEntry{ - {Name: "express", Version: "4.19.0"}, + {Name: pkgExpress, Version: "4.19.0"}, } base := []PackageEntry{ - {Name: "express", Version: "4.18.2"}, + {Name: pkgExpress, Version: "4.18.2"}, } result := diffEntries(current, base) @@ -63,7 +76,7 @@ func TestDiffEntries(t *testing.T) { t.Run("identical sets returns empty", func(t *testing.T) { entries := []PackageEntry{ - {Name: "express", Version: "4.18.2"}, + {Name: pkgExpress, Version: "4.18.2"}, } result := diffEntries(entries, entries) @@ -118,3 +131,234 @@ func TestDetectEcosystemFromPath(t *testing.T) { }) } } + +// staticAgeResolver returns a registryFn that reports every requested package as +// published `age` ago, so a single age value drives the violation outcome. +func staticAgeResolver(age time.Duration) registryFn { + return func(_ context.Context, _ supplychain.Ecosystem, packages []registry.PackageRequest) []registry.QueryResult { + now := time.Now() + out := make([]registry.QueryResult, len(packages)) + for i, p := range packages { + out[i] = registry.QueryResult{Name: p.Name, Version: p.Version, PublishTime: now.Add(-age)} + } + return out + } +} + +// ageByNameResolver returns a registryFn that reports each package as published +// the age in `ages` (keyed by name) ago, falling back to `fallback` for names +// not listed. It lets a test make one package young while the rest stay old. +func ageByNameResolver(ages map[string]time.Duration, fallback time.Duration) registryFn { + return func(_ context.Context, _ supplychain.Ecosystem, packages []registry.PackageRequest) []registry.QueryResult { + now := time.Now() + out := make([]registry.QueryResult, len(packages)) + for i, p := range packages { + age := fallback + if a, ok := ages[p.Name]; ok { + age = a + } + out[i] = registry.QueryResult{Name: p.Name, Version: p.Version, PublishTime: now.Add(-age)} + } + return out + } +} + +func TestRunCheck(t *testing.T) { + npmLock := filepath.Join("testdata", "valid-v3.json") + poetryLock := filepath.Join("testdata", "poetry.lock") + pomXML := filepath.Join("testdata", "pom.xml") + // valid-v3.json yields four checkable packages after the parser drops the + // file:/git+/link: entries: express, @types/node, lodash, debug. + const npmPkgCount = 4 + defaultPolicy := supplychain.Policy{MinReleaseAge: 72 * time.Hour} + old := 100 * 24 * time.Hour // comfortably older than the 72h threshold + + t.Run("npm clean pass - all packages old", func(t *testing.T) { + res, err := runCheck(context.Background(), defaultPolicy, npmLock, "", staticAgeResolver(old)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if res.Checked != npmPkgCount { + t.Errorf("Checked = %d, want %d", res.Checked, npmPkgCount) + } + if len(res.Violations) != 0 { + t.Errorf("expected no violations, got %+v", res.Violations) + } + }) + + t.Run("npm violation - one recent package is High severity", func(t *testing.T) { + res, err := runCheck(context.Background(), defaultPolicy, npmLock, "", + ageByNameResolver(map[string]time.Duration{pkgExpress: 12 * time.Hour}, old)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(res.Violations) != 1 { + t.Fatalf("expected 1 violation, got %d: %+v", len(res.Violations), res.Violations) + } + v := res.Violations[0] + if v.Name != pkgExpress { + t.Errorf("violation name = %q, want express", v.Name) + } + // 12h < 24h, so ClassifySeverity returns High. + if v.Severity != model.SeverityHigh { + t.Errorf("severity = %q, want %q", v.Severity, model.SeverityHigh) + } + }) + + t.Run("routes poetry.lock to the PyPI ecosystem", func(t *testing.T) { + var gotEcosystem supplychain.Ecosystem + _, err := runCheck(context.Background(), defaultPolicy, poetryLock, "", + func(_ context.Context, eco supplychain.Ecosystem, packages []registry.PackageRequest) []registry.QueryResult { + gotEcosystem = eco + return staticAgeResolver(old)(context.Background(), eco, packages) + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if gotEcosystem != supplychain.EcosystemPoetry { + t.Errorf("ecosystem passed to resolver = %q, want %q", gotEcosystem, supplychain.EcosystemPoetry) + } + }) + + t.Run("routes pom.xml to the Maven ecosystem", func(t *testing.T) { + var gotEcosystem supplychain.Ecosystem + _, err := runCheck(context.Background(), defaultPolicy, pomXML, "", + func(_ context.Context, eco supplychain.Ecosystem, packages []registry.PackageRequest) []registry.QueryResult { + gotEcosystem = eco + return staticAgeResolver(old)(context.Background(), eco, packages) + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if gotEcosystem != supplychain.EcosystemMaven { + t.Errorf("ecosystem passed to resolver = %q, want %q", gotEcosystem, supplychain.EcosystemMaven) + } + }) + + t.Run("base lockfile diff excludes unchanged packages", func(t *testing.T) { + // Same file as current and base: every entry is unchanged, so nothing + // reaches the registry and the resolver must never be called. + res, err := runCheck(context.Background(), defaultPolicy, npmLock, npmLock, + func(_ context.Context, _ supplychain.Ecosystem, _ []registry.PackageRequest) []registry.QueryResult { + t.Error("resolver should not be called when all packages are diffed out") + return nil + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if res.Checked != 0 { + t.Errorf("Checked = %d, want 0", res.Checked) + } + }) + + t.Run("policy exclusion skips a package", func(t *testing.T) { + policy := supplychain.Policy{MinReleaseAge: 72 * time.Hour, Exclusions: []string{pkgExpress}} + res, err := runCheck(context.Background(), policy, npmLock, "", staticAgeResolver(old)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if res.Skipped != 1 { + t.Errorf("Skipped = %d, want 1", res.Skipped) + } + if res.Checked != npmPkgCount-1 { + t.Errorf("Checked = %d, want %d", res.Checked, npmPkgCount-1) + } + }) + + t.Run("registry error becomes a warning not a violation", func(t *testing.T) { + res, err := runCheck(context.Background(), defaultPolicy, npmLock, "", + func(_ context.Context, _ supplychain.Ecosystem, packages []registry.PackageRequest) []registry.QueryResult { + now := time.Now() + out := make([]registry.QueryResult, len(packages)) + for i, p := range packages { + if p.Name == pkgExpress { + out[i] = registry.QueryResult{Name: p.Name, Version: p.Version, Err: fmt.Errorf("registry unreachable")} + continue + } + out[i] = registry.QueryResult{Name: p.Name, Version: p.Version, PublishTime: now.Add(-old)} + } + return out + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(res.Warnings) != 1 { + t.Errorf("expected 1 warning, got %d: %v", len(res.Warnings), res.Warnings) + } + if len(res.Violations) != 0 { + t.Errorf("expected no violations, got %+v", res.Violations) + } + }) + + t.Run("all packages excluded returns early without querying", func(t *testing.T) { + policy := supplychain.Policy{ + MinReleaseAge: 72 * time.Hour, + Exclusions: []string{pkgExpress, "lodash", "debug", "@types/node"}, + } + res, err := runCheck(context.Background(), policy, npmLock, "", + func(_ context.Context, _ supplychain.Ecosystem, _ []registry.PackageRequest) []registry.QueryResult { + t.Error("resolver should not be called when every package is excluded") + return nil + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if res.Checked != 0 { + t.Errorf("Checked = %d, want 0", res.Checked) + } + if res.Skipped != npmPkgCount { + t.Errorf("Skipped = %d, want %d", res.Skipped, npmPkgCount) + } + }) + + t.Run("parse error propagates", func(t *testing.T) { + // A missing lockfile cannot be parsed; the error must surface rather than + // being swallowed into a false "all clear". + missing := filepath.Join(t.TempDir(), "package-lock.json") + _, err := runCheck(context.Background(), defaultPolicy, missing, "", staticAgeResolver(old)) + if err == nil { + t.Fatal("expected error for missing lockfile, got nil") + } + }) +} + +// TestRunCheckEndToEndNPM exercises the full pipeline against a mock npm +// registry: lockfile parse -> real registry client over httptest -> publish-date +// classification. This is the one case that validates the on-the-wire npm "time" +// metadata format the production queryRegistry depends on. +func TestRunCheckEndToEndNPM(t *testing.T) { + // One package is freshly published (now) and must be flagged; the rest are + // old. The handler answers npm metadata for any requested package name. + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + name := strings.TrimPrefix(r.URL.Path, "/") + w.Header().Set("Content-Type", "application/json") + switch name { + case pkgExpress: + // Published "now" (RFC3339) -> younger than the 72h threshold. + _, _ = fmt.Fprintf(w, `{"time":{"4.18.2":%q}}`, time.Now().UTC().Format(time.RFC3339)) + default: + // Old enough to pass for every other package version in the fixture. + _, _ = fmt.Fprint(w, `{"time":{`+ + `"20.10.0":"2020-01-01T00:00:00Z",`+ + `"4.17.21":"2020-01-01T00:00:00Z",`+ + `"2.6.9":"2020-01-01T00:00:00Z"}}`) + } + })) + defer server.Close() + + resolver := func(ctx context.Context, _ supplychain.Ecosystem, packages []registry.PackageRequest) []registry.QueryResult { + return registry.NewClientWithHTTP(server.Client(), server.URL).GetPublishDates(ctx, packages) + } + + policy := supplychain.Policy{MinReleaseAge: 72 * time.Hour} + res, err := runCheck(context.Background(), policy, filepath.Join("testdata", "valid-v3.json"), "", resolver) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(res.Violations) != 1 { + t.Fatalf("expected 1 violation (express), got %d: %+v", len(res.Violations), res.Violations) + } + if res.Violations[0].Name != pkgExpress { + t.Errorf("violation name = %q, want express", res.Violations[0].Name) + } +} diff --git a/internal/supplychain/config.go b/internal/supplychain/config.go index cf98a5df..2254803b 100644 --- a/internal/supplychain/config.go +++ b/internal/supplychain/config.go @@ -18,9 +18,48 @@ type Config struct { Version int `yaml:"version,omitempty"` MinAge string `yaml:"min-age,omitempty"` Exclusions []string `yaml:"exclusions,omitempty"` + Ecosystems []string `yaml:"ecosystems,omitempty"` FailOpen bool `yaml:"fail-open,omitempty"` } +// ecosystemAliasPipenv is the user-facing name for the Pipfile/pipenv ecosystem. +// --help and the generated config call it "pipenv" (the tool name users know), +// while the internal constant is EcosystemPipfile (named after Pipfile.lock). +// Accepting both means copying "pipenv" from the docs never triggers a false +// "unknown ecosystem" warning. +const ecosystemAliasPipenv = "pipenv" + +// knownEcosystems is the set of ecosystem names accepted in the config's +// "ecosystems" list, covering every package manager supply-chain supports +// (Node, Python, Java). It backs UnknownEcosystems so a typo in the config +// surfaces as a warning rather than being silently ignored. It is built from the +// typed Ecosystem constants (plus the "pipenv" alias) so the accepted set stays +// in lockstep with detection and there is a single source of truth. +var knownEcosystems = func() map[string]bool { + m := make(map[string]bool) + for _, e := range []Ecosystem{ + EcosystemNPM, EcosystemPNPM, EcosystemBun, EcosystemYarn, + EcosystemPip, EcosystemPoetry, EcosystemPipfile, EcosystemPDM, EcosystemUV, + EcosystemMaven, EcosystemGradle, + } { + m[string(e)] = true + } + m[ecosystemAliasPipenv] = true // accept the tool name as an alias for pipfile + return m +}() + +// knownEcosystemsHint is the human-facing "supported" list shown when an unknown +// ecosystem name is found in a config. It leads with "pipenv" (the tool name in +// --help) and omits the "pipfile" alias to keep the guidance aligned with the +// docs; both names are still accepted by knownEcosystems. +const knownEcosystemsHint = "npm, pnpm, bun, yarn, pip, poetry, pipenv, pdm, uv, maven, gradle" + +// KnownEcosystemsHint returns the human-facing list of supported ecosystem names +// for use in CLI warnings, so the message stays in sync with knownEcosystems. +func KnownEcosystemsHint() string { + return knownEcosystemsHint +} + // LoadConfig reads the supply-chain config from dir, returning (nil, nil) when // the file is absent. dir is the user's own project directory and ConfigFileName // is a constant literal leaf with no separators or "..", so the joined path always @@ -78,6 +117,62 @@ func (c *Config) ToPolicy() (Policy, error) { return policy, nil } +// UnknownEcosystems returns the names listed under "ecosystems" in the config +// that are not recognized supply-chain ecosystems, in the order they appear. +// It is pure (no I/O) so callers can decide how to surface the result; the CLI +// emits a warning, while tests assert on the returned slice directly. A typo +// like "pyhton" would otherwise be silently ignored, giving a false sense that +// a policy is scoped when it is not. +func (c *Config) UnknownEcosystems() []string { + var unknown []string + for _, eco := range c.Ecosystems { + if !knownEcosystems[eco] { + unknown = append(unknown, eco) + } + } + return unknown +} + +// EnforcesEcosystem reports whether eco should be enforced under this config. +// +// Semantics, chosen to fail safe for a security control: +// - A nil config or an empty "ecosystems" list means "enforce every ecosystem" +// (the default — the field is opt-in scoping, not opt-out). +// - A list containing at least one recognized ecosystem restricts enforcement +// to the listed ecosystems only. +// - A list whose entries are ALL unrecognized (e.g. every name is a typo) is +// treated as no restriction at all, so a misspelling cannot silently disable +// the control. The typos are surfaced separately via UnknownEcosystems. +// +// The "pipenv" tool-name alias matches EcosystemPipfile, mirroring the alias +// accepted by knownEcosystems and the generated config. +func (c *Config) EnforcesEcosystem(eco Ecosystem) bool { + if c == nil { + return true + } + + hasKnown := false + for _, name := range c.Ecosystems { + if knownEcosystems[name] { + hasKnown = true + break + } + } + if !hasKnown { + return true + } + + for _, name := range c.Ecosystems { + if name == string(eco) { + return true + } + if eco == EcosystemPipfile && name == ecosystemAliasPipenv { + return true + } + } + return false +} + // FindConfigDir walks up from startDir looking for a directory that contains // ConfigFileName, returning that directory (or "" if none is found). startDir is // resolved to an absolute path first so the upward walk works even when callers diff --git a/internal/supplychain/config_test.go b/internal/supplychain/config_test.go index c1b7e6da..ec5d27af 100644 --- a/internal/supplychain/config_test.go +++ b/internal/supplychain/config_test.go @@ -37,10 +37,7 @@ fail-open: true } }) - t.Run("unknown fields like ecosystems are ignored", func(t *testing.T) { - // `ecosystems:` was removed from the schema; an existing config that still - // carries it must load without error (yaml.v3 ignores unknown keys) so we - // stay backward-compatible with files written by older versions. + t.Run("ecosystems field is parsed", func(t *testing.T) { dir := t.TempDir() content := `min-age: 5d ecosystems: @@ -59,6 +56,9 @@ ecosystems: if cfg.MinAge != "5d" { t.Errorf("expected min-age=5d, got %s", cfg.MinAge) } + if len(cfg.Ecosystems) != 2 || cfg.Ecosystems[0] != "npm" || cfg.Ecosystems[1] != "pnpm" { + t.Errorf("expected ecosystems [npm pnpm], got %v", cfg.Ecosystems) + } }) t.Run("file not found returns nil", func(t *testing.T) { @@ -208,3 +208,95 @@ func TestFindConfigDir(t *testing.T) { } }) } + +func TestUnknownEcosystems(t *testing.T) { + t.Run("all known returns nil", func(t *testing.T) { + cfg := &Config{Ecosystems: []string{"npm", "pip", "maven", "gradle", "uv"}} + if got := cfg.UnknownEcosystems(); got != nil { + t.Errorf("expected nil for all-known ecosystems, got %v", got) + } + }) + + t.Run("flags unknown names in order", func(t *testing.T) { + cfg := &Config{Ecosystems: []string{"npm", "pyhton", "cargo", "pip"}} + got := cfg.UnknownEcosystems() + want := []string{"pyhton", "cargo"} + if len(got) != len(want) { + t.Fatalf("expected %v, got %v", want, got) + } + for i := range want { + if got[i] != want[i] { + t.Errorf("at %d: expected %q, got %q", i, want[i], got[i]) + } + } + }) + + t.Run("empty config returns nil", func(t *testing.T) { + cfg := &Config{} + if got := cfg.UnknownEcosystems(); got != nil { + t.Errorf("expected nil for empty ecosystems, got %v", got) + } + }) + + t.Run("every supported ecosystem is recognized", func(t *testing.T) { + all := []string{ + string(EcosystemNPM), string(EcosystemPNPM), string(EcosystemBun), string(EcosystemYarn), + string(EcosystemPip), string(EcosystemPoetry), string(EcosystemPipfile), string(EcosystemPDM), + string(EcosystemUV), string(EcosystemMaven), string(EcosystemGradle), + } + cfg := &Config{Ecosystems: all} + if got := cfg.UnknownEcosystems(); got != nil { + t.Errorf("expected all %d ecosystems recognized, got unknown %v", len(all), got) + } + }) + + t.Run("pipenv alias is accepted alongside pipfile", func(t *testing.T) { + // --help and the generated config call this ecosystem "pipenv" (the tool + // name), while the internal constant is "pipfile". Both must be accepted + // so copying "pipenv" from the docs never triggers a false typo warning. + cfg := &Config{Ecosystems: []string{ecosystemAliasPipenv, string(EcosystemPipfile)}} + if got := cfg.UnknownEcosystems(); got != nil { + t.Errorf("expected both pipenv and pipfile recognized, got unknown %v", got) + } + }) +} + +func TestEnforcesEcosystem(t *testing.T) { + tests := []struct { + name string + cfg *Config + eco Ecosystem + want bool + }{ + // Fail-safe defaults: no scoping means enforce everything. + {"nil config enforces all", nil, EcosystemNPM, true}, + {"empty list enforces all", &Config{}, EcosystemPip, true}, + {"nil ecosystems slice enforces all", &Config{Ecosystems: nil}, EcosystemMaven, true}, + + // Explicit scoping restricts to the listed ecosystems. + {"listed ecosystem enforced", &Config{Ecosystems: []string{"npm", "pip"}}, EcosystemNPM, true}, + {"unlisted ecosystem not enforced", &Config{Ecosystems: []string{"npm", "pip"}}, EcosystemMaven, false}, + {"single-entry scope excludes others", &Config{Ecosystems: []string{"npm"}}, EcosystemPNPM, false}, + + // The pipenv tool-name alias matches the internal pipfile ecosystem. + {"pipenv alias matches pipfile", &Config{Ecosystems: []string{"pipenv"}}, EcosystemPipfile, true}, + {"pipfile name matches pipfile", &Config{Ecosystems: []string{"pipfile"}}, EcosystemPipfile, true}, + + // A list of only-unknown names must NOT silently disable enforcement: a + // pure typo should fail safe (enforce everything) rather than turn the + // control off. The typo is surfaced separately via UnknownEcosystems. + {"all-typo list enforces all (fail safe)", &Config{Ecosystems: []string{"pyhton", "nmp"}}, EcosystemNPM, true}, + // A mix of one valid + one typo restricts to the valid one (the typo is + // ignored for scoping, still warned about elsewhere). + {"valid plus typo restricts to valid", &Config{Ecosystems: []string{"npm", "pyhton"}}, EcosystemNPM, true}, + {"valid plus typo excludes unlisted", &Config{Ecosystems: []string{"npm", "pyhton"}}, EcosystemPip, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.cfg.EnforcesEcosystem(tt.eco); got != tt.want { + t.Errorf("EnforcesEcosystem(%q) = %v, want %v", tt.eco, got, tt.want) + } + }) + } +} diff --git a/internal/supplychain/proxy.go b/internal/supplychain/proxy.go index 689acf07..d8d21a4d 100644 --- a/internal/supplychain/proxy.go +++ b/internal/supplychain/proxy.go @@ -17,8 +17,29 @@ import ( const ( defaultUpstreamRegistry = "https://registry.npmjs.org" + defaultPyPIIndex = "https://pypi.org" maxProxyResponseSize = 20 * 1024 * 1024 distTagLatest = "latest" + + // pypiSimpleJSONAccept is the PEP 691 content type for the PyPI Simple API + // JSON representation. The default Simple API response is HTML (PEP 503), + // which carries no upload timestamps; only the JSON form exposes the PEP 700 + // per-file "upload-time" field the age filter needs, so the proxy must send + // this Accept header upstream to obtain timestamps at all. + pypiSimpleJSONAccept = "application/vnd.pypi.simple.v1+json" +) + +// ProxyMode selects the upstream registry protocol the proxy speaks. The npm +// registry and the PyPI Simple API are entirely different shapes (one JSON blob +// with a version→time map vs. a per-file distribution index), so metadata +// detection and age filtering differ by mode. +type ProxyMode int + +const ( + // ModeNPM filters the npm registry's package metadata document. + ModeNPM ProxyMode = iota + // ModePyPI filters the PyPI Simple API (PEP 691/700 JSON) file index. + ModePyPI ) type BlockedPackage struct { @@ -34,6 +55,7 @@ type InstalledPackage struct { type Proxy struct { policy Policy + mode ProxyMode upstreamURL *url.URL httpClient *http.Client revProxy *httputil.ReverseProxy @@ -50,6 +72,7 @@ type Proxy struct { type ProxyConfig struct { Policy Policy + Mode ProxyMode UpstreamURL string SkipPackages []string } @@ -57,7 +80,14 @@ type ProxyConfig struct { func NewProxy(cfg ProxyConfig) (*Proxy, error) { upstream := cfg.UpstreamURL if upstream == "" { - upstream = defaultUpstreamRegistry + // Default the upstream to match the mode so a PyPI proxy constructed with + // only a Mode (the common case) still points at pypi.org rather than the + // npm registry. + if cfg.Mode == ModePyPI { + upstream = defaultPyPIIndex + } else { + upstream = defaultUpstreamRegistry + } } upstreamURL, err := url.Parse(upstream) @@ -70,13 +100,28 @@ func NewProxy(cfg ProxyConfig) (*Proxy, error) { skipSet[pkg] = true } + // NewSingleHostReverseProxy rewrites the request URL's scheme/host but + // deliberately does NOT rewrite the Host header (see its doc comment). Left + // as-is, tarball passthrough requests reach the upstream registry carrying + // the local proxy's Host (e.g. "127.0.0.1:61396"). registry.npmjs.org is + // fronted by a CDN that routes on Host, so an unknown value returns 403 — + // the metadata check passes but the actual .tgz download fails. Wrap the + // Director to point Host at the upstream registry so passthrough works. + revProxy := httputil.NewSingleHostReverseProxy(upstreamURL) + baseDirector := revProxy.Director + revProxy.Director = func(req *http.Request) { + baseDirector(req) + req.Host = upstreamURL.Host + } + return &Proxy{ policy: cfg.Policy, + mode: cfg.Mode, upstreamURL: upstreamURL, httpClient: &http.Client{ Timeout: 30 * time.Second, }, - revProxy: httputil.NewSingleHostReverseProxy(upstreamURL), + revProxy: revProxy, skipPackages: skipSet, allowed: make(map[string]string), }, nil @@ -147,9 +192,17 @@ func (p *Proxy) Close() error { } func (p *Proxy) handleRequest(w http.ResponseWriter, r *http.Request) { - pkgName := extractPackageNameFromPath(r.URL.Path) + var pkgName string + var isMetadata bool + if p.mode == ModePyPI { + pkgName = extractPyPIPackageNameFromPath(r.URL.Path) + isMetadata = isPyPIMetadataRequest(r.URL.Path) + } else { + pkgName = extractPackageNameFromPath(r.URL.Path) + isMetadata = isMetadataRequest(r.URL.Path) + } - if pkgName == "" || r.Method != http.MethodGet || !isMetadataRequest(r.URL.Path) { + if pkgName == "" || r.Method != http.MethodGet || !isMetadata { p.reverseProxy(w, r) return } @@ -176,7 +229,13 @@ func (p *Proxy) handleMetadataFiltering(w http.ResponseWriter, r *http.Request, http.Error(w, fmt.Sprintf("[armis] supply-chain: failed to create request for %s", pkgName), http.StatusBadGateway) return } - upstreamReq.Header.Set("Accept", "application/json") + if p.mode == ModePyPI { + // Request the PEP 691 JSON form so the response carries PEP 700 per-file + // upload-time fields; the default Simple API HTML has no timestamps. + upstreamReq.Header.Set("Accept", pypiSimpleJSONAccept) + } else { + upstreamReq.Header.Set("Accept", "application/json") + } // armis:ignore cwe:918 reason:p.upstreamURL is a startup-configured trusted host (defaults to registry.npmjs.org); the request host is not attacker-controlled resp, err := p.httpClient.Do(upstreamReq) //nolint:gosec // URL constructed from trusted upstream config @@ -234,7 +293,17 @@ func (p *Proxy) handleMetadataFiltering(w http.ResponseWriter, r *http.Request, return } - filtered, blocked := p.filterMetadata(body, pkgName) + var filtered []byte + var blocked []BlockedPackage + contentType := "application/json" + if p.mode == ModePyPI { + filtered, blocked = p.filterPyPISimple(body, pkgName) + // Echo the PEP 691 JSON content type so pip/uv parse the body as the + // Simple API JSON representation rather than guessing. + contentType = pypiSimpleJSONAccept + } else { + filtered, blocked = p.filterMetadata(body, pkgName) + } if blocked != nil { p.blockedMu.Lock() p.blocked = append(p.blocked, blocked...) @@ -249,7 +318,7 @@ func (p *Proxy) handleMetadataFiltering(w http.ResponseWriter, r *http.Request, // unvalidated upstream header values verbatim is an HTTP-response-splitting // vector (CWE-93). copyCacheHeaders sanitizes each value before writing it. copyCacheHeaders(w.Header(), resp.Header, blocked != nil) - w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Type", contentType) w.WriteHeader(http.StatusOK) w.Write(filtered) //nolint:errcheck,gosec } @@ -440,6 +509,112 @@ func (p *Proxy) filterMetadata(body []byte, pkgName string) ([]byte, []BlockedPa return result, blocked } +// filterPyPISimple filters a PEP 691 Simple API JSON document, removing every +// distribution file whose PEP 700 "upload-time" is younger than the policy +// threshold. It returns the re-marshaled body and the list of blocked files +// (one BlockedPackage per file, since PyPI can add a new file to an existing +// version — per-file filtering catches that where per-version would not). +// +// Files are decoded as map[string]json.RawMessage so every untouched field +// (url, hashes, requires-python, yanked, dist-info-metadata, ...) round-trips +// verbatim; only "upload-time" is inspected. +// +// Fail-closed posture: a file with a missing or unparseable "upload-time" is +// REMOVED, not kept. The whole point of this control is age verification, so an +// undatable file (e.g. an HTML response slipping through, or a registry that +// omits PEP 700 timestamps) must not be silently installable. The version label +// in BlockedPackage is the filename, which is what the user sees and what makes +// the block actionable. +func (p *Proxy) filterPyPISimple(body []byte, pkgName string) ([]byte, []BlockedPackage) { + var doc map[string]json.RawMessage + if err := json.Unmarshal(body, &doc); err != nil { + return body, nil + } + + filesRaw, ok := doc["files"] + if !ok { + return body, nil + } + + var files []map[string]json.RawMessage + if err := json.Unmarshal(filesRaw, &files); err != nil { + return body, nil + } + + now := time.Now() + kept := make([]map[string]json.RawMessage, 0, len(files)) + var blocked []BlockedPackage + + for _, f := range files { + filename := jsonString(f["filename"]) + age, ok := pypiFileAge(f["upload-time"], now) + if !ok || age < p.policy.MinReleaseAge { + // Undatable or too-young file: remove it. Record an age of 0 for the + // undatable case so the summary still names the blocked file. + if !ok { + age = 0 + } + blocked = append(blocked, BlockedPackage{Name: pkgName, Version: filename, Age: age}) + continue + } + kept = append(kept, f) + } + + if len(blocked) == 0 { + return body, nil + } + + newFiles, err := json.Marshal(kept) + if err != nil { + return body, blocked + } + doc["files"] = newFiles + + // PEP 700 also exposes a "versions" array. We intentionally leave it intact: + // a version remains "known" to the index even when all its files are filtered + // out — pip simply finds no installable distribution for it and reports no + // matching distribution, which is the correct fail-closed outcome. Rewriting + // it risks desyncing from clients that key off the versions list. + + result, err := json.Marshal(doc) + if err != nil { + return body, blocked + } + return result, blocked +} + +// pypiFileAge parses a PEP 700 "upload-time" raw JSON value and returns the age +// relative to now. The bool is false when the field is absent or unparseable. +// PEP 700 specifies RFC 3339; some mirrors omit the timezone, so a no-zone +// fallback is accepted as well (mirroring the PyPI registry client). +func pypiFileAge(raw json.RawMessage, now time.Time) (time.Duration, bool) { + s := jsonString(raw) + if s == "" { + return 0, false + } + t, err := time.Parse(time.RFC3339, s) + if err != nil { + t, err = time.Parse("2006-01-02T15:04:05", s) + if err != nil { + return 0, false + } + } + return now.Sub(t), true +} + +// jsonString decodes a JSON string value, returning "" for absent or non-string +// values rather than erroring — callers treat both as "field not usable". +func jsonString(raw json.RawMessage) string { + if len(raw) == 0 { + return "" + } + var s string + if err := json.Unmarshal(raw, &s); err != nil { + return "" + } + return s +} + func (p *Proxy) reverseProxy(w http.ResponseWriter, r *http.Request) { p.revProxy.ServeHTTP(w, r) //nolint:gosec // G704: single-host reverse proxy to a fixed upstream registry set at construction, not request-controlled } @@ -489,3 +664,36 @@ func isPrerelease(version string) bool { parts := strings.SplitN(version, "-", 2) return len(parts) == 2 && parts[0] != "" } + +// extractPyPIPackageNameFromPath pulls the project name from a PyPI Simple API +// request path of the form "/simple//". It returns "" for the index root +// ("/simple/") and for any path that is not a single project under /simple/, so +// only per-project metadata requests are filtered. +func extractPyPIPackageNameFromPath(path string) string { + if decoded, err := url.PathUnescape(path); err == nil { + path = decoded + } + path = strings.Trim(path, "/") + const prefix = "simple" + if path == prefix { + return "" // index root, not a project page + } + rest, ok := strings.CutPrefix(path, prefix+"/") + if !ok { + return "" + } + // rest should be a single project segment (with or without trailing slash, + // already trimmed). A nested path (e.g. files) is not a metadata request. + if rest == "" || strings.Contains(rest, "/") { + return "" + } + return rest +} + +// isPyPIMetadataRequest reports whether a path targets a PyPI Simple API project +// page (which the proxy filters) rather than the index root or a file download. +// File downloads are served from a separate host (files.pythonhosted.org) and +// never reach this proxy, so a path-based check on "/simple//" suffices. +func isPyPIMetadataRequest(path string) bool { + return extractPyPIPackageNameFromPath(path) != "" +} diff --git a/internal/supplychain/proxy_test.go b/internal/supplychain/proxy_test.go index 2d390542..0f7ef13c 100644 --- a/internal/supplychain/proxy_test.go +++ b/internal/supplychain/proxy_test.go @@ -7,6 +7,7 @@ import ( "net" "net/http" "net/http/httptest" + "net/url" "strconv" "strings" "testing" @@ -529,6 +530,43 @@ func TestProxyTarballPassThrough(t *testing.T) { } } +// TestProxyPassThroughRewritesHost guards the fix for tarball downloads failing +// with 403. NewSingleHostReverseProxy does not rewrite the Host header, so +// without the Director wrapper the upstream registry receives the local proxy's +// Host (e.g. 127.0.0.1:PORT) and a CDN that routes on Host rejects it. Assert +// the passed-through request carries the upstream host, not the proxy's. +func TestProxyPassThroughRewritesHost(t *testing.T) { + var gotHost string + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotHost = r.Host + w.Write([]byte("ok")) //nolint:errcheck,gosec + })) + defer upstream.Close() + + proxy, _ := NewProxy(ProxyConfig{ + Policy: Policy{MinReleaseAge: 72 * time.Hour}, + UpstreamURL: upstream.URL, + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + addr, _ := proxy.Start(ctx) + defer proxy.Close() //nolint:errcheck,gosec + + resp, err := http.Get("http://" + addr + "/express/-/express-4.18.2.tgz") //nolint:gosec + if err != nil { + t.Fatalf("request: %v", err) + } + defer resp.Body.Close() //nolint:errcheck,gosec + io.Copy(io.Discard, resp.Body) //nolint:errcheck,gosec + + upstreamURL, _ := url.Parse(upstream.URL) + if gotHost != upstreamURL.Host { + t.Errorf("passthrough Host = %q, want upstream host %q (proxy addr was %q)", gotHost, upstreamURL.Host, addr) + } +} + func TestProxyPolicyExclusion(t *testing.T) { now := time.Now() youngTime := now.Add(-1 * time.Hour).UTC().Format(time.RFC3339) @@ -959,3 +997,312 @@ func TestProxyFailOpen_ResponseTooLarge(t *testing.T) { t.Errorf("fail-open should not emit the too-large error; got body %q", string(body)) } } + +func TestProxyAddr(t *testing.T) { + t.Run("empty before Start", func(t *testing.T) { + p, err := NewProxy(ProxyConfig{Policy: Policy{MinReleaseAge: 72 * time.Hour}}) + if err != nil { + t.Fatalf("NewProxy: %v", err) + } + if got := p.Addr(); got != "" { + t.Errorf("Addr() before Start = %q, want empty", got) + } + }) + + t.Run("matches the bound address after Start", func(t *testing.T) { + p, err := NewProxy(ProxyConfig{Policy: Policy{MinReleaseAge: 72 * time.Hour}}) + if err != nil { + t.Fatalf("NewProxy: %v", err) + } + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + addr, err := p.Start(ctx) + if err != nil { + t.Fatalf("Start: %v", err) + } + defer p.Close() //nolint:errcheck,gosec + if p.Addr() != addr { + t.Errorf("Addr() = %q, want %q (the address returned by Start)", p.Addr(), addr) + } + if !strings.HasPrefix(p.Addr(), "127.0.0.1:") { + t.Errorf("Addr() = %q, want a 127.0.0.1 loopback address", p.Addr()) + } + }) +} + +// --- PyPI Simple API (ModePyPI) tests --- + +// pypiSimpleBody builds a PEP 691 Simple API JSON document for a project with +// the given files. Each file is {filename, url, hashes, upload-time}; an empty +// uploadTime omits the field entirely (to exercise the fail-closed path). +func pypiSimpleBody(name string, files []struct{ filename, uploadTime string }) string { + var b strings.Builder + b.WriteString(`{"meta":{"api-version":"1.1"},"name":"` + name + `","files":[`) + for i, f := range files { + if i > 0 { + b.WriteString(",") + } + b.WriteString(`{"filename":"` + f.filename + `","url":"https://files.pythonhosted.org/packages/` + f.filename + `","hashes":{"sha256":"deadbeef"}`) + if f.uploadTime != "" { + b.WriteString(`,"upload-time":"` + f.uploadTime + `"`) + } + b.WriteString("}") + } + b.WriteString(`]}`) + return b.String() +} + +func pypiFilenames(t *testing.T, body []byte) []string { + t.Helper() + var doc struct { + Files []struct { + Filename string `json:"filename"` + } `json:"files"` + } + if err := json.Unmarshal(body, &doc); err != nil { + t.Fatalf("unmarshal pypi body: %v\n%s", err, body) + } + names := make([]string, len(doc.Files)) + for i, f := range doc.Files { + names[i] = f.Filename + } + return names +} + +func TestProxyFilterPyPISimple(t *testing.T) { + now := time.Now() + old := now.Add(-30 * 24 * time.Hour).UTC().Format(time.RFC3339) + young := now.Add(-1 * time.Hour).UTC().Format(time.RFC3339) + + p := &Proxy{policy: Policy{MinReleaseAge: 72 * time.Hour}, mode: ModePyPI} + + t.Run("removes young files keeps old", func(t *testing.T) { + body := pypiSimpleBody("requests", []struct{ filename, uploadTime string }{ + {"requests-2.31.0-py3-none-any.whl", old}, + {"requests-2.32.0-py3-none-any.whl", young}, + }) + filtered, blocked := p.filterPyPISimple([]byte(body), "requests") + names := pypiFilenames(t, filtered) + if len(names) != 1 || names[0] != "requests-2.31.0-py3-none-any.whl" { + t.Errorf("kept files = %v, want only the old wheel", names) + } + if len(blocked) != 1 || blocked[0].Version != "requests-2.32.0-py3-none-any.whl" { + t.Errorf("blocked = %+v, want the young wheel", blocked) + } + }) + + t.Run("all old passes through unchanged", func(t *testing.T) { + body := pypiSimpleBody("flask", []struct{ filename, uploadTime string }{ + {"flask-3.0.0-py3-none-any.whl", old}, + {"flask-3.0.0.tar.gz", old}, + }) + filtered, blocked := p.filterPyPISimple([]byte(body), "flask") + if blocked != nil { + t.Errorf("expected no blocked files, got %+v", blocked) + } + if string(filtered) != body { + t.Errorf("body should be returned unchanged when nothing is filtered") + } + }) + + t.Run("fail-closed on missing upload-time", func(t *testing.T) { + // A file with no upload-time cannot be age-verified, so it must be + // removed — the whole point of the control is age verification. + body := pypiSimpleBody("mystery", []struct{ filename, uploadTime string }{ + {"mystery-1.0.0-py3-none-any.whl", ""}, + {"mystery-0.9.0-py3-none-any.whl", old}, + }) + filtered, blocked := p.filterPyPISimple([]byte(body), "mystery") + names := pypiFilenames(t, filtered) + if len(names) != 1 || names[0] != "mystery-0.9.0-py3-none-any.whl" { + t.Errorf("kept files = %v, want only the datable old wheel", names) + } + if len(blocked) != 1 || blocked[0].Version != "mystery-1.0.0-py3-none-any.whl" { + t.Errorf("blocked = %+v, want the undatable wheel", blocked) + } + }) + + t.Run("preserves untouched file fields", func(t *testing.T) { + body := pypiSimpleBody("requests", []struct{ filename, uploadTime string }{ + {"requests-2.31.0-py3-none-any.whl", old}, + {"requests-2.32.0-py3-none-any.whl", young}, + }) + filtered, _ := p.filterPyPISimple([]byte(body), "requests") + // The surviving file must retain its url and hashes verbatim. + if !strings.Contains(string(filtered), `"url":"https://files.pythonhosted.org/packages/requests-2.31.0-py3-none-any.whl"`) { + t.Errorf("surviving file lost its url field: %s", filtered) + } + if !strings.Contains(string(filtered), `"sha256":"deadbeef"`) { + t.Errorf("surviving file lost its hashes: %s", filtered) + } + // meta and name must round-trip. + if !strings.Contains(string(filtered), `"api-version":"1.1"`) { + t.Errorf("meta field dropped: %s", filtered) + } + }) + + t.Run("invalid JSON returned as-is", func(t *testing.T) { + body := []byte(`not json`) + filtered, blocked := p.filterPyPISimple(body, "x") + if blocked != nil || string(filtered) != string(body) { + t.Errorf("invalid JSON should pass through untouched") + } + }) + + t.Run("no files key returned as-is", func(t *testing.T) { + body := []byte(`{"meta":{"api-version":"1.1"},"name":"x"}`) + filtered, blocked := p.filterPyPISimple(body, "x") + if blocked != nil || string(filtered) != string(body) { + t.Errorf("body without files should pass through untouched") + } + }) + + t.Run("no-timezone upload-time accepted", func(t *testing.T) { + // PEP 700 says RFC 3339, but some mirrors omit the zone; the parser + // accepts the no-zone fallback rather than treating it as undatable. + oldNoZone := now.Add(-30 * 24 * time.Hour).UTC().Format("2006-01-02T15:04:05") + body := pypiSimpleBody("pkg", []struct{ filename, uploadTime string }{ + {"pkg-1.0.0.tar.gz", oldNoZone}, + }) + _, blocked := p.filterPyPISimple([]byte(body), "pkg") + if blocked != nil { + t.Errorf("no-timezone old file should pass, got blocked %+v", blocked) + } + }) +} + +func TestExtractPyPIPackageNameFromPath(t *testing.T) { + tests := []struct { + path string + want string + }{ + {"/simple/requests/", "requests"}, + {"/simple/requests", "requests"}, + {"/simple/Flask/", "Flask"}, + {"/simple/", ""}, // index root + {"/simple", ""}, // index root, no trailing slash + {"/", ""}, // not under /simple/ + {"/other/pkg/", ""}, // wrong prefix + {"/simple/a/b/", ""}, // nested, not a project page + {"/simple/%41/", "A"}, // percent-decoded + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + if got := extractPyPIPackageNameFromPath(tt.path); got != tt.want { + t.Errorf("extractPyPIPackageNameFromPath(%q) = %q, want %q", tt.path, got, tt.want) + } + }) + } +} + +// TestProxyPyPIEndToEnd drives a full pip-style request through the proxy in +// PyPI mode against a mock Simple API upstream, asserting young files are +// filtered, the JSON Accept header is sent upstream, and the response carries +// the PEP 691 content type. +func TestProxyPyPIEndToEnd(t *testing.T) { + now := time.Now() + old := now.Add(-30 * 24 * time.Hour).UTC().Format(time.RFC3339) + young := now.Add(-1 * time.Hour).UTC().Format(time.RFC3339) + + var gotAccept, gotPath string + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotAccept = r.Header.Get("Accept") + gotPath = r.URL.Path + w.Header().Set("Content-Type", pypiSimpleJSONAccept) + body := pypiSimpleBody("requests", []struct{ filename, uploadTime string }{ + {"requests-2.31.0-py3-none-any.whl", old}, + {"requests-2.32.0-py3-none-any.whl", young}, + }) + _, _ = io.WriteString(w, body) + })) + defer upstream.Close() + + proxy, err := NewProxy(ProxyConfig{ + Policy: Policy{MinReleaseAge: 72 * time.Hour}, + Mode: ModePyPI, + UpstreamURL: upstream.URL, + }) + if err != nil { + t.Fatalf("NewProxy: %v", err) + } + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + addr, err := proxy.Start(ctx) + if err != nil { + t.Fatalf("Start: %v", err) + } + defer proxy.Close() //nolint:errcheck,gosec + + resp, err := http.Get("http://" + addr + "/simple/requests/") //nolint:gosec // local test proxy + if err != nil { + t.Fatalf("request: %v", err) + } + defer resp.Body.Close() //nolint:errcheck,gosec + body, _ := io.ReadAll(resp.Body) + + if gotAccept != pypiSimpleJSONAccept { + t.Errorf("upstream Accept = %q, want %q", gotAccept, pypiSimpleJSONAccept) + } + if gotPath != "/simple/requests/" { + t.Errorf("upstream path = %q, want /simple/requests/", gotPath) + } + if ct := resp.Header.Get("Content-Type"); ct != pypiSimpleJSONAccept { + t.Errorf("response Content-Type = %q, want %q", ct, pypiSimpleJSONAccept) + } + names := pypiFilenames(t, body) + if len(names) != 1 || names[0] != "requests-2.31.0-py3-none-any.whl" { + t.Errorf("served files = %v, want only the old wheel", names) + } + if blocked := proxy.Blocked(); len(blocked) != 1 { + t.Errorf("expected 1 blocked file, got %+v", blocked) + } + if proxy.Checked() != 1 { + t.Errorf("expected 1 checked, got %d", proxy.Checked()) + } +} + +// TestProxyPyPIIndexRootPassesThrough ensures the /simple/ index root is not +// treated as a project page (it has no per-package files to filter) and is +// proxied through untouched. +func TestProxyPyPIIndexRootPassesThrough(t *testing.T) { + var servedPath string + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + servedPath = r.URL.Path + _, _ = io.WriteString(w, `{"meta":{"api-version":"1.1"},"projects":[]}`) + })) + defer upstream.Close() + + proxy, _ := NewProxy(ProxyConfig{ + Policy: Policy{MinReleaseAge: 72 * time.Hour}, + Mode: ModePyPI, + UpstreamURL: upstream.URL, + }) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + addr, _ := proxy.Start(ctx) + defer proxy.Close() //nolint:errcheck,gosec + + resp, err := http.Get("http://" + addr + "/simple/") //nolint:gosec // local test proxy + if err != nil { + t.Fatalf("request: %v", err) + } + defer resp.Body.Close() //nolint:errcheck,gosec + io.Copy(io.Discard, resp.Body) //nolint:errcheck,gosec + + if servedPath != "/simple/" { + t.Errorf("upstream path = %q, want /simple/", servedPath) + } + if proxy.Checked() != 0 { + t.Errorf("index root should not count as a checked package, got %d", proxy.Checked()) + } +} + +func TestNewProxyDefaultsPyPIUpstream(t *testing.T) { + p, err := NewProxy(ProxyConfig{Policy: Policy{MinReleaseAge: 72 * time.Hour}, Mode: ModePyPI}) + if err != nil { + t.Fatalf("NewProxy: %v", err) + } + if p.upstreamURL.String() != defaultPyPIIndex { + t.Errorf("PyPI-mode upstream = %q, want %q", p.upstreamURL.String(), defaultPyPIIndex) + } +} diff --git a/internal/supplychain/registry/pypi_test.go b/internal/supplychain/registry/pypi_test.go index b5297502..9eabd767 100644 --- a/internal/supplychain/registry/pypi_test.go +++ b/internal/supplychain/registry/pypi_test.go @@ -150,3 +150,76 @@ func TestPyPIGetPublishDate(t *testing.T) { } }) } + +func TestPyPIGetPublishDates(t *testing.T) { + // A single handler serves metadata for flask and requests, and 404s anything + // else, so one server backs every batch sub-case. + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/pypi/flask/json": + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"releases": {"3.0.0": [{"upload_time_iso_8601": "2023-09-30T12:00:00Z"}]}}`)) + case "/pypi/requests/json": + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"releases": {"2.31.0": [{"upload_time_iso_8601": "2023-05-22T15:12:00Z"}]}}`)) + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer server.Close() + + t.Run("all packages found", func(t *testing.T) { + client := NewPyPIClientWithHTTP(server.Client(), server.URL) + results := client.GetPublishDates(context.Background(), []PackageRequest{ + {Name: "flask", Version: "3.0.0"}, + {Name: "requests", Version: "2.31.0"}, + }) + if len(results) != 2 { + t.Fatalf("expected 2 results, got %d", len(results)) + } + // Results are index-aligned with the input order. + for i, want := range []string{"flask", "requests"} { + if results[i].Name != want { + t.Errorf("results[%d].Name = %q, want %q", i, results[i].Name, want) + } + if results[i].Err != nil { + t.Errorf("results[%d] unexpected error: %v", i, results[i].Err) + } + if results[i].PublishTime.IsZero() { + t.Errorf("results[%d] has zero publish time", i) + } + } + }) + + t.Run("mixed found and missing", func(t *testing.T) { + client := NewPyPIClientWithHTTP(server.Client(), server.URL) + results := client.GetPublishDates(context.Background(), []PackageRequest{ + {Name: "flask", Version: "3.0.0"}, + {Name: "does-not-exist", Version: "1.0.0"}, + }) + if len(results) != 2 { + t.Fatalf("expected 2 results, got %d", len(results)) + } + if results[0].Err != nil { + t.Errorf("flask should resolve, got error: %v", results[0].Err) + } + if results[1].Err == nil { + t.Error("missing package should produce an error, not a silent pass") + } + }) + + t.Run("cancelled context errors every package", func(t *testing.T) { + client := NewPyPIClientWithHTTP(server.Client(), server.URL) + ctx, cancel := context.WithCancel(context.Background()) + cancel() // cancel before the calls run + results := client.GetPublishDates(ctx, []PackageRequest{ + {Name: "flask", Version: "3.0.0"}, + {Name: "requests", Version: "2.31.0"}, + }) + for i, r := range results { + if r.Err == nil { + t.Errorf("results[%d] expected a context error, got nil", i) + } + } + }) +} diff --git a/internal/supplychain/shell_test.go b/internal/supplychain/shell_test.go index ed095f0c..75254b04 100644 --- a/internal/supplychain/shell_test.go +++ b/internal/supplychain/shell_test.go @@ -411,3 +411,48 @@ func TestDetectPipVariants(t *testing.T) { } }) } + +func TestDetectShells(t *testing.T) { + // DetectShells resolves RC paths from os.UserHomeDir(), which honors $HOME on + // Unix but reads %USERPROFILE% on Windows — so these $HOME-driven cases are + // Unix-only. + if runtime.GOOS == goosWindows { + t.Skip("DetectShells home-dir resolution is exercised via $HOME, which is Unix-only") + } + + t.Run("current shell is ordered first", func(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + t.Setenv("SHELL", "/bin/zsh") + // Both RC files exist: bash qualifies via fileExists, zsh via the + // current-shell match. The current shell must be prepended. + for _, f := range []string{".bashrc", ".zshrc"} { + if err := os.WriteFile(filepath.Join(home, f), []byte("# rc\n"), 0o600); err != nil { + t.Fatal(err) + } + } + + shells := DetectShells() + if len(shells) == 0 { + t.Fatal("expected at least one detected shell") + } + if shells[0].Name != "zsh" { + t.Errorf("first shell = %q, want zsh (the current $SHELL)", shells[0].Name) + } + names := make([]string, len(shells)) + for i, s := range shells { + names[i] = s.Name + } + if !slices.Contains(names, "bash") { + t.Errorf("expected bash among detected shells (its .bashrc exists), got %v", names) + } + }) + + t.Run("no RC files and no SHELL yields nothing", func(t *testing.T) { + t.Setenv("HOME", t.TempDir()) + t.Setenv("SHELL", "") + if shells := DetectShells(); len(shells) != 0 { + t.Errorf("expected no shells for an empty home with no $SHELL, got %v", shells) + } + }) +} diff --git a/internal/supplychain/supplychain.go b/internal/supplychain/supplychain.go index cd50043f..feecbc40 100644 --- a/internal/supplychain/supplychain.go +++ b/internal/supplychain/supplychain.go @@ -86,7 +86,10 @@ func ParseDuration(s string) (time.Duration, error) { default: d, err := time.ParseDuration(s) if err != nil { - return 0, fmt.Errorf("invalid duration %q: %w", s, err) + // Don't wrap time.ParseDuration's error: it repeats `invalid duration + // %q` verbatim, producing a doubled message. Give an actionable hint + // about accepted formats instead. + return 0, fmt.Errorf("invalid duration %q: use a number with a unit like 72h, 3d, or 1w", s) } // time.ParseDuration accepts signed values (e.g. "-72h"). A negative // min-age is nonsensical and would effectively disable age enforcement, diff --git a/internal/supplychain/supplychain_test.go b/internal/supplychain/supplychain_test.go index 771458ed..93c20747 100644 --- a/internal/supplychain/supplychain_test.go +++ b/internal/supplychain/supplychain_test.go @@ -72,6 +72,11 @@ func TestClassifySeverity(t *testing.T) { {25 * time.Hour, model.SeverityMedium}, {48 * time.Hour, model.SeverityMedium}, {71 * time.Hour, model.SeverityMedium}, + // At and beyond the threshold the package is old enough to pass: the + // boundary is `age < threshold` (Medium) vs `age >= threshold` (Low), so + // exactly 72h must classify as Low. + {72 * time.Hour, model.SeverityLow}, + {100 * time.Hour, model.SeverityLow}, } for _, tt := range tests { From ef503005af4e96b42ce97ac49f75ecd325f58f7a Mon Sep 17 00:00:00 2001 From: Yiftach Cohen Date: Fri, 5 Jun 2026 09:32:29 +0200 Subject: [PATCH 2/2] fix(supply-chain): populate Allowed() for PyPI proxy; route wrapEcosystemEnforced through loadConfigUpward MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - filterPyPISimple now populates p.allowed with the newest safe version extracted from kept files (parsed from the wheel/sdist filename), so the wrap summary can report "→ 2.31.0 installed" for pip/uv installs instead of the misleading "no older safe version" warning - wrapEcosystemEnforced now calls loadConfigUpward() instead of supplychain.LoadConfig directly, so the unknown-ecosystems warning is emitted consistently with check/status - Adds TestProxyFilterPyPISimple_AllowedPopulated and TestPypiVersionFromFilename Addresses Copilot review comments on PR #212 --- internal/cmd/supply_chain_wrap.go | 15 ++++----- internal/supplychain/proxy.go | 51 ++++++++++++++++++++++++++++ internal/supplychain/proxy_test.go | 53 ++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 9 deletions(-) diff --git a/internal/cmd/supply_chain_wrap.go b/internal/cmd/supply_chain_wrap.go index fe335f8a..ff9d5912 100644 --- a/internal/cmd/supply_chain_wrap.go +++ b/internal/cmd/supply_chain_wrap.go @@ -669,20 +669,17 @@ func resolveWrapPolicy() supplychain.Policy { } // wrapEcosystemEnforced reports whether the config (searched upward from the -// current directory) scopes enforcement to exclude this package manager's -// ecosystem. It loads the config best-effort: any load error or absent config -// means "enforce" (fail safe), matching resolveWrapPolicy's posture. Pass the -// canonical PM name so versioned pip variants resolve correctly. +// current directory) scopes enforcement to include this package manager's +// ecosystem. It uses loadConfigUpward so unknown-ecosystem warnings are emitted +// consistently with check/status. Any load error or absent config means +// "enforce" (fail safe). Pass the canonical PM name so versioned pip variants +// resolve correctly. func wrapEcosystemEnforced(canonicalPMName string) bool { eco := pmToEcosystem(canonicalPMName) if eco == "" { return true } - dir := supplychain.FindConfigDir(".") - if dir == "" { - return true - } - cfg, err := supplychain.LoadConfig(dir) + cfg, _, err := loadConfigUpward(".") if err != nil || cfg == nil { return true } diff --git a/internal/supplychain/proxy.go b/internal/supplychain/proxy.go index d8d21a4d..84a785df 100644 --- a/internal/supplychain/proxy.go +++ b/internal/supplychain/proxy.go @@ -564,6 +564,38 @@ func (p *Proxy) filterPyPISimple(body []byte, pkgName string) ([]byte, []Blocked return body, nil } + // Populate the allowed map with the newest safe version so the wrap summary + // can report "→ 2.30.0 installed" instead of "no older safe version". PyPI + // file objects carry no explicit "version" field, so we parse it from the + // wheel/sdist filename (e.g. "requests-2.30.0-py3-none-any.whl" → "2.30.0"). + // We skip pre-releases (alpha/beta/rc) and pick the newest stable version + // still in kept; ties go to the last one encountered (upload order is newest- + // first in the Simple API, so the first non-prerelease is the right pick). + if p.allowed != nil { + var bestVersion string + var bestAge time.Duration + for _, f := range kept { + fname := jsonString(f["filename"]) + ver := pypiVersionFromFilename(fname) + if ver == "" || isPrerelease(ver) { + continue + } + age, ok := pypiFileAge(f["upload-time"], now) + if !ok { + continue + } + if bestVersion == "" || age < bestAge { + bestVersion = ver + bestAge = age + } + } + if bestVersion != "" { + p.allowedMu.Lock() + p.allowed[pkgName] = bestVersion + p.allowedMu.Unlock() + } + } + newFiles, err := json.Marshal(kept) if err != nil { return body, blocked @@ -602,6 +634,25 @@ func pypiFileAge(raw json.RawMessage, now time.Time) (time.Duration, bool) { return now.Sub(t), true } +// pypiVersionFromFilename extracts the version component from a wheel or sdist +// filename. Wheel names follow "{name}-{version}-..." and sdists follow +// "{name}-{version}.tar.gz" (or .zip). Returns "" if the pattern does not match. +func pypiVersionFromFilename(filename string) string { + // Strip the file extension first so split position is consistent. + name := filename + for _, ext := range []string{".whl", ".tar.gz", ".zip", ".tar.bz2", ".egg"} { + if strings.HasSuffix(name, ext) { + name = name[:len(name)-len(ext)] + break + } + } + parts := strings.SplitN(name, "-", 3) + if len(parts) < 2 { + return "" + } + return parts[1] +} + // jsonString decodes a JSON string value, returning "" for absent or non-string // values rather than erroring — callers treat both as "field not usable". func jsonString(raw json.RawMessage) string { diff --git a/internal/supplychain/proxy_test.go b/internal/supplychain/proxy_test.go index 0f7ef13c..d0031620 100644 --- a/internal/supplychain/proxy_test.go +++ b/internal/supplychain/proxy_test.go @@ -1171,6 +1171,59 @@ func TestProxyFilterPyPISimple(t *testing.T) { }) } +func TestProxyFilterPyPISimple_AllowedPopulated(t *testing.T) { + // Proxy.Allowed() must be populated with the newest stable safe version so + // the wrap summary can report "→ 2.31.0 installed" rather than "no older + // safe version" for pip/uv installs. + now := time.Now() + old1 := now.Add(-60 * 24 * time.Hour).UTC().Format(time.RFC3339) // 2.30.0 + old2 := now.Add(-30 * 24 * time.Hour).UTC().Format(time.RFC3339) // 2.31.0 — newer of the two safe versions + young := now.Add(-1 * time.Hour).UTC().Format(time.RFC3339) // 2.32.0 — blocked + + p, err := NewProxy(ProxyConfig{Policy: Policy{MinReleaseAge: 72 * time.Hour}, Mode: ModePyPI}) + if err != nil { + t.Fatalf("NewProxy: %v", err) + } + + body := pypiSimpleBody("requests", []struct{ filename, uploadTime string }{ + {"requests-2.30.0-py3-none-any.whl", old1}, + {"requests-2.31.0-py3-none-any.whl", old2}, + {"requests-2.32.0-py3-none-any.whl", young}, + }) + _, blocked := p.filterPyPISimple([]byte(body), "requests") + if len(blocked) != 1 { + t.Fatalf("expected 1 blocked file, got %d: %+v", len(blocked), blocked) + } + + allowed := p.Allowed() + if len(allowed) != 1 || allowed[0].Name != "requests" || allowed[0].Version != "2.31.0" { + t.Errorf("Allowed() = %+v, want [{requests 2.31.0}]", allowed) + } +} + +func TestPypiVersionFromFilename(t *testing.T) { + tests := []struct { + filename string + want string + }{ + {"requests-2.31.0-py3-none-any.whl", "2.31.0"}, + {"requests-2.31.0.tar.gz", "2.31.0"}, + {"Flask-3.0.0-py3-none-any.whl", "3.0.0"}, + {"numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.whl", "1.26.2"}, + {"setuptools-68.0.0.tar.gz", "68.0.0"}, + {"pkg-1.0.0a1-py3-none-any.whl", "1.0.0a1"}, + {"noversion.whl", ""}, + {"", ""}, + } + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + if got := pypiVersionFromFilename(tt.filename); got != tt.want { + t.Errorf("pypiVersionFromFilename(%q) = %q, want %q", tt.filename, got, tt.want) + } + }) + } +} + func TestExtractPyPIPackageNameFromPath(t *testing.T) { tests := []struct { path string