From b80d820197ab9718ae8fd329206c8441503663a1 Mon Sep 17 00:00:00 2001 From: Yiftach Cohen Date: Wed, 3 Jun 2026 19:24:16 +0200 Subject: [PATCH 1/2] [PPSC-877] fix(supply-chain): address PR #209 review comments Resolve four correctness edge cases raised by Copilot on PR #209 (merged before they were addressed), all in the Python release-age enforcement path: - pip.go: raise the bufio.Scanner token buffer to maxLockfileSize so requirements.txt lines bloated with --hash entries no longer fail with "token too long" - shell.go: DetectPipVariants now requires an execute bit, so a non-runnable pip-named file on PATH no longer produces a dead wrapper - check.go: isRequirementsFile matches a "requirements*" basename or a "requirements/" directory segment instead of a loose substring, so files like myrequirements.txt are not misclassified as pip (which would parse empty and report a false "all clear") - pypi.go: NewPyPIClientWithHTTP nil-guards the injected client to avoid a nil-pointer panic at httpClient.Do() Also replace a hand-rolled equalStrings test helper with slices.Equal to clear a gosec G602 false positive surfaced by the new test. Adds regression tests for each fix. --- internal/supplychain/check/check.go | 20 ++++++++----- internal/supplychain/check/check_test.go | 6 ++++ internal/supplychain/check/pip.go | 7 +++++ internal/supplychain/check/pip_test.go | 32 ++++++++++++++++++++ internal/supplychain/registry/pypi.go | 6 ++++ internal/supplychain/registry/pypi_test.go | 14 +++++++++ internal/supplychain/shell.go | 15 ++++++++-- internal/supplychain/shell_test.go | 34 +++++++++++++--------- 8 files changed, 112 insertions(+), 22 deletions(-) diff --git a/internal/supplychain/check/check.go b/internal/supplychain/check/check.go index 7524e0c..1c45ecb 100644 --- a/internal/supplychain/check/check.go +++ b/internal/supplychain/check/check.go @@ -145,17 +145,23 @@ func detectEcosystemFromPath(path string) supplychain.Ecosystem { } // isRequirementsFile reports whether a lowercased path is a pip requirements -// file. It matches the conventional layouts (requirements.txt, requirements-dev.txt, -// and the requirements/.txt directory split) by requiring both a -// "requirements" path segment and a .txt extension. The .txt guard is what keeps -// pip-tools input files (requirements.in, which hold unpinned specifiers -// ParsePipRequirements would silently drop) from being misclassified as a pinned -// lockfile and yielding a false "all clear". +// file. It matches the conventional layouts — a "requirements*.txt" basename +// (requirements.txt, requirements-dev.txt) or any *.txt under a "requirements/" +// directory split — rather than a loose "requirements" substring, so unrelated +// files like "myrequirements.txt" are not misclassified as a pinned lockfile +// (which would parse empty and yield a false "all clear"). The .txt guard also +// keeps pip-tools input files (requirements.in, which hold unpinned specifiers +// ParsePipRequirements would silently drop) out. func isRequirementsFile(lowerPath string) bool { if !strings.HasSuffix(lowerPath, ".txt") { return false } - return strings.Contains(filepath.ToSlash(lowerPath), "requirements") + slashed := filepath.ToSlash(lowerPath) + base := filepath.Base(slashed) + if strings.HasPrefix(base, "requirements") { + return true + } + return strings.Contains(slashed, "requirements/") } func diffEntries(current, base []PackageEntry) []PackageEntry { diff --git a/internal/supplychain/check/check_test.go b/internal/supplychain/check/check_test.go index 89c6f4a..d08ed8f 100644 --- a/internal/supplychain/check/check_test.go +++ b/internal/supplychain/check/check_test.go @@ -95,6 +95,12 @@ func TestDetectEcosystemFromPath(t *testing.T) { {"requirements.txt", supplychain.EcosystemPip}, {"/project/requirements-dev.txt", supplychain.EcosystemPip}, {"/project/requirements/base.txt", supplychain.EcosystemPip}, + // A "requirements" substring that is not a path segment (e.g. + // "myrequirements.txt") must NOT be classified as pip — it would parse + // empty and report a false "all clear". Only a "requirements*" basename + // or a file under a "requirements/" directory qualifies. + {"myrequirements.txt", supplychain.EcosystemNPM}, + {"/project/build-requirements-notes.txt", supplychain.EcosystemNPM}, // requirements.in (pip-tools input, unpinned) must NOT be treated as a // pinned requirements lockfile — doing so would silently skip every // unpinned line and report a false "all clear". It falls through to the diff --git a/internal/supplychain/check/pip.go b/internal/supplychain/check/pip.go index 278b197..effd3bb 100644 --- a/internal/supplychain/check/pip.go +++ b/internal/supplychain/check/pip.go @@ -35,6 +35,13 @@ func ParsePipRequirements(path string) ([]PackageEntry, error) { var entries []PackageEntry scanner := bufio.NewScanner(bytes.NewReader(data)) + // A single requirements.txt line can carry many "--hash=sha256:..." entries + // or long URLs and exceed bufio.Scanner's default 64KB token limit, which + // would otherwise surface as scanner.Err() = "token too long" and turn a + // valid lockfile into a hard failure. Allow one line to grow up to the same + // maxLockfileSize cap readLockfile already enforces, so parsing stays robust + // without reintroducing an unbounded allocation. + scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLockfileSize) for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) diff --git a/internal/supplychain/check/pip_test.go b/internal/supplychain/check/pip_test.go index b473805..b5787c9 100644 --- a/internal/supplychain/check/pip_test.go +++ b/internal/supplychain/check/pip_test.go @@ -1,8 +1,11 @@ package check import ( + "bufio" + "os" "path/filepath" "sort" + "strings" "testing" ) @@ -76,6 +79,35 @@ func TestParsePipRequirements(t *testing.T) { t.Error("expected error for missing file") } }) + + t.Run("parses lines longer than the default scanner token", func(t *testing.T) { + // A pinned requirement followed by many "--hash" entries on a single + // line-continuation-free line can exceed bufio.Scanner's default 64KB + // token limit. Build a line well past that to assert the raised buffer + // keeps such a lockfile parseable instead of failing "token too long". + var sb strings.Builder + sb.WriteString("flask==3.0.0 \\") + for i := 0; i < 2000; i++ { + sb.WriteString(" --hash=sha256:0000000000000000000000000000000000000000000000000000000000000000") + } + if sb.Len() <= bufio.MaxScanTokenSize { + t.Fatalf("test line is %d bytes, expected > %d to exercise the raised buffer", sb.Len(), bufio.MaxScanTokenSize) + } + + dir := t.TempDir() + path := filepath.Join(dir, "requirements.txt") + if err := os.WriteFile(path, []byte(sb.String()+"\n"), 0o600); err != nil { + t.Fatalf("writing fixture: %v", err) + } + + entries, err := ParsePipRequirements(path) + if err != nil { + t.Fatalf("unexpected error on long line: %v", err) + } + if len(entries) != 1 || entries[0].Name != "flask" || entries[0].Version != "3.0.0" { + t.Errorf("expected [flask@3.0.0], got %v", entries) + } + }) } func TestParsePipRequirement(t *testing.T) { diff --git a/internal/supplychain/registry/pypi.go b/internal/supplychain/registry/pypi.go index 24e5073..82bddcc 100644 --- a/internal/supplychain/registry/pypi.go +++ b/internal/supplychain/registry/pypi.go @@ -60,6 +60,12 @@ func NewPyPIClientWithHTTP(httpClient *http.Client, baseURL string) *PyPIClient if baseURL == "" { baseURL = defaultPyPIURL } + // Guard the exported constructor against a nil client: callers that pass nil + // would otherwise hit a nil-pointer panic at c.httpClient.Do(). Default to + // the same timeout-configured client NewPyPIClient uses. + if httpClient == nil { + httpClient = &http.Client{Timeout: 30 * time.Second} + } return &PyPIClient{ httpClient: httpClient, baseURL: baseURL, diff --git a/internal/supplychain/registry/pypi_test.go b/internal/supplychain/registry/pypi_test.go index ecc24e8..b529750 100644 --- a/internal/supplychain/registry/pypi_test.go +++ b/internal/supplychain/registry/pypi_test.go @@ -110,6 +110,20 @@ func TestPyPIGetPublishDate(t *testing.T) { } }) + t.Run("nil http client defaults instead of panicking", func(t *testing.T) { + // NewPyPIClientWithHTTP is exported, so a caller may pass nil. It must + // default to a usable client rather than leave httpClient nil and panic + // at httpClient.Do(). Asserting the field is populated keeps the test + // hermetic (no real network call). + client := NewPyPIClientWithHTTP(nil, "") + if client.httpClient == nil { + t.Fatal("expected a defaulted http client, got nil") + } + if client.baseURL != defaultPyPIURL { + t.Errorf("expected baseURL to default to %q, got %q", defaultPyPIURL, client.baseURL) + } + }) + t.Run("caches responses", func(t *testing.T) { calls := 0 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { diff --git a/internal/supplychain/shell.go b/internal/supplychain/shell.go index 651a14e..a14e941 100644 --- a/internal/supplychain/shell.go +++ b/internal/supplychain/shell.go @@ -320,9 +320,20 @@ func DetectPipVariants() []string { continue } name := entry.Name() - if pipExecutable.MatchString(name) { - seen[name] = true + if !pipExecutable.MatchString(name) { + continue + } + // A pip-named entry on PATH with no execute bit (a stray data file or + // a non-exec script) would yield a wrapper that later fails at + // exec.LookPath with a confusing error, so require at least one + // execute bit before treating it as a real pip command. Info() reports + // the entry's own mode (lstat semantics); a symlink to a real pip keeps + // its 0o777 link bits and so still passes, matching what the user can run. + info, err := entry.Info() + if err != nil || info.Mode().Perm()&0o111 == 0 { + continue } + seen[name] = true } } diff --git a/internal/supplychain/shell_test.go b/internal/supplychain/shell_test.go index 0f56157..9c48f81 100644 --- a/internal/supplychain/shell_test.go +++ b/internal/supplychain/shell_test.go @@ -4,6 +4,7 @@ import ( "os" "path/filepath" "runtime" + "slices" "strings" "testing" ) @@ -335,7 +336,7 @@ func TestDetectPipVariants(t *testing.T) { got := DetectPipVariants() want := []string{pipExe, "pip3", "pip3.12"} - if !equalStrings(got, want) { + if !slices.Equal(got, want) { t.Errorf("DetectPipVariants() = %v, want %v", got, want) } }) @@ -368,6 +369,25 @@ func TestDetectPipVariants(t *testing.T) { } }) + t.Run("ignores non-executable pip-named files", func(t *testing.T) { + dir := t.TempDir() + // An executable pip alongside a pip3 that lacks any execute bit (a stray + // data file). Only the runnable one should be wrapped — a wrapper for the + // non-exec file would later fail at exec.LookPath. + if err := os.WriteFile(filepath.Join(dir, pipExe), []byte{}, 0o755); err != nil { //nolint:gosec + t.Fatalf("seed %s: %v", pipExe, err) + } + if err := os.WriteFile(filepath.Join(dir, "pip3"), []byte{}, 0o600); err != nil { + t.Fatalf("seed pip3: %v", err) + } + t.Setenv("PATH", dir) + + got := DetectPipVariants() + if !slices.Equal(got, []string{pipExe}) { + t.Errorf("expected only the executable [pip], got %v", got) + } + }) + t.Run("falls back to pip when none found", func(t *testing.T) { t.Setenv("PATH", t.TempDir()) got := DetectPipVariants() @@ -384,15 +404,3 @@ func TestDetectPipVariants(t *testing.T) { } }) } - -func equalStrings(a, b []string) bool { - if len(a) != len(b) { - return false - } - for i := range a { - if a[i] != b[i] { - return false - } - } - return true -} From 49631b138c7b69e2caae748f0a40c49d66617fb3 Mon Sep 17 00:00:00 2001 From: Yiftach Cohen Date: Wed, 3 Jun 2026 19:39:51 +0200 Subject: [PATCH 2/2] [PPSC-877] fix(supply-chain): skip pip execute-bit check on Windows The execute-bit filter added to DetectPipVariants (Perm()&0o111) assumes Unix permission semantics. On Windows os.FileMode.Perm never sets the execute bits (executability is governed by file extension via PATHEXT), so the filter rejected every real pip and collapsed detection to the ["pip"] fallback, failing TestDetectPipVariants on windows-latest CI. Gate the execute-bit check behind runtime.GOOS != goosWindows so Windows keeps its prior name-match behavior while Unix retains the improvement, and skip the Unix-only "ignores non-executable" subtest on Windows. Introduce a shared goosWindows constant for the package's platform guards. --- internal/supplychain/detect_test.go | 2 +- internal/supplychain/shell.go | 30 ++++++++++++++++++++--------- internal/supplychain/shell_test.go | 9 ++++++++- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/internal/supplychain/detect_test.go b/internal/supplychain/detect_test.go index f47426e..daaa4c6 100644 --- a/internal/supplychain/detect_test.go +++ b/internal/supplychain/detect_test.go @@ -84,7 +84,7 @@ func TestDetectEcosystems(t *testing.T) { // This ENOTDIR behavior is Unix-only: on Windows, stat'ing a path under a // non-directory returns ERROR_PATH_NOT_FOUND, which os.IsNotExist reports as // true, so the error is (correctly) treated as "lockfile absent" there. - if runtime.GOOS == "windows" { + if runtime.GOOS == goosWindows { t.Skip("ENOTDIR-style stat errors are not reproducible on Windows (maps to IsNotExist)") } base := t.TempDir() diff --git a/internal/supplychain/shell.go b/internal/supplychain/shell.go index a14e941..82e6947 100644 --- a/internal/supplychain/shell.go +++ b/internal/supplychain/shell.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "regexp" + "runtime" "slices" "strings" ) @@ -15,6 +16,10 @@ const ( markerEnd = "# <<< armis-cli supply-chain <<<" ) +// goosWindows is runtime.GOOS on Windows. Centralized so the several +// platform guards across this package (and its tests) share one literal. +const goosWindows = "windows" + // validPMName bounds package-manager names to a safe shell identifier: a // lowercase letter followed by lowercase letters, digits, or hyphens, with an // optional trailing `.` version suffix so versioned pip variants @@ -323,15 +328,22 @@ func DetectPipVariants() []string { if !pipExecutable.MatchString(name) { continue } - // A pip-named entry on PATH with no execute bit (a stray data file or - // a non-exec script) would yield a wrapper that later fails at - // exec.LookPath with a confusing error, so require at least one - // execute bit before treating it as a real pip command. Info() reports - // the entry's own mode (lstat semantics); a symlink to a real pip keeps - // its 0o777 link bits and so still passes, matching what the user can run. - info, err := entry.Info() - if err != nil || info.Mode().Perm()&0o111 == 0 { - continue + // On Unix, a pip-named entry on PATH with no execute bit (a stray data + // file or a non-exec script) would yield a wrapper that later fails at + // exec.LookPath with a confusing error, so require at least one execute + // bit before treating it as a real pip command. Info() reports the + // entry's own mode (lstat semantics); a symlink to a real pip keeps its + // 0o777 link bits and so still passes, matching what the user can run. + // + // Skip this check on Windows: there is no execute-bit concept there + // (executability is governed by file extension via PATHEXT), and + // os.FileMode.Perm never sets 0o111, so the filter would reject every + // real pip and collapse detection to the ["pip"] fallback. + if runtime.GOOS != goosWindows { + info, err := entry.Info() + if err != nil || info.Mode().Perm()&0o111 == 0 { + continue + } } seen[name] = true } diff --git a/internal/supplychain/shell_test.go b/internal/supplychain/shell_test.go index 9c48f81..ed095f0 100644 --- a/internal/supplychain/shell_test.go +++ b/internal/supplychain/shell_test.go @@ -184,7 +184,7 @@ func TestInjectAndRemoveFunctions(t *testing.T) { } func TestRemoveFunctions_PreservesPermissions(t *testing.T) { - if runtime.GOOS == "windows" { + if runtime.GOOS == goosWindows { t.Skip("Unix file permissions not supported on Windows") } @@ -370,6 +370,13 @@ func TestDetectPipVariants(t *testing.T) { }) t.Run("ignores non-executable pip-named files", func(t *testing.T) { + if runtime.GOOS == goosWindows { + // Windows has no execute-bit concept (executability is governed by + // file extension), so DetectPipVariants does not filter on mode there + // and this Unix-only behavior cannot be exercised. + t.Skip("execute-bit filtering is Unix-only") + } + dir := t.TempDir() // An executable pip alongside a pip3 that lacks any execute bit (a stray // data file). Only the runnable one should be wrapped — a wrapper for the