diff --git a/internal/supplychain/check/check.go b/internal/supplychain/check/check.go index 7524e0c..1c45ecb 100644 --- a/internal/supplychain/check/check.go +++ b/internal/supplychain/check/check.go @@ -145,17 +145,23 @@ func detectEcosystemFromPath(path string) supplychain.Ecosystem { } // isRequirementsFile reports whether a lowercased path is a pip requirements -// file. It matches the conventional layouts (requirements.txt, requirements-dev.txt, -// and the requirements/.txt directory split) by requiring both a -// "requirements" path segment and a .txt extension. The .txt guard is what keeps -// pip-tools input files (requirements.in, which hold unpinned specifiers -// ParsePipRequirements would silently drop) from being misclassified as a pinned -// lockfile and yielding a false "all clear". +// file. It matches the conventional layouts — a "requirements*.txt" basename +// (requirements.txt, requirements-dev.txt) or any *.txt under a "requirements/" +// directory split — rather than a loose "requirements" substring, so unrelated +// files like "myrequirements.txt" are not misclassified as a pinned lockfile +// (which would parse empty and yield a false "all clear"). The .txt guard also +// keeps pip-tools input files (requirements.in, which hold unpinned specifiers +// ParsePipRequirements would silently drop) out. func isRequirementsFile(lowerPath string) bool { if !strings.HasSuffix(lowerPath, ".txt") { return false } - return strings.Contains(filepath.ToSlash(lowerPath), "requirements") + slashed := filepath.ToSlash(lowerPath) + base := filepath.Base(slashed) + if strings.HasPrefix(base, "requirements") { + return true + } + return strings.Contains(slashed, "requirements/") } func diffEntries(current, base []PackageEntry) []PackageEntry { diff --git a/internal/supplychain/check/check_test.go b/internal/supplychain/check/check_test.go index 89c6f4a..d08ed8f 100644 --- a/internal/supplychain/check/check_test.go +++ b/internal/supplychain/check/check_test.go @@ -95,6 +95,12 @@ func TestDetectEcosystemFromPath(t *testing.T) { {"requirements.txt", supplychain.EcosystemPip}, {"/project/requirements-dev.txt", supplychain.EcosystemPip}, {"/project/requirements/base.txt", supplychain.EcosystemPip}, + // A "requirements" substring that is not a path segment (e.g. + // "myrequirements.txt") must NOT be classified as pip — it would parse + // empty and report a false "all clear". Only a "requirements*" basename + // or a file under a "requirements/" directory qualifies. + {"myrequirements.txt", supplychain.EcosystemNPM}, + {"/project/build-requirements-notes.txt", supplychain.EcosystemNPM}, // requirements.in (pip-tools input, unpinned) must NOT be treated as a // pinned requirements lockfile — doing so would silently skip every // unpinned line and report a false "all clear". It falls through to the diff --git a/internal/supplychain/check/pip.go b/internal/supplychain/check/pip.go index 278b197..effd3bb 100644 --- a/internal/supplychain/check/pip.go +++ b/internal/supplychain/check/pip.go @@ -35,6 +35,13 @@ func ParsePipRequirements(path string) ([]PackageEntry, error) { var entries []PackageEntry scanner := bufio.NewScanner(bytes.NewReader(data)) + // A single requirements.txt line can carry many "--hash=sha256:..." entries + // or long URLs and exceed bufio.Scanner's default 64KB token limit, which + // would otherwise surface as scanner.Err() = "token too long" and turn a + // valid lockfile into a hard failure. Allow one line to grow up to the same + // maxLockfileSize cap readLockfile already enforces, so parsing stays robust + // without reintroducing an unbounded allocation. + scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLockfileSize) for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) diff --git a/internal/supplychain/check/pip_test.go b/internal/supplychain/check/pip_test.go index b473805..b5787c9 100644 --- a/internal/supplychain/check/pip_test.go +++ b/internal/supplychain/check/pip_test.go @@ -1,8 +1,11 @@ package check import ( + "bufio" + "os" "path/filepath" "sort" + "strings" "testing" ) @@ -76,6 +79,35 @@ func TestParsePipRequirements(t *testing.T) { t.Error("expected error for missing file") } }) + + t.Run("parses lines longer than the default scanner token", func(t *testing.T) { + // A pinned requirement followed by many "--hash" entries on a single + // line-continuation-free line can exceed bufio.Scanner's default 64KB + // token limit. Build a line well past that to assert the raised buffer + // keeps such a lockfile parseable instead of failing "token too long". + var sb strings.Builder + sb.WriteString("flask==3.0.0 \\") + for i := 0; i < 2000; i++ { + sb.WriteString(" --hash=sha256:0000000000000000000000000000000000000000000000000000000000000000") + } + if sb.Len() <= bufio.MaxScanTokenSize { + t.Fatalf("test line is %d bytes, expected > %d to exercise the raised buffer", sb.Len(), bufio.MaxScanTokenSize) + } + + dir := t.TempDir() + path := filepath.Join(dir, "requirements.txt") + if err := os.WriteFile(path, []byte(sb.String()+"\n"), 0o600); err != nil { + t.Fatalf("writing fixture: %v", err) + } + + entries, err := ParsePipRequirements(path) + if err != nil { + t.Fatalf("unexpected error on long line: %v", err) + } + if len(entries) != 1 || entries[0].Name != "flask" || entries[0].Version != "3.0.0" { + t.Errorf("expected [flask@3.0.0], got %v", entries) + } + }) } func TestParsePipRequirement(t *testing.T) { diff --git a/internal/supplychain/detect_test.go b/internal/supplychain/detect_test.go index f47426e..daaa4c6 100644 --- a/internal/supplychain/detect_test.go +++ b/internal/supplychain/detect_test.go @@ -84,7 +84,7 @@ func TestDetectEcosystems(t *testing.T) { // This ENOTDIR behavior is Unix-only: on Windows, stat'ing a path under a // non-directory returns ERROR_PATH_NOT_FOUND, which os.IsNotExist reports as // true, so the error is (correctly) treated as "lockfile absent" there. - if runtime.GOOS == "windows" { + if runtime.GOOS == goosWindows { t.Skip("ENOTDIR-style stat errors are not reproducible on Windows (maps to IsNotExist)") } base := t.TempDir() diff --git a/internal/supplychain/registry/pypi.go b/internal/supplychain/registry/pypi.go index 24e5073..82bddcc 100644 --- a/internal/supplychain/registry/pypi.go +++ b/internal/supplychain/registry/pypi.go @@ -60,6 +60,12 @@ func NewPyPIClientWithHTTP(httpClient *http.Client, baseURL string) *PyPIClient if baseURL == "" { baseURL = defaultPyPIURL } + // Guard the exported constructor against a nil client: callers that pass nil + // would otherwise hit a nil-pointer panic at c.httpClient.Do(). Default to + // the same timeout-configured client NewPyPIClient uses. + if httpClient == nil { + httpClient = &http.Client{Timeout: 30 * time.Second} + } return &PyPIClient{ httpClient: httpClient, baseURL: baseURL, diff --git a/internal/supplychain/registry/pypi_test.go b/internal/supplychain/registry/pypi_test.go index ecc24e8..b529750 100644 --- a/internal/supplychain/registry/pypi_test.go +++ b/internal/supplychain/registry/pypi_test.go @@ -110,6 +110,20 @@ func TestPyPIGetPublishDate(t *testing.T) { } }) + t.Run("nil http client defaults instead of panicking", func(t *testing.T) { + // NewPyPIClientWithHTTP is exported, so a caller may pass nil. It must + // default to a usable client rather than leave httpClient nil and panic + // at httpClient.Do(). Asserting the field is populated keeps the test + // hermetic (no real network call). + client := NewPyPIClientWithHTTP(nil, "") + if client.httpClient == nil { + t.Fatal("expected a defaulted http client, got nil") + } + if client.baseURL != defaultPyPIURL { + t.Errorf("expected baseURL to default to %q, got %q", defaultPyPIURL, client.baseURL) + } + }) + t.Run("caches responses", func(t *testing.T) { calls := 0 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { diff --git a/internal/supplychain/shell.go b/internal/supplychain/shell.go index 651a14e..82e6947 100644 --- a/internal/supplychain/shell.go +++ b/internal/supplychain/shell.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "regexp" + "runtime" "slices" "strings" ) @@ -15,6 +16,10 @@ const ( markerEnd = "# <<< armis-cli supply-chain <<<" ) +// goosWindows is runtime.GOOS on Windows. Centralized so the several +// platform guards across this package (and its tests) share one literal. +const goosWindows = "windows" + // validPMName bounds package-manager names to a safe shell identifier: a // lowercase letter followed by lowercase letters, digits, or hyphens, with an // optional trailing `.` version suffix so versioned pip variants @@ -320,9 +325,27 @@ func DetectPipVariants() []string { continue } name := entry.Name() - if pipExecutable.MatchString(name) { - seen[name] = true + if !pipExecutable.MatchString(name) { + continue + } + // On Unix, a pip-named entry on PATH with no execute bit (a stray data + // file or a non-exec script) would yield a wrapper that later fails at + // exec.LookPath with a confusing error, so require at least one execute + // bit before treating it as a real pip command. Info() reports the + // entry's own mode (lstat semantics); a symlink to a real pip keeps its + // 0o777 link bits and so still passes, matching what the user can run. + // + // Skip this check on Windows: there is no execute-bit concept there + // (executability is governed by file extension via PATHEXT), and + // os.FileMode.Perm never sets 0o111, so the filter would reject every + // real pip and collapse detection to the ["pip"] fallback. + if runtime.GOOS != goosWindows { + info, err := entry.Info() + if err != nil || info.Mode().Perm()&0o111 == 0 { + continue + } } + seen[name] = true } } diff --git a/internal/supplychain/shell_test.go b/internal/supplychain/shell_test.go index 0f56157..ed095f0 100644 --- a/internal/supplychain/shell_test.go +++ b/internal/supplychain/shell_test.go @@ -4,6 +4,7 @@ import ( "os" "path/filepath" "runtime" + "slices" "strings" "testing" ) @@ -183,7 +184,7 @@ func TestInjectAndRemoveFunctions(t *testing.T) { } func TestRemoveFunctions_PreservesPermissions(t *testing.T) { - if runtime.GOOS == "windows" { + if runtime.GOOS == goosWindows { t.Skip("Unix file permissions not supported on Windows") } @@ -335,7 +336,7 @@ func TestDetectPipVariants(t *testing.T) { got := DetectPipVariants() want := []string{pipExe, "pip3", "pip3.12"} - if !equalStrings(got, want) { + if !slices.Equal(got, want) { t.Errorf("DetectPipVariants() = %v, want %v", got, want) } }) @@ -368,6 +369,32 @@ func TestDetectPipVariants(t *testing.T) { } }) + t.Run("ignores non-executable pip-named files", func(t *testing.T) { + if runtime.GOOS == goosWindows { + // Windows has no execute-bit concept (executability is governed by + // file extension), so DetectPipVariants does not filter on mode there + // and this Unix-only behavior cannot be exercised. + t.Skip("execute-bit filtering is Unix-only") + } + + dir := t.TempDir() + // An executable pip alongside a pip3 that lacks any execute bit (a stray + // data file). Only the runnable one should be wrapped — a wrapper for the + // non-exec file would later fail at exec.LookPath. + if err := os.WriteFile(filepath.Join(dir, pipExe), []byte{}, 0o755); err != nil { //nolint:gosec + t.Fatalf("seed %s: %v", pipExe, err) + } + if err := os.WriteFile(filepath.Join(dir, "pip3"), []byte{}, 0o600); err != nil { + t.Fatalf("seed pip3: %v", err) + } + t.Setenv("PATH", dir) + + got := DetectPipVariants() + if !slices.Equal(got, []string{pipExe}) { + t.Errorf("expected only the executable [pip], got %v", got) + } + }) + t.Run("falls back to pip when none found", func(t *testing.T) { t.Setenv("PATH", t.TempDir()) got := DetectPipVariants() @@ -384,15 +411,3 @@ func TestDetectPipVariants(t *testing.T) { } }) } - -func equalStrings(a, b []string) bool { - if len(a) != len(b) { - return false - } - for i := range a { - if a[i] != b[i] { - return false - } - } - return true -}