diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..952a2ef --- /dev/null +++ b/.gitattributes @@ -0,0 +1,16 @@ +# Keep line endings deterministic regardless of the contributor's OS or +# core.autocrlf setting. + +# Go source must be LF: gofmt and the toolchain expect Unix line endings. +*.go text eol=lf + +# Shell scripts must be LF: a CRLF shebang line (e.g. "#!/bin/sh\r") makes +# the kernel fail to locate the interpreter. +*.sh text eol=lf + +# PowerShell handles LF on every platform; keep it consistent with .editorconfig. +*.ps1 text eol=lf + +# Windows batch scripts use CRLF. +*.cmd text eol=crlf +*.bat text eol=crlf diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4f22f32..a159c3d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,9 +24,13 @@ permissions: jobs: build: name: Build & Test - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: + # Surface failures on every OS independently instead of cancelling the + # whole matrix when one runner fails. + fail-fast: false matrix: + os: [ubuntu-latest, windows-latest] go-version: ["1.25.x"] steps: @@ -43,14 +47,22 @@ jobs: - name: Build run: go build ./... + # Force bash on Windows runners too: pwsh's legacy native-argument + # passing splits `-coverprofile=coverage.out` and feeds `.out` to go + # as a package import path, producing `FAIL .out [setup failed]`. + # Git Bash is preinstalled on windows-latest and parses args verbatim. - name: Test (with race detector and coverage) + shell: bash run: go test -race -timeout 120s -covermode=atomic -coverpkg=./... -coverprofile=coverage.out ./... # Self-hosted coverage badge: parse the total from `go tool cover -func` # and rewrite .github/badges/coverage.json. The shields.io endpoint badge # in README.md reads this JSON via raw.githubusercontent.com. + # Ubuntu-only: the Windows leg of the matrix uses a different shell + # toolchain and we only need one canonical coverage number. - name: Compute coverage and update badge id: coverage + if: matrix.os == 'ubuntu-latest' run: | pct=$(go tool cover -func=coverage.out | awk '/^total:/ {gsub("%","",$3); print $3}') if [ -z "$pct" ]; then @@ -79,7 +91,7 @@ jobs: # upload would silently match nothing and, combined with # if-no-files-found: error, fail the build job on every push to main. - name: Upload coverage badge artifact - if: github.event_name == 'push' && github.ref == 'refs/heads/main' + if: matrix.os == 'ubuntu-latest' && github.event_name == 'push' && github.ref == 'refs/heads/main' uses: actions/upload-artifact@v5 with: name: coverage-badge @@ -121,6 +133,43 @@ jobs: git commit -m "chore(ci): update coverage badge to ${pct} [skip ci]" git push origin badges + e2e-windows: + # Windows e2e is intentionally narrower than the Linux e2e: it does not + # set SKILL_UP_FULL_E2E, so the LLM-dependent tests in e2e/cli_test.go, + # e2e/agent_test.go and e2e/mcp_test.go self-skip. The mock-engine and + # script-judge contract tests still exercise the Windows-specific code + # paths added by issue #31 — shell selection, MSYS argv handling, + # QuoteWindows, NewShellCmd's WaitDelay, the script-judge interpreter + # dispatch — through the full CLI pipeline. Real-LLM coverage stays on + # the Linux e2e job below. + name: E2E (none runtime, Windows) + runs-on: windows-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-go@v6 + with: + go-version: "1.25.x" + cache: true + + # Match the Build & Test step's shell choice so pwsh's legacy argv + # passing does not split `-coverprofile`-style flags. + - name: Run e2e tests (none runtime, quick mode) + shell: bash + run: go test -tags e2e -timeout 1200s -count=1 -v ./e2e + env: + SKILL_UP_E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts + + - name: Upload e2e workspace artifacts + if: always() && hashFiles('e2e-artifacts/**') != '' + uses: actions/upload-artifact@v5 + with: + name: e2e-windows-workspaces + path: e2e-artifacts/ + if-no-files-found: ignore + retention-days: 14 + e2e: name: E2E (none runtime) runs-on: ubuntu-latest @@ -213,7 +262,7 @@ jobs: - name: Upload e2e workspace artifacts if: always() && steps.secrets.outputs.available == 'true' && hashFiles('e2e-artifacts/**') != '' - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@v5 with: name: e2e-workspaces path: e2e-artifacts/ @@ -336,7 +385,7 @@ jobs: - name: Upload OpenSandbox server log if: always() && steps.secrets.outputs.available == 'true' - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@v5 with: name: opensandbox-server-log path: ${{ runner.temp }}/opensandbox-server.log @@ -345,7 +394,7 @@ jobs: - name: Upload opensandbox e2e workspace artifacts if: always() && steps.secrets.outputs.available == 'true' && hashFiles('e2e-opensandbox-artifacts/**') != '' - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@v5 with: name: e2e-opensandbox-workspaces path: e2e-opensandbox-artifacts/ diff --git a/AGENTS.md b/AGENTS.md index 22fe952..c2fa654 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -34,6 +34,11 @@ make lint-tools If you are in mainland China and `go install` is slow, set `GOPROXY=https://goproxy.cn,direct` before running the commands above. +On Windows, `make` is unavailable by default; use the PowerShell equivalents +in `scripts/windows/` (`hooks.ps1`, `lint-tools.ps1`, `verify.ps1`). See the +[Windows support guide](docs/guide/windows.md) for supported features and +known limitations. + ## Build & run ```bash diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d1de89..8d179a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,39 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.3.0] - 2026-05-27 + +### Added +- **First-class Windows support** for the CLI, the `none` runtime, and the + script judge. Native Windows builds run all unit tests, the script judge + routes `.ps1`/`.cmd`/`.bat` directly and `.sh` through Git Bash when + available, and CI gains a `windows-latest` build/test matrix plus a + dedicated `E2E (none runtime, Windows)` contract job. + See [Windows Support](docs/guide/windows.md) for the full guide. +- `SKILL_UP_BASH` environment variable: explicit path to a `bash` + executable for skill-up's `none` runtime to use. Honored on every + platform (read once at startup, takes precedence over `PATH`). +- PowerShell tooling under `scripts/windows/`: `hooks.ps1`, + `lint-tools.ps1`, and `verify.ps1` mirror the Makefile targets for + contributors on Windows; `examples/judge-debug-eval.ps1` provides a + runnable PowerShell script-judge example. +- `.gitattributes` pins line endings (LF for `*.sh`, CRLF for `*.ps1` / + `*.cmd` / `*.bat`) so Git checkout on Windows does not break scripts. + +### Changed +- Agent CLIs (Claude Code, Codex, Qoder CLI) now hard-fail on Windows + hosts without a discoverable bash, with a clear error pointing at + Git Bash or `SKILL_UP_BASH`. Previously the cmd.exe fallback would + accept agent commands but leak shell metacharacters from instructions + into the host shell. +- `internal/platform` centralizes host shell, quoter, and bash discovery + behind a single `platform.Host()` (cached for the process lifetime). + Replaces the previous ad-hoc platform branching in `NoneRuntime.Exec` + and the script-judge planner. +- `Runtime.TargetGOOS() string` is now a required interface method so + future runtimes get a compile-time error rather than silently + defaulting to `"linux"`. + ## [0.2.3] - 2026-05-27 ### Added diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 333cad4..be383d9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,6 +20,12 @@ We welcome bug reports, feature requests, documentation improvements, and code c # If you touched anything under e2e/ or internal/runner/, also run: make e2e ``` + On Windows, `make` is unavailable by default — use the PowerShell scripts + in `scripts/windows/` (`verify.ps1`, `lint-tools.ps1`, `hooks.ps1`) and the + standard `go build` / `go test -race ./...` commands. The `make e2e` + equivalent is `go test -tags e2e -v ./e2e` (with the same env vars the + Makefile target sets). See the + [Windows support guide](docs/guide/windows.md). 5. Commit using **Conventional Commits** (enforced by `.githooks/commit-msg`). See the *Commit Message* section below for the allowed types and examples. 6. Push your branch to your fork and open a Pull Request against `main`. Fill out the PR template, link any related issues, and describe the user-visible impact. 7. Update [`CHANGELOG.md`](CHANGELOG.md) in the same PR if your change is user-visible. diff --git a/README.md b/README.md index 696d32f..72e2e24 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,11 @@ make build go build -o bin/skill-up ./cmd/skill-up ``` +**Windows users**: skill-up runs natively on Windows. See +[Windows Support](docs/guide/windows.md) for the recommended workflow, +known limitations (notably: native agent CLI execution requires Git +Bash), and the PowerShell tooling under `scripts/windows/`. + ## Quick Start ### 1. Create Eval Config diff --git a/README.zh.md b/README.zh.md index bb05f0a..baeca7d 100644 --- a/README.zh.md +++ b/README.zh.md @@ -141,6 +141,11 @@ make build go build -o bin/skill-up ./cmd/skill-up ``` +**Windows 用户**:skill-up 原生支持 Windows。请参阅 +[Windows 支持指南](docs/zh/guide/windows.md) 了解推荐工作流、已知限制 +(特别是:原生运行 agent CLI 需要 Git Bash)以及 `scripts/windows/` +下的 PowerShell 工具脚本。 + ## 快速上手 ### 第一步:创建评测配置 diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts index 4f5f343..5f77440 100644 --- a/docs/.vitepress/config.ts +++ b/docs/.vitepress/config.ts @@ -51,6 +51,7 @@ export default defineConfig({ text: 'Introduction', items: [ { text: 'Getting Started', link: '/guide/getting-started' }, + { text: 'Windows Support', link: '/guide/windows' }, ], }, { @@ -106,6 +107,7 @@ export default defineConfig({ text: '入门', items: [ { text: '快速上手', link: '/zh/guide/getting-started' }, + { text: 'Windows 支持', link: '/zh/guide/windows' }, ], }, { diff --git a/docs/guide/windows.md b/docs/guide/windows.md new file mode 100644 index 0000000..1112a0e --- /dev/null +++ b/docs/guide/windows.md @@ -0,0 +1,109 @@ +# Windows Support + +skill-up runs natively on Windows. This page covers what works, the current +limitations, and the recommended workflow. + +--- + +## Supported + +- **Build and unit tests** — `go build ./...` and `go test ./...` pass on + Windows. CI exercises a `windows-latest` runner alongside Linux. +- **The `none` runtime** — commands run on the host through `cmd.exe`. +- **The `opensandbox` runtime** — unaffected by the host OS; it always + executes inside a Linux sandbox. +- **The script judge** — dispatches by file extension (or shebang): + + | Script | Interpreter on Windows | + | ----------------- | ------------------------------------------- | + | `.ps1` | PowerShell | + | `.cmd` / `.bat` | `cmd.exe` | + | `.sh` | bash (Git Bash; see below) | + +## Running `.sh` script judges on Windows + +A `.sh` script judge needs a `bash` interpreter. skill-up looks for one in +this order: + +1. the `SKILL_UP_BASH` environment variable (an explicit path to `bash.exe`); +2. `bash` on `PATH`; +3. well-known Git Bash install locations — + `C:\Program Files\Git\bin\bash.exe` and + `C:\Program Files (x86)\Git\bin\bash.exe`. + +If none is found the script judge fails with a clear error. Install +[Git for Windows](https://git-scm.com/download/win) or set `SKILL_UP_BASH`. + +The WSL shim at `C:\Windows\System32\bash.exe` is intentionally rejected at +all three steps (override, PATH, well-known) because it expects Linux-format +`/mnt/c/...` paths and silently fails on the Windows-style paths skill-up +generates. Users who want to drive script judges through WSL must arrange +path translation upstream and point `SKILL_UP_BASH` at a non-WSL bash — or +simply run skill-up inside WSL itself (see "Recommended workflow" below). + +## OpenSandbox runtime on Windows + +The `opensandbox` runtime talks to a remote OpenSandbox server over HTTP and +never spawns a host shell. Running `skill-up.exe` on native Windows against a +remote sandbox works today: all host-side path handling already crosses the +host→sandbox boundary through `filepath.ToSlash`, and the sandbox itself is a +Linux container, so the script judge and any agent run inside it behave +exactly as they do on Linux. + +OpenSandbox also offers a [**Windows guest profile**](https://github.com/alibaba/OpenSandbox/blob/main/docs/windows-sandbox.md): +the server runs `dockur/windows` (Windows in KVM/QEMU inside a Linux container) +and the API accepts `platform: {"os": "windows", "arch": "amd64"}` on create. +At the time of writing the Go SDK does not yet expose the `Platform` field, so +driving a Windows-guest sandbox from skill-up is blocked on an upstream Go SDK +update — tracked separately. + +For a Windows machine that needs the full agent workflow **without** a remote +sandbox, run skill-up inside **WSL2**. WSL2 is a Linux environment, so both the +`none` and `opensandbox` runtimes — including the agent Node/nvm bootstrap — +work without limitation. + +## Contributor tooling + +`make` is not available on Windows by default. Use the PowerShell scripts +under `scripts/windows/` instead: + +```powershell +# Install git hooks (equivalent to `make hooks`) +pwsh scripts/windows/hooks.ps1 + +# Install pinned lint tools into .tools/bin (equivalent to `make lint-tools`) +pwsh scripts/windows/lint-tools.ps1 + +# fmt-check + vet + revive + golangci-lint (equivalent to `make verify`) +pwsh scripts/windows/verify.ps1 +``` + +Build and test use the standard Go toolchain, which is cross-platform: + +```powershell +go build -o bin/skill-up.exe ./cmd/skill-up +go test -race ./... +``` + +## Known limitations + +- **Running real agents natively** — Claude Code / Codex / Qoder CLI are + launched through a bash-based Node/nvm bootstrap. That bootstrap does not + run under `cmd.exe`. To run full agent evals on Windows, either install + Node.js and the agent CLIs yourself beforehand, or use WSL2. +- **`.ps1` script judges require a Windows target** — when the runtime target + is POSIX (for example the `opensandbox` Linux sandbox), only `.sh` scripts + are supported. +- **`cmd.exe` expands `%VAR%` inside arguments** — when no bash is discovered + and the `cmd /d /s /c` fallback shell runs, literal `%NAME%` substrings + inside command arguments are still expanded by cmd. There is no reliable + command-line escape for this. Do not interpolate untrusted strings into + shell commands. Install Git Bash (which skill-up auto-discovers) to avoid + the cmd fallback entirely. + +## Recommended workflow + +- **Authoring and running script-judge evals** — native Windows works well. + Prefer `.ps1` script judges, or install Git for Windows for `.sh` support. +- **Running full agent evals** — use **WSL2**, so the evaluator and the agent + CLIs share one POSIX environment and avoid path/credential friction. diff --git a/docs/zh/guide/windows.md b/docs/zh/guide/windows.md new file mode 100644 index 0000000..2d78826 --- /dev/null +++ b/docs/zh/guide/windows.md @@ -0,0 +1,96 @@ +# Windows 支持 + +skill-up 原生支持 Windows。本页说明哪些功能可用、当前的限制,以及推荐的工作流。 + +--- + +## 已支持 + +- **构建与单元测试** —— `go build ./...` 和 `go test ./...` 在 Windows 上通过。 + CI 在 Linux 之外额外运行 `windows-latest` runner。 +- **`none` runtime** —— 命令通过 `cmd.exe` 在宿主机上执行。 +- **`opensandbox` runtime** —— 不受宿主机 OS 影响,始终在 Linux 沙箱内执行。 +- **script judge** —— 按文件扩展名(或 shebang)分派解释器: + + | 脚本 | Windows 上的解释器 | + | ----------------- | ------------------------------------------- | + | `.ps1` | PowerShell | + | `.cmd` / `.bat` | `cmd.exe` | + | `.sh` | bash(Git Bash,见下文) | + +## 在 Windows 上运行 `.sh` script judge + +`.sh` script judge 需要一个 `bash` 解释器。skill-up 按以下顺序查找: + +1. `SKILL_UP_BASH` 环境变量(指向 `bash.exe` 的明确路径); +2. `PATH` 上的 `bash`; +3. 知名 Git Bash 安装位置 —— `C:\Program Files\Git\bin\bash.exe` 与 + `C:\Program Files (x86)\Git\bin\bash.exe`。 + +若都找不到,script judge 会以明确的错误失败。请安装 +[Git for Windows](https://git-scm.com/download/win) 或设置 `SKILL_UP_BASH`。 + +`C:\Windows\System32\bash.exe`(WSL shim)会在三个步骤里都被主动忽略 —— +即使通过 `SKILL_UP_BASH` 显式指向或它在 PATH 上排在前面,因为它期望 Linux +风格的 `/mnt/c/...` 路径,而 skill-up 传入的是 Windows 路径,会静默失败。 +需要走 WSL 的用户请自行处理路径翻译并把 `SKILL_UP_BASH` 指向非 WSL 的 bash, +或者直接在 WSL 内运行 skill-up(见下文「推荐工作流」)。 + +## Windows 上的 OpenSandbox runtime + +`opensandbox` runtime 通过 HTTP 与远程 OpenSandbox 服务器通信,不会启动任何 +宿主机 shell。在原生 Windows 上运行 `skill-up.exe` 连接远程 sandbox 当前即可 +工作:所有宿主机侧的路径处理都已通过 `filepath.ToSlash` 跨越「宿主机→sandbox」 +边界,而 sandbox 本身是 Linux 容器,因此其中的 script judge 和 agent 行为与在 +Linux 上完全一致。 + +OpenSandbox 也提供 [**Windows guest profile**](https://github.com/alibaba/OpenSandbox/blob/main/docs/windows-sandbox.md): +服务端在 Linux 容器里通过 KVM/QEMU 运行 `dockur/windows`,创建 API 接受 +`platform: {"os": "windows", "arch": "amd64"}`。撰写本文时 Go SDK 尚未暴露 +`Platform` 字段,因此从 skill-up 驱动 Windows guest sandbox 依赖上游 Go SDK +补齐 —— 单独跟进。 + +如果某台 Windows 机器需要在**没有**远程 sandbox 的情况下使用完整的 agent +工作流,请在 **WSL2** 中运行 skill-up。WSL2 是 Linux 环境,因此 `none` 与 +`opensandbox` 两种 runtime —— 包括 agent 的 Node/nvm 引导 —— 都能无限制工作。 + +## 贡献者工具 + +Windows 默认没有 `make`。请改用 `scripts/windows/` 下的 PowerShell 脚本: + +```powershell +# 安装 git hooks(等价于 `make hooks`) +pwsh scripts/windows/hooks.ps1 + +# 将固定版本的 lint 工具装入 .tools/bin(等价于 `make lint-tools`) +pwsh scripts/windows/lint-tools.ps1 + +# fmt-check + vet + revive + golangci-lint(等价于 `make verify`) +pwsh scripts/windows/verify.ps1 +``` + +构建和测试使用标准的 Go 工具链,本身就是跨平台的: + +```powershell +go build -o bin/skill-up.exe ./cmd/skill-up +go test -race ./... +``` + +## 已知限制 + +- **原生运行真实 agent** —— Claude Code / Codex / Qoder CLI 通过基于 bash 的 + Node/nvm 引导脚本启动,该脚本无法在 `cmd.exe` 下运行。要在 Windows 上运行 + 完整的 agent 评测,请预先自行安装 Node.js 和对应的 agent CLI,或使用 WSL2。 +- **`.ps1` script judge 需要 Windows 目标** —— 当 runtime 目标是 POSIX + (例如 `opensandbox` 的 Linux 沙箱)时,仅支持 `.sh` 脚本。 +- **`cmd.exe` 会展开参数里的 `%VAR%`** —— 当宿主未发现 bash、回退到 + `cmd /d /s /c` 时,参数中的 `%NAME%` 子串仍会被 cmd 展开,命令行层面没有 + 可靠的转义办法。不要把不可信字符串拼接到 shell 命令中。安装 Git Bash + (skill-up 会自动发现)可完全避开此回退路径。 + +## 推荐工作流 + +- **编写并运行 script-judge 评测** —— 原生 Windows 即可。优先使用 `.ps1` + script judge,或安装 Git for Windows 以支持 `.sh`。 +- **运行完整的 agent 评测** —— 使用 **WSL2**,让评测器与 agent CLI 共享同一个 + POSIX 环境,避免路径与凭据的摩擦。 diff --git a/e2e/main_test.go b/e2e/main_test.go index 21ea5b5..f4682b2 100644 --- a/e2e/main_test.go +++ b/e2e/main_test.go @@ -24,7 +24,14 @@ func mustCompile() (string, func()) { panic("creating temp dir: " + err.Error()) } - binPath := filepath.Join(dir, "skill-up") + binName := "skill-up" + if runtime.GOOS == "windows" { + // Windows refuses to execute a file without a recognized extension, + // so go build's -o output must end in .exe or every later + // exec.Command(binaryPath) fails with "executable file not found". + binName += ".exe" + } + binPath := filepath.Join(dir, binName) _, testFile, _, _ := runtime.Caller(0) projectRoot := filepath.Dir(filepath.Dir(testFile)) diff --git a/examples/judge-debug-eval.ps1 b/examples/judge-debug-eval.ps1 new file mode 100644 index 0000000..b7b5fd0 --- /dev/null +++ b/examples/judge-debug-eval.ps1 @@ -0,0 +1,17 @@ +#!/usr/bin/env pwsh +# Evaluation script: check whether the agent output identified a bug and +# provided a fix suggestion. PowerShell counterpart of judge-debug-eval.sh, +# for use as a script judge on Windows hosts. +$message = $env:EVAL_FINAL_MESSAGE + +if ($message -match 'bug') { + if ($message -match 'fix|nil') { + Write-Output 'Agent identified the bug AND provided a fix' + exit 0 + } + Write-Output 'Agent identified the bug but did NOT provide a fix' + exit 1 +} + +Write-Output 'Agent failed to identify any bug' +exit 1 diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 0411650..c9e22dc 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -14,13 +14,37 @@ import ( "github.com/alibaba/skill-up/internal/credential" "github.com/alibaba/skill-up/internal/logging" "github.com/alibaba/skill-up/internal/observability" + "github.com/alibaba/skill-up/internal/platform" "github.com/alibaba/skill-up/internal/runtime" + "github.com/alibaba/skill-up/internal/shellquote" "github.com/alibaba/skill-up/pkg/transcript" ) // ErrAgentNotFound is returned when the agent executable is not found. var ErrAgentNotFound = errors.New("agent not found in PATH") +// ErrAgentRequiresBash is returned when an agent CLI is invoked on a Windows +// host where Git Bash (or another bash discoverable by platform.DiscoverBash) +// is not available. Agent commands are POSIX-quoted and assume a bash +// interpreter; running them through the cmd.exe fallback would let metachars +// (`& | " %VAR%`) in the instruction reach the shell unprotected. See +// docs/guide/windows.md for the documented limitation. +var ErrAgentRequiresBash = errors.New("agent CLI execution on Windows requires bash; install Git for Windows or set SKILL_UP_BASH") + +// requireBashOnWindowsHost rejects agent execution when the runtime's target +// is Windows but the host shell would be cmd.exe. We only enforce this for +// runtimes whose target matches the host (NoneRuntime today); sandboxed +// runtimes target a non-Windows guest and never go through platform.Host(). +func requireBashOnWindowsHost(rt Runtime) error { + if rt.TargetGOOS() != platform.GOOSWindows { + return nil + } + if platform.Host().IsBash { + return nil + } + return ErrAgentRequiresBash +} + // ErrAgentInstallFailed is returned when agent installation fails. var ErrAgentInstallFailed = errors.New("agent installation failed") @@ -114,9 +138,13 @@ const ExitCodeSignalKilled = -1 const ( agentProviderOpenAI = "openai" agentProviderAnthropic = "anthropic" - agentExecutablePath = "$HOME/.local/bin:$HOME/.nvm/current/bin:$PATH" ) +// agentExecutablePath is defined per host OS in path_{windows,other}.go: a +// POSIX PATH override pointing at the nvm/Node bootstrap install locations on +// POSIX hosts, and an empty string on Windows so the host's native PATH (and +// case-insensitive `Path`) reach `where` lookups untouched. + // NewBaseAgent creates a new BaseAgent with the given config. // It preserves the resolved config passed from the caller. func NewBaseAgent(cfg Config) BaseAgent { @@ -169,8 +197,7 @@ func (a *BaseAgent) logCredentialStatus(ctx context.Context, apiKeyEnv, baseURLE // This does not scan unrelated providers. if apiKey := os.Getenv(apiKeyEnv); apiKey != "" { logging.DebugContextf(ctx, "%s detected for %s (source=process_env)", apiKeyEnv, a.Name()) - if baseURL := os.Getenv(baseURLEnv); baseURLEnv != "" && baseURL != "" { - _ = baseURL + if baseURLEnv != "" && os.Getenv(baseURLEnv) != "" { logging.DebugContextf(ctx, "%s detected for %s (source=process_env)", baseURLEnv, a.Name()) } return @@ -258,7 +285,10 @@ func downloadSessionArtifact(ctx context.Context, rt Runtime, artifactDir, sessi } func (a *BaseAgent) mergeExecOptionsEnv(ctx context.Context, opts ExecOptions, envVars map[string]string, attrs map[string]string) ExecOptions { - merged := map[string]string{"PATH": agentExecutablePath} + merged := map[string]string{} + if agentExecutablePath != "" { + merged["PATH"] = agentExecutablePath + } maps.Copy(merged, envVars) maps.Copy(merged, opts.Env) maps.Copy(merged, observability.AgentEnv(ctx, merged, attrs)) @@ -290,22 +320,13 @@ func formatAgentModel(provider, model string) string { return provider + "/" + model } -// shellQuote quotes a string for safe shell usage. +// shellQuote quotes a string for safe POSIX shell usage. Agent commands are +// always composed for and executed by bash (via the Node/nvm bootstrap), so +// POSIX quoting is correct even when skill-up itself runs on a Windows host. +// It delegates to internal/shellquote so the project keeps a single quoting +// implementation. func shellQuote(s string) string { - if s == "" { - return "''" - } - var result strings.Builder - result.WriteByte('\'') - for _, c := range s { - if c == '\'' { - result.WriteString(`'\''`) - } else { - result.WriteRune(c) - } - } - result.WriteByte('\'') - return result.String() + return shellquote.QuotePOSIX(s) } // BuildInstructionFromMessages converts messages to a single instruction string. diff --git a/internal/agent/claude_code.go b/internal/agent/claude_code.go index 651700d..1dc13a9 100644 --- a/internal/agent/claude_code.go +++ b/internal/agent/claude_code.go @@ -96,6 +96,9 @@ func (a *ClaudeCodeAgent) CheckCredentials(ctx context.Context) error { // Run executes the claude-code agent with the given messages via stream-json. // It streams messages to stdin and parses stream-json output to build the transcript. func (a *ClaudeCodeAgent) Run(ctx context.Context, rt Runtime, opts ExecOptions, messages []transcript.Message) (*SessionResult, error) { + if err := requireBashOnWindowsHost(rt); err != nil { + return nil, fmt.Errorf("%s: %w", a.Name(), err) + } start := time.Now() sessionID := uuid.New().String() diff --git a/internal/agent/claude_code_test.go b/internal/agent/claude_code_test.go index 72c93dd..60fb3a7 100644 --- a/internal/agent/claude_code_test.go +++ b/internal/agent/claude_code_test.go @@ -741,3 +741,4 @@ func (r *claudeCodeTestRuntime) Workspace() string { return r.workspace } func (r *claudeCodeTestRuntime) RequiresProcessSandbox() bool { return true } +func (r *claudeCodeTestRuntime) TargetGOOS() string { return "linux" } diff --git a/internal/agent/cli.go b/internal/agent/cli.go index 4bdd270..7545e77 100644 --- a/internal/agent/cli.go +++ b/internal/agent/cli.go @@ -5,10 +5,13 @@ import ( "context" "fmt" "path/filepath" + "regexp" + "strings" "text/template" "time" "github.com/alibaba/skill-up/internal/observability" + "github.com/alibaba/skill-up/internal/platform" "github.com/alibaba/skill-up/internal/runtime" "github.com/alibaba/skill-up/pkg/transcript" ) @@ -121,6 +124,9 @@ func (a *CLIAgent) InstallSkill(ctx context.Context, rt Runtime, skillCfg runtim // Run executes the agent with the given messages and returns the session result. func (a *CLIAgent) Run(ctx context.Context, rt Runtime, opts ExecOptions, messages []transcript.Message) (*SessionResult, error) { + if err := requireBashOnWindowsHost(rt); err != nil { + return nil, fmt.Errorf("%s: %w", a.Name(), err) + } start := time.Now() instruction := BuildInstructionFromMessages(messages) @@ -151,12 +157,42 @@ func (a *CLIAgent) Run(ctx context.Context, rt Runtime, opts ExecOptions, messag return sessionResult, nil } +// commandVRegexp matches a POSIX `command -v [rest]` check. The +// regex form (vs strings.CutPrefix) lets us capture the binary separately +// from any trailing redirect or pipe, and supports surrounding whitespace +// the way a real shell would. +var commandVRegexp = regexp.MustCompile(`^\s*command\s+-v\s+(\S+)(\s.*)?$`) + +// checkCommandForOS adapts a POSIX `command -v X` availability check to the +// target OS. Windows cmd.exe has no `command` builtin; `where` is the +// equivalent. Common POSIX-only redirect targets (`/dev/null`) are rewritten +// to their cmd equivalent (`nul`) so a quiet probe like +// `command -v codex >/dev/null 2>&1` continues to silence its output +// instead of failing to open the missing /dev/null path. Other command +// forms are returned unchanged. +func checkCommandForOS(checkCmd, goos string) string { + if goos != platform.GOOSWindows { + return checkCmd + } + m := commandVRegexp.FindStringSubmatch(checkCmd) + if m == nil { + return checkCmd + } + binary, rest := m[1], m[2] + rest = strings.ReplaceAll(rest, "/dev/null", "nul") + return "where " + binary + rest +} + // Check verifies the agent executable is available. func (a *CLIAgent) Check(ctx context.Context, rt Runtime) error { + if err := requireBashOnWindowsHost(rt); err != nil { + return fmt.Errorf("%s: %w", a.Name(), err) + } checkCmd := a.Cfg.CheckCmd if checkCmd == "" { return fmt.Errorf("CheckCmd not configured for agent %s", a.Name()) } + checkCmd = checkCommandForOS(checkCmd, rt.TargetGOOS()) result, err := rt.Exec(ctx, checkCmd, a.mergeExecOptionsEnv(ctx, ExecOptions{}, nil, nil)) if err != nil { diff --git a/internal/agent/cli_test.go b/internal/agent/cli_test.go index 7eedc70..beffd90 100644 --- a/internal/agent/cli_test.go +++ b/internal/agent/cli_test.go @@ -4,13 +4,28 @@ import ( "context" "os" "path/filepath" + goruntime "runtime" "testing" + "github.com/alibaba/skill-up/internal/platform" "github.com/alibaba/skill-up/internal/runtime" "github.com/alibaba/skill-up/pkg/transcript" ) +// skipIfNoPOSIXShell skips tests that rely on a POSIX shell at the host +// (RunCmd/InstallSkillCmd/InstallMCPCmd templates baked with bash builtins, +// `&&` pipelines, `$VAR` expansion, etc.). On Windows skill-up's none runtime +// uses cmd.exe, which can't interpret those constructs — agent execution on +// native Windows is intentionally out of scope; users go through WSL2. +func skipIfNoPOSIXShell(t *testing.T) { + t.Helper() + if goruntime.GOOS == platform.GOOSWindows { + t.Skip("POSIX-shell agent template; native Windows agent execution is unsupported (use WSL2)") + } +} + func TestCLIAgent_Run(t *testing.T) { + skipIfNoPOSIXShell(t) t.Parallel() // Use NoneRuntime as the test runtime @@ -41,6 +56,7 @@ func TestCLIAgent_Run(t *testing.T) { } func TestCLIAgent_RunExitError(t *testing.T) { + skipIfNoPOSIXShell(t) t.Parallel() rt := &runtime.NoneRuntime{} @@ -172,6 +188,7 @@ func TestCLIAgent_InstallSkillDefault(t *testing.T) { } func TestCLIAgent_InstallSkillWithCmd(t *testing.T) { + skipIfNoPOSIXShell(t) t.Parallel() rt := &runtime.NoneRuntime{} @@ -259,6 +276,7 @@ func TestCLIAgent_InstallMCPConfiguredServersRequireInstallCommand(t *testing.T) } func TestCLIAgent_InstallMCPUsesResolvedEndpointConfigRefAndEnv(t *testing.T) { + skipIfNoPOSIXShell(t) t.Parallel() rt := &runtime.NoneRuntime{} @@ -298,3 +316,23 @@ func TestCLIAgent_InstallMCPUsesResolvedEndpointConfigRefAndEnv(t *testing.T) { t.Fatalf("marker content: got %q, want %q", string(data), want) } } + +func TestCheckCommandForOS(t *testing.T) { + tests := []struct { + name, in, goos, want string + }{ + {"posix unchanged", "command -v codex", "linux", "command -v codex"}, + {"darwin unchanged", "command -v claude", "darwin", "command -v claude"}, + {"windows translates", "command -v codex", platform.GOOSWindows, "where codex"}, + {"windows redirects /dev/null", "command -v codex >/dev/null 2>&1", platform.GOOSWindows, "where codex >nul 2>&1"}, + {"windows stderr /dev/null", "command -v claude 2>/dev/null", platform.GOOSWindows, "where claude 2>nul"}, + {"windows non-command form unchanged", "codex --version", platform.GOOSWindows, "codex --version"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := checkCommandForOS(tt.in, tt.goos); got != tt.want { + t.Fatalf("checkCommandForOS(%q, %q) = %q, want %q", tt.in, tt.goos, got, tt.want) + } + }) + } +} diff --git a/internal/agent/codex.go b/internal/agent/codex.go index df6eb3a..3c9623f 100644 --- a/internal/agent/codex.go +++ b/internal/agent/codex.go @@ -224,6 +224,9 @@ func (a *CodexAgent) CheckCredentials(ctx context.Context) error { // //nolint:dupl func (a *CodexAgent) Run(ctx context.Context, rt Runtime, opts ExecOptions, messages []transcript.Message) (*SessionResult, error) { + if err := requireBashOnWindowsHost(rt); err != nil { + return nil, fmt.Errorf("%s: %w", a.Name(), err) + } start := time.Now() instruction := BuildInstructionFromMessages(messages) diff --git a/internal/agent/codex_test.go b/internal/agent/codex_test.go index 49053e2..e2f6b73 100644 --- a/internal/agent/codex_test.go +++ b/internal/agent/codex_test.go @@ -1006,3 +1006,4 @@ func (r *codexTestRuntime) Workspace() string { return r.workspace } func (r *codexTestRuntime) RequiresProcessSandbox() bool { return r.workspace != "opensandbox" } +func (r *codexTestRuntime) TargetGOOS() string { return "linux" } diff --git a/internal/agent/path_other.go b/internal/agent/path_other.go new file mode 100644 index 0000000..073bc2d --- /dev/null +++ b/internal/agent/path_other.go @@ -0,0 +1,8 @@ +//go:build !windows + +package agent + +// agentExecutablePath prepends the agent CLI's installed-from-bootstrap +// locations onto PATH so the nvm-managed node and ~/.local/bin shims win over +// any older system installs. +var agentExecutablePath = "$HOME/.local/bin:$HOME/.nvm/current/bin:$PATH" diff --git a/internal/agent/path_windows.go b/internal/agent/path_windows.go new file mode 100644 index 0000000..860a1aa --- /dev/null +++ b/internal/agent/path_windows.go @@ -0,0 +1,10 @@ +//go:build windows + +package agent + +// agentExecutablePath is empty on Windows: the POSIX bootstrap does not run +// natively, and overriding PATH with a colon-separated string would break +// the host's `where` lookups (and conflict with case-insensitive `Path`). +// Letting the inherited environment flow through keeps preinstalled CLIs +// reachable. +var agentExecutablePath = "" diff --git a/internal/agent/qodercli.go b/internal/agent/qodercli.go index 08dc2b0..08c398c 100644 --- a/internal/agent/qodercli.go +++ b/internal/agent/qodercli.go @@ -65,6 +65,9 @@ func (a *QoderCLIAgent) CheckCredentials(ctx context.Context) error { // //nolint:dupl func (a *QoderCLIAgent) Run(ctx context.Context, rt Runtime, opts ExecOptions, messages []transcript.Message) (*SessionResult, error) { + if err := requireBashOnWindowsHost(rt); err != nil { + return nil, fmt.Errorf("%s: %w", a.Name(), err) + } start := time.Now() instruction := BuildInstructionFromMessages(messages) diff --git a/internal/agent/qodercli_test.go b/internal/agent/qodercli_test.go index 7294163..994418e 100644 --- a/internal/agent/qodercli_test.go +++ b/internal/agent/qodercli_test.go @@ -4,6 +4,7 @@ import ( "context" "os" "path/filepath" + goruntime "runtime" "strings" "testing" "time" @@ -293,6 +294,13 @@ func TestFindQoderSessionFile(t *testing.T) { } func TestFindQoderSessionFile_SelectsNewestByModTime(t *testing.T) { + if goruntime.GOOS == "windows" { + // The workspace-key path layout embeds a Linux-style workspace path, + // which contains a colon on Windows (e.g. `C:`) and cannot be a + // directory component. Qoder native Windows agent execution is out of + // scope; this test is exercised on Linux/darwin only. + t.Skip("qoder workspace-key path layout is POSIX-only") + } tmpHome := t.TempDir() t.Setenv("HOME", tmpHome) @@ -397,3 +405,4 @@ func (r *qoderTestRuntime) Workspace() string { return r.workspace } func (r *qoderTestRuntime) RequiresProcessSandbox() bool { return true } +func (r *qoderTestRuntime) TargetGOOS() string { return "linux" } diff --git a/internal/cli/import.go b/internal/cli/import.go index 7a78a6b..71bc5e5 100644 --- a/internal/cli/import.go +++ b/internal/cli/import.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "os" + "path" "path/filepath" "github.com/spf13/cobra" @@ -156,7 +157,9 @@ func generateEvalConfig(caseIDs []string, outputDir string) *config.EvalConfig { caseFiles := make([]string, len(caseIDs)) for i, caseID := range caseIDs { - caseFiles[i] = filepath.Join(relPrefix, "cases", caseID+".yaml") + // eval.yaml is a portable config consumed by the loader on any OS; + // always use forward slashes for case paths regardless of the host. + caseFiles[i] = path.Join(filepath.ToSlash(relPrefix), "cases", caseID+".yaml") } cfg.Cases.Files = caseFiles diff --git a/internal/evaluator/evaluator_test.go b/internal/evaluator/evaluator_test.go index a6e1f9a..279bfc1 100644 --- a/internal/evaluator/evaluator_test.go +++ b/internal/evaluator/evaluator_test.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "path/filepath" + goruntime "runtime" "strings" "sync" "sync/atomic" @@ -96,6 +97,7 @@ func (m *mockRuntime) Create(_ context.Context) error { return func (m *mockRuntime) Close() error { return nil } func (m *mockRuntime) Workspace() string { return m.workspace } func (m *mockRuntime) RequiresProcessSandbox() bool { return true } +func (m *mockRuntime) TargetGOOS() string { return "linux" } func (m *mockRuntime) Start(_ context.Context) error { return nil } func (m *mockRuntime) Stop(_ context.Context) error { return nil } func (m *mockRuntime) UploadFile(_ context.Context, _, _ string) error { return nil } @@ -2490,7 +2492,7 @@ func TestContextFilesUploader_RejectsUnsafePaths(t *testing.T) { }{ {name: "empty", path: ""}, {name: "workspace root", path: "."}, - {name: "absolute", path: filepath.Join(string(filepath.Separator), "tmp", "secret.txt")}, + {name: "absolute", path: absoluteSecretPath()}, {name: "parent traversal", path: "../secret.txt"}, {name: "nested parent traversal", path: "fixtures/../../secret.txt"}, } @@ -2709,3 +2711,13 @@ func TestGitInitUploader_InitWithRemotes(t *testing.T) { t.Fatalf("expected remote URL in output, got %s", result.Stdout) } } + +// absoluteSecretPath returns a path that filepath.IsAbs reports as absolute on +// the host OS. On Windows that requires a drive letter; `\tmp\secret.txt` +// alone is considered relative. +func absoluteSecretPath() string { + if goruntime.GOOS == "windows" { + return `C:\tmp\secret.txt` + } + return "/tmp/secret.txt" +} diff --git a/internal/evaluator/fixtures.go b/internal/evaluator/fixtures.go index 4aa46ca..2ac64d2 100644 --- a/internal/evaluator/fixtures.go +++ b/internal/evaluator/fixtures.go @@ -129,7 +129,7 @@ func (g *gitInitUploader) Upload(ctx context.Context, rt runtime.Runtime, caseCf return err } fmt.Fprintf(&script, "git remote add %s %s\n", - shellquote.Quote(remote.Name), shellquote.Quote(remote.URL)) + shellquote.QuotePOSIX(remote.Name), shellquote.QuotePOSIX(remote.URL)) } result, err := rt.Exec(ctx, script.String(), runtime.ExecOptions{ @@ -170,7 +170,10 @@ func (g *gitCheckoutUploader) Upload(ctx context.Context, rt runtime.Runtime, ca // invocations and is never interpolated into the error message (a // double-quoted echo would re-evaluate command substitutions); the Go // error below already reports the branch name safely via %q. - quoted := shellquote.Quote(branch) + // The script runs in a POSIX shell inside the runtime (either the + // `none` runtime on a POSIX host, or the Linux OpenSandbox), so quote + // with POSIX rules explicitly rather than the host-aware Quote. + quoted := shellquote.QuotePOSIX(branch) script := fmt.Sprintf("set -eu\n"+ "if git switch %[1]s 2>/dev/null; then :\n"+ "elif ! git rev-parse --verify --quiet HEAD >/dev/null 2>&1; then git switch -c %[1]s\n"+ @@ -222,7 +225,7 @@ func (a *applyDiffUploader) Upload(ctx context.Context, rt runtime.Runtime, case } // Quote the tmp path and use `--` to stop git from treating the path as an option. - result, err := rt.Exec(ctx, "git apply -- "+shellquote.Quote(tmpPath), runtime.ExecOptions{ + result, err := rt.Exec(ctx, "git apply -- "+shellquote.QuotePOSIX(tmpPath), runtime.ExecOptions{ Cwd: rt.Workspace(), }) if err != nil { diff --git a/internal/judge/e2e_test.go b/internal/judge/e2e_test.go index 0c17f54..664c214 100644 --- a/internal/judge/e2e_test.go +++ b/internal/judge/e2e_test.go @@ -35,6 +35,7 @@ import ( "testing" "github.com/alibaba/skill-up/internal/config" + "github.com/alibaba/skill-up/internal/platform" "github.com/alibaba/skill-up/pkg/transcript" ) @@ -616,15 +617,12 @@ func TestE2E_GradingJSON_SkipResult_Format(t *testing.T) { // --------------------------------------------------------------------------- func TestE2E_ScriptJudge_FullPipeline(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("skipping script test on windows") - } - dir := t.TempDir() // Create the evaluation script: check whether EVAL_FINAL_MESSAGE - // contains "bug". - scriptContent := `#!/bin/sh + // contains "bug". The script judge dispatches by extension, so the host + // OS determines whether a POSIX shell or a Windows batch script is used. + scriptName, scriptContent := "eval_check.sh", `#!/bin/sh # Evaluation script: check whether the agent output identifies the bug. # Reads the EVAL_FINAL_MESSAGE env var (injected by ScriptJudge). if echo "$EVAL_FINAL_MESSAGE" | grep -q "bug"; then @@ -635,7 +633,18 @@ else exit 1 fi ` - scriptPath := writeScript(t, dir, "eval_check.sh", scriptContent) + if runtime.GOOS == platform.GOOSWindows { + scriptName, scriptContent = "eval_check.cmd", "@echo off\r\n"+ + "echo %EVAL_FINAL_MESSAGE% | findstr /C:\"bug\" >nul\r\n"+ + "if %errorlevel%==0 (\r\n"+ + " echo Agent correctly identified the bug\r\n"+ + " exit /b 0\r\n"+ + ") else (\r\n"+ + " echo Agent failed to identify the bug\r\n"+ + " exit /b 1\r\n"+ + ")\r\n" + } + scriptPath := writeScript(t, dir, scriptName, scriptContent) expectCfg := &config.Expect{} diff --git a/internal/judge/helpers_test.go b/internal/judge/helpers_test.go index 5b3fc0f..75f1c40 100644 --- a/internal/judge/helpers_test.go +++ b/internal/judge/helpers_test.go @@ -120,6 +120,7 @@ func (m *mockJudgeTestRuntime) Create(_ context.Context) error func (m *mockJudgeTestRuntime) Close() error { return nil } func (m *mockJudgeTestRuntime) Workspace() string { return "/tmp/test" } func (m *mockJudgeTestRuntime) RequiresProcessSandbox() bool { return true } +func (m *mockJudgeTestRuntime) TargetGOOS() string { return "linux" } func (m *mockJudgeTestRuntime) Start(_ context.Context) error { return nil } func (m *mockJudgeTestRuntime) Stop(_ context.Context) error { return nil } func (m *mockJudgeTestRuntime) UploadFile(_ context.Context, _, _ string) error { return nil } diff --git a/internal/judge/interpreter.go b/internal/judge/interpreter.go new file mode 100644 index 0000000..f5af1c2 --- /dev/null +++ b/internal/judge/interpreter.go @@ -0,0 +1,552 @@ +package judge + +import ( + "bufio" + "fmt" + "os" + "path" + "path/filepath" + "strings" + "time" + + "github.com/alibaba/skill-up/internal/platform" + "github.com/alibaba/skill-up/internal/shellquote" +) + +// scriptPlan describes how the script judge uploads and runs an evaluation +// script in a runtime whose commands execute on a particular target OS. +type scriptPlan struct { + // uploadName is the basename the script is uploaded as. The extension is + // preserved so the interpreter dispatch stays unambiguous. + uploadName string + // command builds the runtime Exec command string for the uploaded script + // at remoteScript (its path inside the runtime). + command func(remoteScript string) string + // cleanupCommand builds the command that recursively removes the + // per-judge temp dir on the target OS, using the same quoting rules as + // command so the same shell ultimately interprets it. + cleanupCommand func(dir string) string + // envPath translates a runtime-side path (e.g. EVAL_TRANSCRIPT_PATH) + // into the form the script's interpreter will accept. Identity for most + // targets; the Windows `.sh` plan converts to forward slashes so POSIX + // tools running inside Git Bash can open the file. + envPath func(p string) string +} + +// identityEnvPath is the default envPath used by plans that need no +// translation between the runtime-side path and the script's view of it. +func identityEnvPath(p string) string { return p } + +// planScript determines how to execute scriptPath in a runtime whose commands +// run on targetGOOS. +// +// POSIX targets keep the original behavior: the script is uploaded verbatim +// and run via its own shebang. Windows targets dispatch to an interpreter +// based on the file extension (or shebang when the extension is absent). +func planScript(scriptPath, targetGOOS string) (scriptPlan, error) { + if targetGOOS != platform.GOOSWindows { + return scriptPlan{ + uploadName: "script", + command: func(remoteScript string) string { + q := shellquote.QuotePOSIX(remoteScript) + return "chmod 700 " + q + " && " + q + }, + cleanupCommand: func(dir string) string { + return "rm -rf " + shellquote.QuotePOSIX(dir) + }, + envPath: identityEnvPath, + }, nil + } + return planWindowsScript(scriptPath, platform.Host()) +} + +func planWindowsScript(scriptPath string, shell platform.HostShell) (scriptPlan, error) { + // Read and classify the shebang once: both the .ps1 (pwsh-vs-powershell + // selection, option forwarding) and .sh (bash strict-mode flags) plans + // consume the result. shebangInterp is "" when no recognized shebang + // was found. + shebangInterp, shebangOpts := parseShebang(readShebang(scriptPath)) + + ext := strings.ToLower(filepath.Ext(scriptPath)) + if ext == "" { + ext = extensionForShebangInterpreter(shebangInterp) + } + + // Every command we emit (script run + cleanup) is quoted with the host + // shell's own quoter so the same shell semantics applies end to end. + // shell.Quote already accounts for bash-vs-cmd selection -- there is no + // second discovery here, ruling out the chance of the two decisions + // disagreeing. + quote := shell.Quote + // .ps1 / .cmd / .bat all execute through cmd.exe, so cleanup through cmd + // keeps quoting / strip semantics inside one shell. The `/d /s /c` flags + // match the cmd fallback in platform.Host so the strip rule behaves the + // same way for the inner command. + winCleanup := func(dir string) string { + return "cmd /d /s /c rd /s /q " + quote(dir) + } + + switch ext { + case ".ps1": + // Pick pwsh.exe (PowerShell Core 7+) when the shebang explicitly + // asks for it; default to powershell.exe (Windows PowerShell 5.x) + // otherwise. Forward any shebang-encoded interpreter flags so + // `#!/usr/bin/env -S pwsh -NoLogo` does not silently lose -NoLogo + // when we re-invoke the interpreter ourselves. -NoProfile and + // -ExecutionPolicy Bypass are always appended because Windows + // PowerShell's default Restricted policy blocks unsigned scripts; + // the duplicate -NoProfile case is harmless if the shebang also + // sets it. + psBinary := psInterpLegacy + if shebangInterp == psInterpCore { + psBinary = psInterpCore + } + psArgs := []string{psBinary} + // Only forward shebang opts when the shebang itself names a + // PowerShell interpreter -- a `.ps1` file with a bogus + // `#!/bin/bash -eu` shebang otherwise leaks bash flags into the + // powershell invocation. + if shebangInterp == psInterpCore || shebangInterp == psInterpLegacy { + for _, o := range shebangOpts { + psArgs = append(psArgs, quote(o)) + } + } + psArgs = append(psArgs, "-NoProfile", "-ExecutionPolicy", "Bypass", "-File") + return scriptPlan{ + uploadName: "script.ps1", + command: func(remoteScript string) string { + // Defensive copy: appending to the captured psArgs + // directly would mutate its backing array if cap + // exceeds len. The .sh branch below does the same. + args := append([]string{}, psArgs...) + args = append(args, quote(remoteScript)) + return strings.Join(args, " ") + }, + cleanupCommand: winCleanup, + envPath: identityEnvPath, + }, nil + case ".cmd", ".bat": + return scriptPlan{ + uploadName: "script" + ext, + command: func(remoteScript string) string { + return "cmd /d /s /c " + quote(remoteScript) + }, + cleanupCommand: winCleanup, + envPath: identityEnvPath, + }, nil + case ".sh", ".bash": + if !shell.IsBash { + return scriptPlan{}, fmt.Errorf( + "script judge: .sh script requires bash on Windows; install Git Bash or set %s", + platform.BashEnvOverride) + } + // Forward shebang-encoded options (`#!/bin/bash -eu`, + // `#!/usr/bin/env -S bash -eu`, ...) so strict-mode flags that + // POSIX honors via shebang aren't silently dropped when we invoke + // bash explicitly on Windows. + bashArgs := []string{quote(shell.Bash)} + // Forward only when the shebang actually names a POSIX shell, so a + // `.sh` file with an unrelated shebang (e.g. `#!/usr/bin/env pwsh` + // renamed to .sh) does not feed PowerShell flags to bash. + if shebangPOSIXShells[shebangInterp] { + for _, o := range shebangOpts { + bashArgs = append(bashArgs, quote(o)) + } + } + return scriptPlan{ + uploadName: "script.sh", + command: func(remoteScript string) string { + // bash on Windows accepts forward-slash paths; we also + // keep EVAL_TRANSCRIPT_PATH in that form (see envPath + // below) so POSIX tools inside the script can `cat` it. + args := append([]string{}, bashArgs...) + args = append(args, quote(filepath.ToSlash(remoteScript))) + return strings.Join(args, " ") + }, + // Stay inside bash for cleanup too. The .sh case already runs + // through bash -c, and Git Bash's rm understands the + // forward-slash form of the Windows temp dir natively, so + // `rm -rf ` avoids the bash -> cmd hop the + // other extensions go through. + cleanupCommand: func(dir string) string { + return "rm -rf " + quote(filepath.ToSlash(dir)) + }, + envPath: filepath.ToSlash, + }, nil + default: + return scriptPlan{}, fmt.Errorf( + "script judge: cannot determine interpreter for %s on Windows: "+ + "add a .sh, .ps1, or .cmd extension or a shebang", + filepath.Base(scriptPath)) + } +} + +// judgeTempDir returns an absolute temporary directory for a single script +// judge run, appropriate for the target OS. +func judgeTempDir(targetGOOS string) string { + name := fmt.Sprintf("skill-up-judge-%d", time.Now().UnixNano()) + if targetGOOS == platform.GOOSWindows { + return filepath.Join(os.TempDir(), name) + } + return path.Join("/tmp", name) +} + +// joinForGOOS joins path elements using the separator of the target OS. +func joinForGOOS(targetGOOS string, elem ...string) string { + if targetGOOS == platform.GOOSWindows { + return filepath.Join(elem...) + } + return path.Join(elem...) +} + +// shebangPOSIXShells lists interpreter basenames that the Windows planner +// is willing to dispatch through Git Bash. Only `sh` and `bash` are listed: +// the Windows `.sh` runner always invokes the discovered bash, so dropping +// a `#!/usr/bin/env zsh` script into bash would silently change semantics. +// Other POSIX shells (dash, ksh, zsh, ash) are intentionally rejected so +// the planner returns "cannot determine interpreter" rather than mis-routing. +// On POSIX targets this list is irrelevant: planScript ignores extension +// and lets the kernel honor the script's actual shebang. +var shebangPOSIXShells = map[string]bool{ + "sh": true, "bash": true, +} + +// PowerShell interpreter basenames recognized by the Windows planner. +// `pwsh` is PowerShell Core 7+; `powershell` is the legacy Windows +// PowerShell 5.x. They differ in syntax and module set, so we track them +// separately and pick the binary that matches the shebang when present. +const ( + psInterpCore = "pwsh" + psInterpLegacy = "powershell" +) + +// extensionForShebangInterpreter maps a parsed shebang interpreter basename +// to the synthetic file extension planWindowsScript dispatches on. Returns +// "" for empty / unrecognized interpreters so the planner reports +// "cannot determine interpreter" rather than mis-routing the script. +func extensionForShebangInterpreter(interp string) string { + if interp == "" { + return "" + } + switch interp { + case psInterpCore, psInterpLegacy: + return ".ps1" + } + if shebangPOSIXShells[interp] { + return ".sh" + } + return "" +} + +// shebangExtension reads scriptPath's first line and returns the synthetic +// extension implied by its shebang. Kept as a thin wrapper for tests that +// exercise the full path → extension flow. +func shebangExtension(scriptPath string) string { + interp, _ := parseShebang(readShebang(scriptPath)) + return extensionForShebangInterpreter(interp) +} + +// readShebang returns the body of scriptPath's first line when it is a +// shebang (everything after `#!`), or "" when there is no recognizable +// shebang or the file cannot be opened. +func readShebang(scriptPath string) string { + f, err := os.Open(scriptPath) //nolint:gosec // scriptPath is a caller-provided evaluation script + if err != nil { + return "" + } + defer func() { _ = f.Close() }() + + sc := bufio.NewScanner(f) + if !sc.Scan() { + return "" + } + line := strings.TrimSpace(sc.Text()) + if !strings.HasPrefix(line, "#!") { + return "" + } + return line[2:] +} + +// parseShebang splits a shebang body into (interpreter basename, options +// passed through to the interpreter). It understands direct paths +// (`#!/bin/bash -eu`), the `/usr/bin/env ` form, and both the +// split `env -S ` and compact `env -S` GNU extensions. +// +// The `-S ` body is treated as whitespace-separated tokens; nested +// shell quoting inside -S (e.g. `-S bash -c "echo hi"`) is not parsed and +// the embedded quotes survive in the returned options. Real-world judge +// shebangs use only flag-style options, where this approximation is fine. +// +// Returns ("", nil) when the body has no usable interpreter. +func parseShebang(body string) (string, []string) { + fields := strings.Fields(body) + if len(fields) == 0 { + return "", nil + } + if filepath.Base(fields[0]) == "env" { + return parseEnvShebang(fields[1:]) + } + return filepath.Base(fields[0]), append([]string{}, fields[1:]...) +} + +// envValueTakingShortFlags lists GNU env short flags that consume a separate +// value token after them in a shebang. +var envValueTakingShortFlags = map[string]bool{ + "-u": true, // --unset NAME + "-C": true, // --chdir DIR + "-a": true, // --argv0 ARG (override argv[0]) +} + +// envValueTakingLongFlags lists GNU env long flags that accept their value +// as the next token (the `--name=value` form is self-contained and handled +// by the generic `strings.HasPrefix(f, "-")` skip arm). +// +// Intentionally omitted: --block-signal, --default-signal, --ignore-signal. +// GNU env documents those as OPTIONAL-argument flags (`[=sig]`) -- without +// the `=sig` suffix they take no value, so consuming the next token would +// swallow the interpreter (e.g. `env --ignore-signal bash` would otherwise +// be parsed as flag `--ignore-signal` with value `bash` and no interpreter +// remaining). +var envValueTakingLongFlags = map[string]bool{ + "--unset": true, + "--chdir": true, + "--argv0": true, +} + +// isEnvAssignment reports whether tok has the env `NAME=VALUE` shape that +// GNU env accepts in front of the command. The portable form is +// `[A-Za-z_][A-Za-z0-9_]*=...`; everything before the first `=` must be a +// valid C-identifier byte sequence. +func isEnvAssignment(tok string) bool { + eq := strings.IndexByte(tok, '=') + if eq <= 0 { + return false + } + for i := range eq { + c := tok[i] + switch { + case c == '_': + case c >= 'A' && c <= 'Z': + case c >= 'a' && c <= 'z': + case i > 0 && c >= '0' && c <= '9': + default: + return false + } + } + return true +} + +// parseEnvShebang processes the args after `env` in a shebang line. +func parseEnvShebang(args []string) (string, []string) { + for i := 0; i < len(args); { //nolint:intrange // we conditionally advance i by 2 when consuming flag values + f := args[i] + switch { + case f == "-S" || f == "--split-string": + // Split form: -S takes everything that follows as one logical + // string. On a kernel shebang only one arg reaches env, so + // rejoining with spaces matches what env would receive. + return splitStringInterpreter(strings.Join(args[i+1:], " ")) + case strings.HasPrefix(f, "--split-string="): + // Long compact: --split-string=; any trailing tokens + // were whitespace-split out of the same logical arg, so + // rejoin them like the kernel-shebang one-arg rule. + rest := strings.TrimPrefix(f, "--split-string=") + if i+1 < len(args) { + if rest != "" { + rest += " " + } + rest += strings.Join(args[i+1:], " ") + } + return splitStringInterpreter(rest) + case strings.HasPrefix(f, "-S"): + // Compact form: -S packs the split-string into the same + // token. Strip the -S prefix and concatenate any trailing + // tokens with a space (mirroring the kernel-shebang one-arg + // rule). + rest := strings.TrimPrefix(f, "-S") + if i+1 < len(args) { + if rest != "" { + rest += " " + } + rest += strings.Join(args[i+1:], " ") + } + return splitStringInterpreter(rest) + case envValueTakingShortFlags[f], envValueTakingLongFlags[f]: + // Consume the flag and its separate value token (e.g. + // `-u VAR`, `-C DIR`, `--unset FOO`, `--chdir /tmp`) so the + // value token does not get mistaken for the interpreter. + i += 2 + case strings.HasPrefix(f, "-"): + // Self-contained env flag (-i, --ignore-environment, + // --chdir=DIR, --unset=VAR, ...). Skip the single token. + i++ + default: + // First non-flag token. GNU env allows leading `NAME=VALUE` + // assignments before the command (e.g. + // `env PYTHONPATH=/opt python3`); the helper skips those so we + // land on the real interpreter. + return interpreterFromArgs(args[i:]) + } + } + return "", nil +} + +// interpreterFromArgs returns the interpreter and trailing args after +// skipping leading GNU env `NAME=VALUE` assignments. The first non- +// assignment token is the interpreter; everything after it is forwarded. +func interpreterFromArgs(args []string) (string, []string) { + i := 0 + for i < len(args) && isEnvAssignment(args[i]) { + i++ + } + if i >= len(args) { + return "", nil + } + return filepath.Base(args[i]), append([]string{}, args[i+1:]...) +} + +// splitStringInterpreter parses the body that env's -S would split. The +// tokenizer respects single and double quotes plus backslash escapes so +// shebangs like `#!/usr/bin/env -S bash -c "echo ok"` produce the same argv +// as a POSIX kernel-shebang dispatch. Full env-S escape sequences (\xHH, +// \n, \t, etc.) are not decoded -- they are rarely used in real script-judge +// shebangs and decoding them would not survive in the cmd-fallback path +// anyway. Unterminated quotes return ("", nil) which the planner converts +// into a "cannot determine interpreter" error. +func splitStringInterpreter(body string) (string, []string) { + tokens, err := tokenizeShebangSplitString(body) + if err != nil || len(tokens) == 0 { + return "", nil + } + // GNU env allows leading `NAME=VALUE` assignments inside -S too + // (e.g. `env -S PYTHONPATH=/opt python3`); reuse the same skip helper. + return interpreterFromArgs(tokens) +} + +// tokenizerState carries the small bit of state the tokenizer mutates as it +// walks body. Keeping it in a struct lets the per-state handlers stay small +// enough that the top-level loop stays under gocyclo's threshold. +type tokenizerState struct { + cur strings.Builder + tokens []string + inSingle bool + inDouble bool + started bool + skipNext bool // when true, the next byte was consumed as an escape + done bool // set by `\c` in unquoted context: ignore the rest of body +} + +func (s *tokenizerState) flush() { + if s.started { + s.tokens = append(s.tokens, s.cur.String()) + s.cur.Reset() + s.started = false + } +} + +// tokenizeShebangSplitString splits body into shell-style tokens used by +// env -S: whitespace separates tokens; single quotes preserve every +// character literally until the next single quote; double quotes preserve +// characters with `\"` and `\\` decoded; outside quotes a backslash escapes +// the next character. Returns an error when a quote is left unterminated. +func tokenizeShebangSplitString(body string) ([]string, error) { + s := &tokenizerState{} + for i := 0; i < len(body); i++ { //nolint:intrange // we conditionally advance i to consume escape sequences + if s.skipNext { + s.skipNext = false + continue + } + next := byte(0) + if i+1 < len(body) { + next = body[i+1] + } + switch { + case s.inSingle: + tokenizeStepSingle(s, body[i]) + case s.inDouble: + tokenizeStepDouble(s, body[i], next) + default: + tokenizeStepUnquoted(s, body[i], next) + } + if s.done { + break + } + } + if s.inSingle || s.inDouble { + return nil, fmt.Errorf("unterminated quote in shebang -S body: %q", body) + } + s.flush() + return s.tokens, nil +} + +func tokenizeStepSingle(s *tokenizerState, c byte) { + if c == '\'' { + s.inSingle = false + } else { + s.cur.WriteByte(c) + } + s.started = true +} + +func tokenizeStepDouble(s *tokenizerState, c, next byte) { + switch { + case c == '"': + s.inDouble = false + case c == '\\' && (next == '"' || next == '\\'): + s.cur.WriteByte(next) + s.skipNext = true + default: + s.cur.WriteByte(c) + } + s.started = true +} + +// envSWhitespaceEscapes lists the env -S backslash escapes that decode to a +// whitespace character. In unquoted context they act as token separators -- +// most importantly `\_`, which is the documented way to embed a space +// inside an env -S body without surrounding quotes (e.g. `bash\_-eu`). +var envSWhitespaceEscapes = map[byte]bool{ + '_': true, // space + 't': true, + 'n': true, + 'r': true, + 'v': true, + 'f': true, +} + +func tokenizeStepUnquoted(s *tokenizerState, c, next byte) { + switch c { + case ' ', '\t', '\n', '\v', '\r', '\f': + s.flush() + case '\'': + s.inSingle = true + s.started = true + case '"': + s.inDouble = true + s.started = true + case '\\': + if next == 0 { + return + } + switch { + case next == 'c': + // GNU env -S documents `\c` (unquoted) as "ignore the rest + // of the split-string body". Stop tokenization right here so + // the planner sees only the tokens emitted so far. + s.flush() + s.skipNext = true + s.done = true + return + case envSWhitespaceEscapes[next]: + // Whitespace escape: ends the current token without writing. + s.flush() + default: + s.cur.WriteByte(next) + s.started = true + } + s.skipNext = true + default: + s.cur.WriteByte(c) + s.started = true + } +} diff --git a/internal/judge/interpreter_test.go b/internal/judge/interpreter_test.go new file mode 100644 index 0000000..5d4f843 --- /dev/null +++ b/internal/judge/interpreter_test.go @@ -0,0 +1,349 @@ +package judge + +import ( + "os" + "path/filepath" + goruntime "runtime" + "strings" + "testing" + + "github.com/alibaba/skill-up/internal/platform" +) + +func TestPlanScript_POSIXTarget(t *testing.T) { + plan, err := planScript("/skill/evals/check.sh", "linux") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if plan.uploadName != "script" { + t.Fatalf("uploadName = %q, want \"script\"", plan.uploadName) + } + got := plan.command("/tmp/d/script") + want := "chmod 700 '/tmp/d/script' && '/tmp/d/script'" + if got != want { + t.Fatalf("command = %q, want %q", got, want) + } + if got := plan.envPath("/tmp/d/transcript.json"); got != "/tmp/d/transcript.json" { + t.Fatalf("POSIX envPath should be identity, got %q", got) + } +} + +// TestHostShellQuote verifies the quoter selected by platform.Host() per +// host OS. POSIX hosts always use QuotePOSIX (single quotes). Windows hosts +// use bash-double-quote escapes when bash is discoverable (every \ / " / +// $ / backtick doubled so bash decodes them back to the original byte) and +// plain QuoteWindows otherwise. +func TestHostShellQuote(t *testing.T) { + shell := platform.Host() + got := shell.Quote(`C:\tmp\$VAR\script.cmd`) + if goruntime.GOOS != "windows" { + want := `'C:\tmp\$VAR\script.cmd'` + if got != want { + t.Fatalf("posix shell.Quote = %q, want %q", got, want) + } + return + } + if shell.IsBash { + want := `"C:\\tmp\\\$VAR\\script.cmd"` + if got != want { + t.Fatalf("windows+bash shell.Quote = %q, want %q", got, want) + } + } else { + want := `"C:\tmp\$VAR\script.cmd"` + if got != want { + t.Fatalf("windows+cmd shell.Quote = %q, want %q", got, want) + } + } +} + +func TestParseShebang(t *testing.T) { + tests := []struct { + name, body string + wantInt string + wantOpts []string + }{ + {"empty", "", "", nil}, + {"posix sh", "/bin/sh", "sh", []string{}}, + {"bash with opts", "/bin/bash -eu", "bash", []string{"-eu"}}, + {"env bash", "/usr/bin/env bash", "bash", []string{}}, + {"env -S split", "/usr/bin/env -S bash -eu", "bash", []string{"-eu"}}, + {"env -S compact", "/usr/bin/env -Sbash -eu", "bash", []string{"-eu"}}, + {"env -S compact full", "/usr/bin/env -Sbash\t-eu", "bash", []string{"-eu"}}, + {"env --split-string=", "/usr/bin/env --split-string=bash -eu", "bash", []string{"-eu"}}, + {"env -i python", "/usr/bin/env -i python3", "python3", []string{}}, + // Value-taking short flags (-u NAME, -C DIR) must consume their + // value token so it does not get mistaken for the interpreter. + {"env -u VAR bash", "/usr/bin/env -u FOO bash -eu", "bash", []string{"-eu"}}, + {"env -C DIR bash", "/usr/bin/env -C /tmp bash", "bash", []string{}}, + // Long-form value-taking flags in split form too (the =NAME form + // is already handled by the generic "starts with -" skip arm). + {"env --unset split", "/usr/bin/env --unset FOO bash -eu", "bash", []string{"-eu"}}, + {"env --chdir split", "/usr/bin/env --chdir /tmp bash", "bash", []string{}}, + // env -S with quoted bash -c "..." must preserve the quoted arg + // as one token instead of breaking it on whitespace. + {"env -S bash -c quoted", `/usr/bin/env -S bash -c "echo ok"`, "bash", []string{"-c", "echo ok"}}, + // env -S \_ (backslash-underscore) is the documented separator + // for embedding a space inside the split-string body. + {"env -S backslash space", `/usr/bin/env -S bash\_-eu`, "bash", []string{"-eu"}}, + {"env -S backslash tab", `/usr/bin/env -S bash\t-eu`, "bash", []string{"-eu"}}, + {"only env flags", "/usr/bin/env -S", "", nil}, + // GNU env -a / --argv0=ARG overrides argv[0] of the executed + // command. -a takes a separate token; the parser must consume the + // argv0 value so it does not get mistaken for the interpreter. + {"env -a argv0 bash", "/usr/bin/env -a myargv0 bash -eu", "bash", []string{"-eu"}}, + // --default-signal / --ignore-signal / --block-signal are + // optional-argument flags (`[=sig]`). Without `=sig` they take no + // value, so the next token IS the interpreter. + {"env --ignore-signal bash", "/usr/bin/env --ignore-signal bash -eu", "bash", []string{"-eu"}}, + {"env --default-signal bash", "/usr/bin/env --default-signal bash", "bash", []string{}}, + // GNU env accepts leading `NAME=VALUE` assignments before the + // command. The parser must skip them and pick the first + // non-assignment token as the interpreter. + {"env NAME=VALUE bash", "/usr/bin/env PYTHONPATH=/opt python3 -V", "python3", []string{"-V"}}, + {"env -S NAME=VALUE bash", "/usr/bin/env -S PYTHONPATH=/opt python3 -V", "python3", []string{"-V"}}, + // `\c` in unquoted env -S body terminates tokenization; tokens + // after it are ignored entirely. + {"env -S backslash c terminator", `/usr/bin/env -S bash -eu\c trailing junk`, "bash", []string{"-eu"}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotInt, gotOpts := parseShebang(tt.body) + if gotInt != tt.wantInt { + t.Fatalf("interpreter = %q, want %q", gotInt, tt.wantInt) + } + if len(gotOpts) != len(tt.wantOpts) { + t.Fatalf("opts = %v, want %v", gotOpts, tt.wantOpts) + } + for i := range gotOpts { + if gotOpts[i] != tt.wantOpts[i] { + t.Fatalf("opts[%d] = %q, want %q", i, gotOpts[i], tt.wantOpts[i]) + } + } + }) + } +} + +// POSIX targets preserve the original behavior: the file extension is ignored +// and the script runs via its own shebang. +func TestPlanScript_POSIXTarget_IgnoresExtension(t *testing.T) { + plan, err := planScript("/skill/evals/check.ps1", "darwin") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if plan.uploadName != "script" { + t.Fatalf("uploadName = %q, want \"script\"", plan.uploadName) + } +} + +func TestPlanWindowsScript(t *testing.T) { + tests := []struct { + name string + scriptPath string + wantUpload string + wantCmdHead string + }{ + {"powershell", `C:\skill\check.ps1`, "script.ps1", "powershell -NoProfile -ExecutionPolicy Bypass -File "}, + {"cmd", `C:\skill\check.cmd`, "script.cmd", "cmd /d /s /c "}, + {"bat", `C:\skill\check.bat`, "script.bat", "cmd /d /s /c "}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + plan, err := planWindowsScript(tt.scriptPath, platform.Host()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if plan.uploadName != tt.wantUpload { + t.Fatalf("uploadName = %q, want %q", plan.uploadName, tt.wantUpload) + } + if cmd := plan.command(`C:\tmp\d\` + tt.wantUpload); !strings.HasPrefix(cmd, tt.wantCmdHead) { + t.Fatalf("command = %q, want prefix %q", cmd, tt.wantCmdHead) + } + }) + } +} + +func TestPlanWindowsScript_UnknownInterpreter(t *testing.T) { + dir := t.TempDir() + scriptPath := filepath.Join(dir, "mystery.txt") + if err := os.WriteFile(scriptPath, []byte("echo hi\n"), 0o600); err != nil { + t.Fatalf("write: %v", err) + } + _, err := planWindowsScript(scriptPath, platform.Host()) + if err == nil || !strings.Contains(err.Error(), "cannot determine interpreter") { + t.Fatalf("expected cannot-determine-interpreter error, got: %v", err) + } +} + +// .ps1 file with a shebang that names pwsh must dispatch to pwsh.exe, not +// the legacy powershell.exe. +func TestPlanWindowsScript_PS1ShebangPwsh(t *testing.T) { + dir := t.TempDir() + scriptPath := filepath.Join(dir, "check.ps1") + if err := os.WriteFile(scriptPath, []byte("#!/usr/bin/env pwsh\nWrite-Host hi\n"), 0o600); err != nil { + t.Fatalf("write: %v", err) + } + plan, err := planWindowsScript(scriptPath, platform.Host()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + cmd := plan.command(`C:\tmp\d\script.ps1`) + if !strings.HasPrefix(cmd, "pwsh ") { + t.Fatalf("expected pwsh prefix, got %q", cmd) + } + if strings.HasPrefix(cmd, "powershell ") { + t.Fatalf("pwsh shebang should not route to powershell.exe: %q", cmd) + } +} + +// .ps1 file with `#!/usr/bin/env -S pwsh -NoLogo` must forward -NoLogo to +// the interpreter -- shebang options must not be silently dropped. +func TestPlanWindowsScript_PS1ShebangForwardsOpts(t *testing.T) { + dir := t.TempDir() + scriptPath := filepath.Join(dir, "check.ps1") + if err := os.WriteFile(scriptPath, []byte("#!/usr/bin/env -S pwsh -NoLogo\nWrite-Host hi\n"), 0o600); err != nil { + t.Fatalf("write: %v", err) + } + plan, err := planWindowsScript(scriptPath, platform.Host()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + cmd := plan.command(`C:\tmp\d\script.ps1`) + if !strings.Contains(cmd, "-NoLogo") { + t.Fatalf("expected -NoLogo forwarded from shebang, got %q", cmd) + } + // Sanity: defaults still get appended. + for _, want := range []string{"-NoProfile", "-ExecutionPolicy", "Bypass", "-File"} { + if !strings.Contains(cmd, want) { + t.Fatalf("expected %q in command, got %q", want, cmd) + } + } +} + +// .ps1 file with no shebang (or a non-PowerShell shebang) must keep the +// legacy default: powershell.exe with the standard flag set. +func TestPlanWindowsScript_PS1NoShebangUsesLegacyDefault(t *testing.T) { + dir := t.TempDir() + scriptPath := filepath.Join(dir, "check.ps1") + if err := os.WriteFile(scriptPath, []byte("Write-Host hi\n"), 0o600); err != nil { + t.Fatalf("write: %v", err) + } + plan, err := planWindowsScript(scriptPath, platform.Host()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + cmd := plan.command(`C:\tmp\d\script.ps1`) + if !strings.HasPrefix(cmd, "powershell -NoProfile -ExecutionPolicy Bypass -File ") { + t.Fatalf("expected legacy powershell default, got %q", cmd) + } +} + +// TestPlanWindowsScript_ShellScript covers the .sh branch, whose outcome +// depends on whether bash is discoverable on the host running the test. +func TestPlanWindowsScript_ShellScript(t *testing.T) { + plan, err := planWindowsScript(`C:\skill\check.sh`, platform.Host()) + if _, ok := platform.DiscoverBash(); ok { + if err != nil { + t.Fatalf("bash is available but planning failed: %v", err) + } + if plan.uploadName != "script.sh" { + t.Fatalf("uploadName = %q, want \"script.sh\"", plan.uploadName) + } + return + } + if err == nil || !strings.Contains(err.Error(), "requires bash on Windows") { + t.Fatalf("expected bash-required error, got: %v", err) + } +} + +func TestShebangExtension(t *testing.T) { + tests := []struct { + name string + content string + want string + }{ + {"posix sh", "#!/bin/sh\necho hi\n", ".sh"}, + {"env bash", "#!/usr/bin/env bash\necho hi\n", ".sh"}, + {"env -S bash", "#!/usr/bin/env -S bash -eu\necho hi\n", ".sh"}, + {"pwsh", "#!/usr/bin/env pwsh\nWrite-Host hi\n", ".ps1"}, + {"powershell direct", "#!/usr/local/bin/powershell\nWrite-Host hi\n", ".ps1"}, + {"no shebang", "echo hi\n", ""}, + {"empty", "", ""}, + {"unrecognized ruby", "#!/usr/bin/env ruby\nputs 1\n", ""}, + // fish, ksh-suffixed names etc. must not be misclassified as `.sh` + // just because their name contains the letters "sh". + {"fish not sh", "#!/usr/bin/env fish\necho hi\n", ""}, + {"python not sh", "#!/usr/bin/env python3\nprint(1)\n", ""}, + {"swish not sh", "#!/usr/local/bin/swish\n", ""}, + // On Windows the .sh runner always invokes bash; non-bash POSIX + // shells must not get silently routed to it. Reject so the planner + // reports "cannot determine interpreter". + {"zsh rejected", "#!/usr/bin/env zsh\necho hi\n", ""}, + {"dash rejected", "#!/bin/dash\necho hi\n", ""}, + {"ksh rejected", "#!/usr/bin/env ksh\necho hi\n", ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + dir := t.TempDir() + p := filepath.Join(dir, "script") + if err := os.WriteFile(p, []byte(tt.content), 0o600); err != nil { + t.Fatalf("write: %v", err) + } + if got := shebangExtension(p); got != tt.want { + t.Fatalf("shebangExtension = %q, want %q", got, tt.want) + } + }) + } +} + +func TestCleanupCommand_POSIX(t *testing.T) { + plan, err := planScript("/skill/check.sh", "linux") + if err != nil { + t.Fatalf("planScript: %v", err) + } + if got, want := plan.cleanupCommand("/tmp/d"), "rm -rf '/tmp/d'"; got != want { + t.Fatalf("posix cleanupCommand = %q, want %q", got, want) + } +} + +func TestCleanupCommand_Windows(t *testing.T) { + plan, err := planWindowsScript(`C:\skill\check.ps1`, platform.Host()) + if err != nil { + t.Fatalf("planWindowsScript: %v", err) + } + got := plan.cleanupCommand(`C:\tmp\d`) + if !strings.HasPrefix(got, "cmd /d /s /c rd /s /q ") { + t.Fatalf("windows cleanupCommand = %q, want prefix %q", got, "cmd /d /s /c rd /s /q ") + } +} + +// .sh on Windows cleans up via bash `rm -rf` rather than dispatching to cmd, +// avoiding the bash -> cmd hop that the .ps1/.cmd/.bat paths necessarily take. +func TestCleanupCommand_Windows_ShellScriptUsesBashRm(t *testing.T) { + if _, ok := platform.DiscoverBash(); !ok { + t.Skip("requires bash to plan a .sh Windows script") + } + plan, err := planWindowsScript(`C:\skill\check.sh`, platform.Host()) + if err != nil { + t.Fatalf("planWindowsScript: %v", err) + } + got := plan.cleanupCommand(`C:\tmp\d`) + if !strings.HasPrefix(got, "rm -rf ") { + t.Fatalf("windows .sh cleanupCommand = %q, want prefix %q", got, "rm -rf ") + } + if strings.Contains(got, "cmd /") { + t.Fatalf("windows .sh cleanupCommand should not invoke cmd, got %q", got) + } + // Note: filepath.ToSlash is a no-op when this test runs on a POSIX host + // (separator is `/`, no backslashes to convert), so we cannot assert the + // forward-slash form here. On a real Windows host the production code + // path converts the backslashes; behaviour is exercised by Windows CI. +} + +func TestJudgeTempDir(t *testing.T) { + if d := judgeTempDir("linux"); !strings.HasPrefix(d, "/tmp/skill-up-judge-") { + t.Fatalf("posix judgeTempDir = %q, want /tmp/skill-up-judge- prefix", d) + } + if d := judgeTempDir("windows"); !strings.Contains(d, "skill-up-judge-") { + t.Fatalf("windows judgeTempDir = %q, want skill-up-judge- substring", d) + } +} diff --git a/internal/judge/script.go b/internal/judge/script.go index eeeeb6e..441737b 100644 --- a/internal/judge/script.go +++ b/internal/judge/script.go @@ -4,13 +4,11 @@ import ( "context" "fmt" "log" - "path" "strconv" "strings" "time" evalruntime "github.com/alibaba/skill-up/internal/runtime" - "github.com/alibaba/skill-up/internal/shellquote" ) // DefaultScriptTimeout is the default timeout for script execution (30s). @@ -76,33 +74,48 @@ func (j *ScriptJudge) runtime(ctx context.Context) (evalruntime.Runtime, func(), } func (j *ScriptJudge) evaluateInRuntime(ctx context.Context, rt evalruntime.Runtime, in Input, timeout time.Duration) (*Result, error) { - remoteDir := path.Join("/tmp", fmt.Sprintf("skill-up-judge-%d", time.Now().UnixNano())) - remoteScript := path.Join(remoteDir, "script") + targetGOOS := rt.TargetGOOS() + plan, err := planScript(j.ScriptPath, targetGOOS) + if err != nil { + return nil, fmt.Errorf("script execution failed: %w", err) + } + + remoteDir := judgeTempDir(targetGOOS) + remoteScript := joinForGOOS(targetGOOS, remoteDir, plan.uploadName) if err := rt.UploadFile(ctx, j.ScriptPath, remoteScript); err != nil { return nil, fmt.Errorf("script execution failed: upload script judge: %w", err) } defer func() { - _, _ = rt.Exec(context.WithoutCancel(ctx), "rm -rf "+shellquote.Quote(remoteDir), evalruntime.ExecOptions{}) + _, _ = rt.Exec(context.WithoutCancel(ctx), plan.cleanupCommand(remoteDir), evalruntime.ExecOptions{}) }() remoteTranscript := "" if j.TranscriptPath != "" { - remoteTranscript = path.Join(remoteDir, "transcript.json") + remoteTranscript = joinForGOOS(targetGOOS, remoteDir, "transcript.json") if err := rt.UploadFile(ctx, j.TranscriptPath, remoteTranscript); err != nil { return nil, fmt.Errorf("script execution failed: upload script judge transcript: %w", err) } } - command := "chmod 700 " + shellquote.Quote(remoteScript) + " && " + shellquote.Quote(remoteScript) + command := plan.command(remoteScript) cwd := in.WorkspacePath if cwd == "" { cwd = rt.Workspace() } + // Translate the transcript path into the script interpreter's preferred + // form (e.g. forward slashes for `.sh` running under Git Bash so POSIX + // tools can `cat "$EVAL_TRANSCRIPT_PATH"`). Every plan returned by + // planScript sets envPath (identity on POSIX, ToSlash on Windows .sh), + // so the function is always safe to invoke. + transcriptEnv := remoteTranscript + if remoteTranscript != "" { + transcriptEnv = plan.envPath(remoteTranscript) + } result, err := rt.Exec(ctx, command, evalruntime.ExecOptions{ Cwd: cwd, TimeoutSec: int(timeout.Seconds()), Env: map[string]string{ - "EVAL_TRANSCRIPT_PATH": remoteTranscript, + "EVAL_TRANSCRIPT_PATH": transcriptEnv, "EVAL_FINAL_MESSAGE": in.FinalMessage, "EVAL_EXIT_CODE": strconv.Itoa(in.ExitCode), }, diff --git a/internal/judge/script_test.go b/internal/judge/script_test.go index 7c4dce8..279655a 100644 --- a/internal/judge/script_test.go +++ b/internal/judge/script_test.go @@ -8,29 +8,36 @@ import ( "strings" "testing" + "github.com/alibaba/skill-up/internal/platform" evalruntime "github.com/alibaba/skill-up/internal/runtime" ) -const osWindows = "windows" - -// scriptJudgeCase parametrises the common "write script → run judge → -// assert status & evidence" flow so individual test bodies stay focused on -// what's unique. Extracted to silence dupl on near-identical pass/fail cases. +// scriptJudgeCase parametrises the common "write script → run judge → assert +// status & evidence" flow so individual test bodies stay focused on what's +// unique. Each case carries both a POSIX (.sh) and a Windows (.cmd) script +// body; runScriptJudgeCase picks the one matching the host OS, since these +// tests exercise the none runtime, which executes on the host. type scriptJudgeCase struct { - scriptName string - scriptBody string + posixScript string + windowsScript string input Input expectStatus Status evidenceMatch string } +// script returns the upload name and body appropriate for the host OS. +func (tc scriptJudgeCase) script() (name, body string) { + if runtime.GOOS == platform.GOOSWindows { + return "check.cmd", tc.windowsScript + } + return "check.sh", tc.posixScript +} + func runScriptJudgeCase(t *testing.T, tc scriptJudgeCase) { t.Helper() - if runtime.GOOS == osWindows { - t.Skip("skipping on windows") - } dir := t.TempDir() - script := writeScript(t, dir, tc.scriptName, tc.scriptBody) + name, body := tc.script() + script := writeScript(t, dir, name, body) in := tc.input if in.WorkspacePath == "" { in.WorkspacePath = dir @@ -50,8 +57,8 @@ func runScriptJudgeCase(t *testing.T, tc scriptJudgeCase) { func TestScriptJudge_Pass(t *testing.T) { runScriptJudgeCase(t, scriptJudgeCase{ - scriptName: "check.sh", - scriptBody: "#!/bin/sh\necho \"all checks passed\"\nexit 0\n", + posixScript: "#!/bin/sh\necho \"all checks passed\"\nexit 0\n", + windowsScript: "@echo off\r\necho all checks passed\r\nexit /b 0\r\n", input: Input{ FinalMessage: "test output", ExitCode: 0, @@ -63,8 +70,8 @@ func TestScriptJudge_Pass(t *testing.T) { func TestScriptJudge_Pass_EmptyStdout(t *testing.T) { runScriptJudgeCase(t, scriptJudgeCase{ - scriptName: "check.sh", - scriptBody: "#!/bin/sh\nexit 0\n", + posixScript: "#!/bin/sh\nexit 0\n", + windowsScript: "@echo off\r\nexit /b 0\r\n", expectStatus: StatusPass, evidenceMatch: "script passed", }) @@ -76,8 +83,8 @@ func TestScriptJudge_Pass_EmptyStdout(t *testing.T) { func TestScriptJudge_Fail_NonZeroExit(t *testing.T) { runScriptJudgeCase(t, scriptJudgeCase{ - scriptName: "check.sh", - scriptBody: "#!/bin/sh\necho \"review quality below threshold\"\nexit 1\n", + posixScript: "#!/bin/sh\necho \"review quality below threshold\"\nexit 1\n", + windowsScript: "@echo off\r\necho review quality below threshold\r\nexit /b 1\r\n", expectStatus: StatusFail, evidenceMatch: "review quality below threshold", }) @@ -85,8 +92,8 @@ func TestScriptJudge_Fail_NonZeroExit(t *testing.T) { func TestScriptJudge_Fail_NonZeroExit_EmptyStdout(t *testing.T) { runScriptJudgeCase(t, scriptJudgeCase{ - scriptName: "check.sh", - scriptBody: "#!/bin/sh\nexit 2\n", + posixScript: "#!/bin/sh\nexit 2\n", + windowsScript: "@echo off\r\nexit /b 2\r\n", expectStatus: StatusFail, evidenceMatch: "exited with code 2", }) @@ -98,8 +105,8 @@ func TestScriptJudge_Fail_NonZeroExit_EmptyStdout(t *testing.T) { func TestScriptJudge_StderrCaptured(t *testing.T) { runScriptJudgeCase(t, scriptJudgeCase{ - scriptName: "check.sh", - scriptBody: "#!/bin/sh\necho \"passed\"\necho \"debug info here\" >&2\nexit 0\n", + posixScript: "#!/bin/sh\necho \"passed\"\necho \"debug info here\" >&2\nexit 0\n", + windowsScript: "@echo off\r\necho passed\r\necho debug info here 1>&2\r\nexit /b 0\r\n", expectStatus: StatusPass, evidenceMatch: "debug info here", }) @@ -110,11 +117,14 @@ func TestScriptJudge_StderrCaptured(t *testing.T) { // --------------------------------------------------------------------------- func TestScriptJudge_Timeout(t *testing.T) { - if runtime.GOOS == osWindows { - t.Skip("skipping on windows") - } dir := t.TempDir() - script := writeScript(t, dir, "slow.sh", "#!/bin/sh\nsleep 10\nexit 0\n") + name, body := "slow.sh", "#!/bin/sh\nsleep 10\nexit 0\n" + if runtime.GOOS == platform.GOOSWindows { + // ping -n 11 to a local address waits ~10s without needing console + // input (unlike `timeout`). + name, body = "slow.cmd", "@echo off\r\nping -n 11 127.0.0.1 >nul\r\nexit /b 0\r\n" + } + script := writeScript(t, dir, name, body) j := &ScriptJudge{ ScriptPath: script, TimeoutSeconds: 1, @@ -148,14 +158,16 @@ func TestScriptJudge_ScriptNotFound(t *testing.T) { // --------------------------------------------------------------------------- func TestScriptJudge_EnvVarsInjected(t *testing.T) { - if runtime.GOOS == osWindows { - t.Skip("skipping on windows") - } dir := t.TempDir() - script := writeScript(t, dir, "check_env.sh", `#!/bin/sh -echo "transcript=$EVAL_TRANSCRIPT_PATH final=$EVAL_FINAL_MESSAGE exit=$EVAL_EXIT_CODE" -exit 0 -`) + name, body := "check_env.sh", "#!/bin/sh\n"+ + "echo \"transcript=$EVAL_TRANSCRIPT_PATH final=$EVAL_FINAL_MESSAGE exit=$EVAL_EXIT_CODE\"\n"+ + "exit 0\n" + if runtime.GOOS == platform.GOOSWindows { + name, body = "check_env.cmd", "@echo off\r\n"+ + "echo transcript=%EVAL_TRANSCRIPT_PATH% final=%EVAL_FINAL_MESSAGE% exit=%EVAL_EXIT_CODE%\r\n"+ + "exit /b 0\r\n" + } + script := writeScript(t, dir, name, body) j := &ScriptJudge{ ScriptPath: script, TranscriptPath: filepath.Join(dir, "transcript.json"), @@ -171,7 +183,7 @@ exit 0 assertNoError(t, err) assertStatus(t, r, StatusPass) ev := r.AssertionResults[0].Evidence - if !strings.Contains(ev, "transcript=/tmp/skill-up-judge-") { + if !strings.Contains(ev, "transcript=") || !strings.Contains(ev, "skill-up-judge-") { t.Fatalf("EVAL_TRANSCRIPT_PATH not injected, evidence: %s", ev) } if !strings.Contains(ev, "final=hello world") { @@ -183,9 +195,6 @@ exit 0 } func TestScriptJudge_EvaluatesInRuntime(t *testing.T) { - if runtime.GOOS == osWindows { - t.Skip("skipping on windows") - } dir := t.TempDir() script := writeScript(t, dir, "check_runtime.sh", `#!/bin/sh echo "cwd=$(pwd) transcript=$EVAL_TRANSCRIPT_PATH final=$EVAL_FINAL_MESSAGE exit=$EVAL_EXIT_CODE" @@ -247,6 +256,7 @@ func (r *scriptJudgeRuntime) Start(context.Context) error { return nil } func (r *scriptJudgeRuntime) Stop(context.Context) error { return nil } func (r *scriptJudgeRuntime) Workspace() string { return r.workspace } func (r *scriptJudgeRuntime) RequiresProcessSandbox() bool { return true } +func (r *scriptJudgeRuntime) TargetGOOS() string { return "linux" } func (r *scriptJudgeRuntime) UploadFile(_ context.Context, _, targetPath string) error { r.uploads = append(r.uploads, targetPath) diff --git a/internal/mcp/mock_server_test.go b/internal/mcp/mock_server_test.go index 4ec6813..7c2c01c 100644 --- a/internal/mcp/mock_server_test.go +++ b/internal/mcp/mock_server_test.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "path/filepath" + goruntime "runtime" "strconv" "strings" "testing" @@ -22,6 +23,16 @@ type mockProc struct { func startMockServer(t *testing.T, script, dir string) *mockProc { t.Helper() + if goruntime.GOOS == "windows" { + // Every mocked MCP server test spawns a child Node process and + // exchanges Content-Length-framed JSON-RPC over its stdout. Node on + // default Windows applies codepage / CRLF translation to that + // stdout, which corrupts the framed byte stream and makes the + // reader hang on the response header until the 15s ctx timeout + // fires. Verifying the framing/transport on POSIX is sufficient; + // the framing logic itself is platform-independent Go code. + t.Skip("Node stdio codepage/CRLF translation corrupts framed binary on Windows") + } if _, err := exec.LookPath("node"); err != nil { t.Skip("node is required for mock server tests") } diff --git a/internal/platform/bash_other.go b/internal/platform/bash_other.go new file mode 100644 index 0000000..38ee53e --- /dev/null +++ b/internal/platform/bash_other.go @@ -0,0 +1,24 @@ +//go:build !windows + +package platform + +import ( + "os" + "os/exec" +) + +// DiscoverBash locates a bash interpreter on non-Windows hosts: the +// SKILL_UP_BASH override first, then PATH. +func DiscoverBash() (string, bool) { + if v := os.Getenv(BashEnvOverride); v != "" { + //nolint:gosec // v is an explicit user-supplied bash override path + if info, err := os.Stat(v); err == nil && !info.IsDir() { + return v, true + } + } + p, err := exec.LookPath("bash") + if err != nil { + return "", false + } + return p, true +} diff --git a/internal/platform/bash_windows.go b/internal/platform/bash_windows.go new file mode 100644 index 0000000..a40842e --- /dev/null +++ b/internal/platform/bash_windows.go @@ -0,0 +1,75 @@ +//go:build windows + +package platform + +import ( + "os" + "os/exec" + "path/filepath" + "strings" +) + +// knownWindowsBashPaths lists the default Git Bash install locations checked +// after BashEnvOverride and PATH. +// +// WSL's C:\Windows\System32\bash.exe is intentionally excluded: it expects +// Linux-format paths (/mnt/c/...), so script-judge commands built from +// Windows host paths would fail with file-not-found even though discovery +// succeeded. Users who want to drive skill-up through WSL bash can point +// SKILL_UP_BASH at it explicitly after arranging path translation upstream. +var knownWindowsBashPaths = []string{ + `C:\Program Files\Git\bin\bash.exe`, + `C:\Program Files (x86)\Git\bin\bash.exe`, +} + +// DiscoverBash locates a bash interpreter on Windows. It checks, in order: +// the SKILL_UP_BASH override, PATH (excluding the WSL shim under System32), +// then well-known Git Bash locations. +func DiscoverBash() (string, bool) { + if v := os.Getenv(BashEnvOverride); v != "" { + if isRegularFile(v) && !isWSLBash(v) { + return v, true + } + } + if p, err := exec.LookPath("bash"); err == nil && !isWSLBash(p) { + return p, true + } + for _, p := range knownWindowsBashPaths { + if isRegularFile(p) { + return p, true + } + } + return "", false +} + +// isWSLBash reports whether p is the WSL bash shim shipped with Windows. The +// shim lives under %SystemRoot%\System32, which is on PATH by default on every +// Windows host, so PATH-based discovery would otherwise prefer it. The shim +// expects Linux-format paths (`/mnt//...`) and silently fails on the +// Windows host paths we pass through, so we treat it as "no bash found" and +// fall through to the known Git Bash locations. +func isWSLBash(p string) bool { + abs, err := filepath.Abs(p) + if err != nil { + return false + } + system32 := windowsSystem32Dir() + if system32 == "" { + return false + } + return strings.EqualFold(filepath.Dir(abs), system32) +} + +func windowsSystem32Dir() string { + if root := os.Getenv("SystemRoot"); root != "" { + return filepath.Join(root, "System32") + } + // Fall back to the convention; SystemRoot is set on every modern Windows. + return `C:\Windows\System32` +} + +func isRegularFile(p string) bool { + //nolint:gosec // p is a known bash install location or a user-supplied override + info, err := os.Stat(p) + return err == nil && !info.IsDir() +} diff --git a/internal/platform/platform.go b/internal/platform/platform.go new file mode 100644 index 0000000..944502e --- /dev/null +++ b/internal/platform/platform.go @@ -0,0 +1,48 @@ +// Package platform centralizes OS-conditional process, shell, and executable +// discovery so the rest of skill-up does not scatter runtime.GOOS branches +// across packages. +package platform + +import ( + "context" + "os/exec" +) + +// BashEnvOverride is the environment variable a user may set to point at a +// specific bash interpreter, taking precedence over PATH and well-known +// install locations. +const BashEnvOverride = "SKILL_UP_BASH" + +// GOOS-value constants that callers across packages compare against +// runtime.GOOS / Runtime.TargetGOOS(). Centralizing them avoids string +// literals duplicated across the OS-dispatch sites. +const ( + GOOSWindows = "windows" + GOOSLinux = "linux" + GOOSDarwin = "darwin" +) + +// HostShell describes the shell that NoneRuntime.Exec will use on the +// current host. Callers that need to quote arguments for the same shell or +// inject extra environment variables it depends on should reach for Quote +// and Env rather than re-deriving them. +type HostShell struct { + // Cmd builds an exec.Cmd configured to run `command` through the host + // shell. The caller still sets Dir, Env (which must be merged with + // HostShell.Env), Stdout, and Stderr. + Cmd func(ctx context.Context, command string) *exec.Cmd + // Quote returns the argument-quoting suitable for the host shell. + Quote func(s string) string + // Env lists extra environment variables the shell needs to behave + // predictably (for example MSYS_NO_PATHCONV for Git Bash on Windows). + // Callers append it to their own env list; nil means "no extras". + Env []string + // IsBash reports whether the chosen shell is bash. POSIX hosts are + // always true; Windows is true only when DiscoverBash succeeded. + IsBash bool + // Bash is the discovered bash interpreter path, populated when IsBash + // is true. Callers building "bash invokes bash" pipelines (the .sh + // script-judge plan on Windows) read it from here so the choice of + // bash binary matches what Cmd will launch. + Bash string +} diff --git a/internal/platform/platform_test.go b/internal/platform/platform_test.go new file mode 100644 index 0000000..b793c1d --- /dev/null +++ b/internal/platform/platform_test.go @@ -0,0 +1,28 @@ +package platform + +import ( + "context" + "testing" +) + +func TestHost(t *testing.T) { + shell := Host() + if shell.Cmd == nil { + t.Fatal("Host returned a HostShell with nil Cmd") + } + if shell.Quote == nil { + t.Fatal("Host returned a HostShell with nil Quote") + } + cmd := shell.Cmd(context.Background(), "echo hi") + if cmd == nil { + t.Fatal("HostShell.Cmd returned nil") + } + if cmd.Path == "" { + t.Fatal("HostShell.Cmd produced a command with no executable path") + } + // On POSIX hosts bash should be discoverable (PATH has it); IsBash + // being false would point at a misconfigured runner. + if !shell.IsBash { + t.Logf("note: HostShell.IsBash is false (no bash discovered); the cmd fallback is exercised") + } +} diff --git a/internal/platform/shell_other.go b/internal/platform/shell_other.go new file mode 100644 index 0000000..bb7bd6f --- /dev/null +++ b/internal/platform/shell_other.go @@ -0,0 +1,46 @@ +//go:build !windows + +package platform + +import ( + "context" + "os/exec" + "sync" + + "github.com/alibaba/skill-up/internal/shellquote" +) + +// Host returns the descriptor of the shell NoneRuntime.Exec will use on the +// current host: how to launch a command, how to quote an argument for that +// shell, and any extra environment variables the shell needs to behave +// predictably. +// +// Centralizing all three lets callers (the script-judge planner especially) +// pick a quoter that matches the shell actually launched, without +// re-deriving the shell choice independently. +// +// On POSIX the shell is bash when discoverable, sh otherwise. POSIX +// single-quoting is correct in both cases. +// +// The result is cached for the process lifetime; see shell_windows.go for +// the rationale. +var hostShell = sync.OnceValue(buildHostShell) + +// Host returns the cached HostShell descriptor for the current POSIX host. +func Host() HostShell { return hostShell() } + +func buildHostShell() HostShell { + bash, hasBash := DiscoverBash() + shell := "sh" + if hasBash { + shell = bash + } + return HostShell{ + Cmd: func(ctx context.Context, command string) *exec.Cmd { + return exec.CommandContext(ctx, shell, "-c", command) + }, + Quote: shellquote.QuotePOSIX, + IsBash: hasBash, + Bash: bash, + } +} diff --git a/internal/platform/shell_windows.go b/internal/platform/shell_windows.go new file mode 100644 index 0000000..6c0b5cf --- /dev/null +++ b/internal/platform/shell_windows.go @@ -0,0 +1,91 @@ +//go:build windows + +package platform + +import ( + "context" + "os/exec" + "strings" + "sync" + "syscall" + + "github.com/alibaba/skill-up/internal/shellquote" +) + +// Host returns the descriptor of the shell NoneRuntime.Exec will use on the +// current Windows host. +// +// We prefer a discoverable bash (Git Bash via DiscoverBash) and fall back to +// cmd.exe when none is available. Choosing bash whenever possible keeps the +// many internal POSIX command strings -- agent CLI templates with single +// quotes, `set -eu` git fixtures, workspace-diff `if ...; then` pipelines -- +// working on a Windows host. The bash leg also injects MSYS_NO_PATHCONV / +// MSYS2_ARG_CONV_EXCL so MSYS does not rewrite `/x`-shaped argv entries +// before they reach native Windows binaries like cmd.exe or powershell. +// +// The cmd fallback uses SysProcAttr.CmdLine with `cmd /d /s /c ""`: +// `/d` disables HKLM/HKCU AutoRun and `/s` forces the deterministic +// strip-first-and-last-quote rule for the wrapping. Inside the wrapping the +// command must be CommandLineToArgvW-quoted, hence the QuoteWindows quoter. +// cmd.exe is not a POSIX shell, so bash-style command strings (the agent +// nvm/Node bootstrap) do not run natively on Windows under the cmd +// fallback; that remains a documented limitation in docs/guide/windows.md. +// +// The result is cached for the process lifetime: PATH / SKILL_UP_BASH are +// documented as read-once at startup (see BashEnvOverride), and the host's +// bash install does not change while skill-up is running. Caching avoids +// repeating LookPath/Stat work on every NoneRuntime.Exec. +var hostShell = sync.OnceValue(buildHostShell) + +// Host returns the cached HostShell descriptor for the current Windows host. +func Host() HostShell { return hostShell() } + +func buildHostShell() HostShell { + bash, hasBash := DiscoverBash() + if hasBash { + return HostShell{ + Cmd: func(ctx context.Context, command string) *exec.Cmd { + return exec.CommandContext(ctx, bash, "-c", command) + }, + Quote: quoteForBashDoubleQuote, + Env: []string{ + "MSYS_NO_PATHCONV=1", + "MSYS2_ARG_CONV_EXCL=*", + }, + IsBash: true, + Bash: bash, + } + } + return HostShell{ + Cmd: func(ctx context.Context, command string) *exec.Cmd { + cmd := exec.CommandContext(ctx, "cmd") + cmd.SysProcAttr = &syscall.SysProcAttr{CmdLine: `cmd /d /s /c "` + command + `"`} + return cmd + }, + Quote: shellquote.QuoteWindows, + IsBash: false, + } +} + +// quoteForBashDoubleQuote returns s wrapped in double quotes with every +// character that bash treats as active inside double quotes escaped with a +// backslash. The four actives are \, ", $, `. After bash decodes the +// resulting string each of those bytes is delivered intact to the program +// bash spawns (cmd / powershell / a second bash), so a path like +// `C:\tmp\$foo\script.ps1` survives the bash -c hop without losing the +// backslash before `$`. cmd.exe never sees this encoding because we only +// pick it when bash is the chosen shell. +func quoteForBashDoubleQuote(s string) string { + var b strings.Builder + b.Grow(len(s) + 2) + b.WriteByte('"') + for i := range len(s) { + c := s[i] + if c == '\\' || c == '"' || c == '$' || c == '`' { + b.WriteByte('\\') + } + b.WriteByte(c) + } + b.WriteByte('"') + return b.String() +} diff --git a/internal/report/workspace.go b/internal/report/workspace.go index 96d436f..cbd4d72 100644 --- a/internal/report/workspace.go +++ b/internal/report/workspace.go @@ -32,6 +32,15 @@ const ( filePerm = 0o644 ) +// isRootedPath reports whether p begins with either path separator. On +// Windows filepath.IsAbs requires a volume name (e.g. `C:\…`), so a POSIX- +// style "/abs.txt" passed in from user-supplied YAML would otherwise be +// accepted as a relative path; treat any leading `/` or `\` as rooted to +// keep the path-traversal guard OS-independent. +func isRootedPath(p string) bool { + return strings.HasPrefix(p, "/") || strings.HasPrefix(p, `\`) +} + // validateCaseID checks that caseID does not contain path traversal sequences. func validateCaseID(caseID string) error { cleaned := filepath.Clean(caseID) @@ -204,9 +213,12 @@ func (w *IterationWorkspace) WriteBenchmarkMD(bm *AnthropicBenchmark) error { // WriteFile writes arbitrary content to a file in the iteration directory. func (w *IterationWorkspace) WriteFile(relPath string, data []byte) error { - // Prevent path traversal attacks. + // Prevent path traversal attacks. On Windows filepath.IsAbs requires a + // volume name, so a POSIX-style "/abs.txt" passed in by an eval case + // would slip through; reject rooted paths (leading separator) as well + // to keep the security check OS-independent. cleaned := filepath.Clean(relPath) - if filepath.IsAbs(cleaned) || strings.HasPrefix(cleaned, "..") { + if filepath.IsAbs(cleaned) || isRootedPath(cleaned) || strings.HasPrefix(cleaned, "..") { return fmt.Errorf("invalid relative path: %s", relPath) } diff --git a/internal/report/workspace_test.go b/internal/report/workspace_test.go index 9685efe..0163279 100644 --- a/internal/report/workspace_test.go +++ b/internal/report/workspace_test.go @@ -46,21 +46,26 @@ func TestNewIterationWorkspace_DefaultAndInvalidIteration(t *testing.T) { func TestIterationWorkspace_Paths(t *testing.T) { t.Parallel() - ws, err := NewIterationWorkspace("/tmp/test-workspace", "test-skill", 1) + // Use filepath.Join so the test root and the values produced by + // IterationDir/CaseDir/etc. share the host's path separator. + root := filepath.Join(string(filepath.Separator), "tmp", "test-workspace") + iter := filepath.Join(root, "iteration-1") + caseDir := filepath.Join(iter, "case-1") + ws, err := NewIterationWorkspace(root, "test-skill", 1) if err != nil { t.Fatalf("NewIterationWorkspace error: %v", err) } - if ws.IterationDir() != "/tmp/test-workspace/iteration-1" { + if ws.IterationDir() != iter { t.Errorf("unexpected iteration dir: %s", ws.IterationDir()) } - if ws.CaseDir("case-1") != "/tmp/test-workspace/iteration-1/case-1" { + if ws.CaseDir("case-1") != caseDir { t.Errorf("unexpected case dir: %s", ws.CaseDir("case-1")) } - if ws.WithSkillDir("case-1") != "/tmp/test-workspace/iteration-1/case-1/with_skill" { + if ws.WithSkillDir("case-1") != filepath.Join(caseDir, "with_skill") { t.Errorf("unexpected with_skill dir: %s", ws.WithSkillDir("case-1")) } - if ws.WithoutSkillDir("case-1") != "/tmp/test-workspace/iteration-1/case-1/without_skill" { + if ws.WithoutSkillDir("case-1") != filepath.Join(caseDir, "without_skill") { t.Errorf("unexpected without_skill dir: %s", ws.WithoutSkillDir("case-1")) } } diff --git a/internal/runtime/docker.go b/internal/runtime/docker.go index 5b19f07..205141d 100644 --- a/internal/runtime/docker.go +++ b/internal/runtime/docker.go @@ -19,6 +19,7 @@ import ( "github.com/alibaba/skill-up/internal/logging" "github.com/alibaba/skill-up/internal/observability" + "github.com/alibaba/skill-up/internal/platform" ) const ( @@ -249,6 +250,11 @@ func (r *DockerRuntime) Close() error { return nil } +// TargetGOOS reports the GOOS of the container's guest OS. skill-up's +// docker runtime currently provisions a Linux image, so commands executed +// via `docker exec` run on a Linux guest regardless of the host platform. +func (r *DockerRuntime) TargetGOOS() string { return platform.GOOSLinux } + // Start starts the container if it is not already running. func (r *DockerRuntime) Start(ctx context.Context) error { r.mu.Lock() diff --git a/internal/runtime/none.go b/internal/runtime/none.go index caf498e..45e9c50 100644 --- a/internal/runtime/none.go +++ b/internal/runtime/none.go @@ -8,17 +8,30 @@ import ( "os" "os/exec" "path/filepath" + goruntime "runtime" "time" "go.opentelemetry.io/otel/attribute" "github.com/alibaba/skill-up/internal/logging" "github.com/alibaba/skill-up/internal/observability" + "github.com/alibaba/skill-up/internal/platform" ) const ( noneDirMode = 0o755 noneFileMode = 0o600 + + // execContextGracePeriod bounds how long Exec waits after ctx-cancel + // before forcibly closing the child's stdio pipes. Applied on every + // platform: the original motivation is Windows under Git Bash, where a + // killed parent's grandchild (ping/sleep/git) can still hold the stderr + // pipe and block pipe-reader goroutines forever; on POSIX it adds the + // same upper bound on shutdown for the rarer case of a child that + // ignores SIGTERM and keeps stdio open. 10s is comfortably above + // observed Windows grandchild teardown latency while still surfacing a + // hang quickly in tests. + execContextGracePeriod = 10 * time.Second ) // pathInWorkspaceOrAbs returns p if it is an absolute host path, otherwise filepath.Join(r.workspace, p). @@ -168,7 +181,12 @@ func (r *NoneRuntime) Exec(ctx context.Context, command string, opts ExecOptions defer span.End() startTime := time.Now() - cmd := exec.CommandContext(ctx, "bash", "-c", command) + shell := platform.Host() + cmd := shell.Cmd(ctx, command) + // Bound the grace window between ctx-cancel and Wait returning so the + // pipe-reader goroutine can unblock. See execContextGracePeriod above + // for the reasoning. + cmd.WaitDelay = execContextGracePeriod if opts.Cwd != "" { cmd.Dir = opts.Cwd } else { @@ -180,6 +198,10 @@ func (r *NoneRuntime) Exec(ctx context.Context, command string, opts ExecOptions ) env := mergeEnv(r.cfg.Env, opts.Env) + // The shell descriptor may need its own env tweaks (MSYS_NO_PATHCONV on + // Windows-bash, ...) — append them once here so the same "use bash → + // disable MSYS argv rewrite" decision lives in a single place. + env = append(env, shell.Env...) cmd.Env = env var stdout, stderr bytes.Buffer @@ -236,24 +258,27 @@ func (r *NoneRuntime) Exec(ctx context.Context, command string, opts ExecOptions // classifyExecError translates a *exec.Cmd Run error into the (exitCode, error) // pair that callers expose through ExecResult. // -// Precedence (matches the legacy inline behaviour): +// Precedence: // // nil err → (0, nil) -// *exec.ExitError + ctx.Err() == nil → (exitCode, nil) -// *exec.ExitError + ctx.Err() != nil → (exitCode, ctxErr) // process was killed by ctx -// non-ExitError → (-1, ctxErr or err) +// ctx.Err() != nil (any cause) → (-1, ctxErr) // process was killed by ctx +// *exec.ExitError → (exitCode, nil) +// non-ExitError → (-1, runErr) +// +// When the context terminated the process we always report -1 instead of the +// OS-reported exit code: on Windows a killed cmd.exe surfaces 1, which would +// otherwise be indistinguishable from a normal failure. func classifyExecError(ctx context.Context, runErr error) (int, error) { if runErr == nil { return 0, nil } - ctxErr := ctx.Err() + if ctxErr := ctx.Err(); ctxErr != nil { + return -1, ctxErr + } var exitErr *exec.ExitError if errors.As(runErr, &exitErr) { - return exitErr.ExitCode(), ctxErr - } - if ctxErr != nil { - return -1, ctxErr + return exitErr.ExitCode(), nil } return -1, runErr } @@ -305,3 +330,9 @@ func (r *NoneRuntime) Workspace() string { func (r *NoneRuntime) RequiresProcessSandbox() bool { return true } + +// TargetGOOS reports the host OS, since NoneRuntime executes commands directly +// on the host. +func (r *NoneRuntime) TargetGOOS() string { + return goruntime.GOOS +} diff --git a/internal/runtime/none_test.go b/internal/runtime/none_test.go index 03d856b..af882e2 100644 --- a/internal/runtime/none_test.go +++ b/internal/runtime/none_test.go @@ -7,6 +7,7 @@ import ( "os" "os/exec" "path/filepath" + goruntime "runtime" "strings" "sync" "testing" @@ -225,7 +226,13 @@ func TestNoneRuntime_ExecReturnsContextErrorOnTimeout(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) defer cancel() - result, err := rt.Exec(ctx, "sleep 1", ExecOptions{}) + // `sleep 1` is POSIX; on Windows cmd.exe falls back to a long ping + // so the process actually outlives the deadline and gets killed. + sleepCmd := "sleep 1" + if goruntime.GOOS == "windows" { + sleepCmd = "ping -n 3 127.0.0.1 > nul" + } + result, err := rt.Exec(ctx, sleepCmd, ExecOptions{}) if !errors.Is(err, context.DeadlineExceeded) { t.Fatalf("expected context deadline exceeded, got %v", err) } @@ -404,6 +411,11 @@ func TestNoneRuntime_ExecWithEnv(t *testing.T) { } func TestNoneRuntime_ExecExpandsPathFromRuntimeEnv(t *testing.T) { + if goruntime.GOOS == "windows" { + // The test asserts POSIX `printf "$PATH"` expansion; on Windows the + // host shell is cmd.exe, which neither has printf nor uses `$PATH`. + t.Skip("POSIX PATH expansion test; Windows has no equivalent") + } bashPath, err := exec.LookPath("bash") if err != nil { t.Fatal(err) diff --git a/internal/runtime/opensandbox.go b/internal/runtime/opensandbox.go index 9f13e5c..a1e1e5b 100644 --- a/internal/runtime/opensandbox.go +++ b/internal/runtime/opensandbox.go @@ -578,6 +578,12 @@ func (r *OpenSandboxRuntime) RequiresProcessSandbox() bool { return false } +// TargetGOOS reports "linux": OpenSandbox always executes commands inside a +// Linux sandbox regardless of the host OS. +func (r *OpenSandboxRuntime) TargetGOOS() string { + return "linux" +} + func (r *OpenSandboxRuntime) connectionConfig() opensandbox.ConnectionConfig { return opensandbox.ConnectionConfig{ Domain: r.baseURL, @@ -688,7 +694,7 @@ func (r *OpenSandboxRuntime) ensureDirectory(ctx context.Context, dir string, mo if err == nil { return nil } - quoted := shellquote.Quote(dir) + quoted := shellquote.QuotePOSIX(dir) result, execErr := r.runCommand(ctx, "/", "mkdir -p "+quoted+" && test -d "+quoted+" && test -w "+quoted, 30) if execErr != nil { return err @@ -711,7 +717,7 @@ func (r *OpenSandboxRuntime) ensureDirectories(ctx context.Context, dirs []strin command.WriteString("mkdir -p") for _, dir := range dirs { command.WriteByte(' ') - command.WriteString(shellquote.Quote(dir)) + command.WriteString(shellquote.QuotePOSIX(dir)) } result, err := r.runCommand(ctx, "/", command.String(), 30) if err != nil { @@ -816,7 +822,12 @@ func safeLocalTarget(root, rel string) (string, error) { if clean == "." { return root, nil } - if filepath.IsAbs(clean) || clean == ".." || strings.HasPrefix(clean, ".."+string(filepath.Separator)) { + // On Windows filepath.IsAbs requires a volume name, so a POSIX-style + // "/absolute" supplied via the SDK would otherwise slip through; also + // reject any rooted path (leading separator) so the guard behaves the + // same on both OSes. + rooted := strings.HasPrefix(clean, "/") || strings.HasPrefix(clean, `\`) + if filepath.IsAbs(clean) || rooted || clean == ".." || strings.HasPrefix(clean, ".."+string(filepath.Separator)) { return "", fmt.Errorf("unsafe sandbox file path: %s", rel) } return filepath.Join(root, clean), nil diff --git a/internal/runtime/runtime.go b/internal/runtime/runtime.go index 5042a0c..829bf73 100644 --- a/internal/runtime/runtime.go +++ b/internal/runtime/runtime.go @@ -112,6 +112,15 @@ type Runtime interface { Workspace() string // RequiresProcessSandbox reports whether agents should enable their own process sandbox. RequiresProcessSandbox() bool + // TargetGOOS reports the GOOS value of the environment where Exec + // runs commands. NoneRuntime executes on the host so it returns + // runtime.GOOS; OpenSandboxRuntime always returns "linux" because + // it executes inside a Linux sandbox. Implementations must return a + // non-empty value -- callers (most importantly the script-judge + // planner) use it to choose between POSIX and Windows command shapes, + // and silently defaulting to "linux" would mask configuration + // mistakes in any future Windows-targeting runtime. + TargetGOOS() string } // FileReadSeeker combines io.ReadSeeker for file access. diff --git a/internal/shellquote/shellquote.go b/internal/shellquote/shellquote.go index f927ec2..7814bca 100644 --- a/internal/shellquote/shellquote.go +++ b/internal/shellquote/shellquote.go @@ -1,8 +1,49 @@ +// Package shellquote quotes strings for safe inclusion in shell command lines. package shellquote import "strings" -// Quote returns a POSIX shell single-quoted representation of s. -func Quote(s string) string { +// QuotePOSIX returns a POSIX shell single-quoted representation of s. +func QuotePOSIX(s string) string { return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'" } + +// QuoteWindows returns a representation of s safe to pass as a single argument +// on a Windows command line, following the CommandLineToArgvW parsing rules: +// the argument is wrapped in double quotes; any run of backslashes immediately +// preceding a double quote (or the closing quote) is doubled; interior double +// quotes are escaped as \". +// +// The result is always wrapped, even when s has no whitespace or metacharacter +// triggers: when NoneRuntime.Exec routes through bash on Windows (the default +// when Git Bash is discoverable), an unquoted backslash-bearing path such as +// `C:\tmp\script.cmd` would have its backslashes stripped by bash and reach +// the downstream `cmd /c` / `powershell -File` as `C:tmpscript.cmd`. Wrapping +// in double quotes keeps backslashes literal under both bash and cmd, and is +// equally safe for CommandLineToArgvW consumers. +func QuoteWindows(s string) string { + if s == "" { + return `""` + } + var b strings.Builder + b.WriteByte('"') + backslashes := 0 + for i := range len(s) { + switch c := s[i]; c { + case '\\': + backslashes++ + case '"': + b.WriteString(strings.Repeat(`\`, backslashes*2+1)) + b.WriteByte('"') + backslashes = 0 + default: + b.WriteString(strings.Repeat(`\`, backslashes)) + backslashes = 0 + b.WriteByte(c) + } + } + // Double trailing backslashes so they do not escape the closing quote. + b.WriteString(strings.Repeat(`\`, backslashes*2)) + b.WriteByte('"') + return b.String() +} diff --git a/internal/shellquote/shellquote_test.go b/internal/shellquote/shellquote_test.go index 697cc8e..e621d56 100644 --- a/internal/shellquote/shellquote_test.go +++ b/internal/shellquote/shellquote_test.go @@ -2,22 +2,41 @@ package shellquote import "testing" -func TestQuote(t *testing.T) { +func TestQuotePOSIX(t *testing.T) { tests := []struct { - name string - input string - want string + in, want string }{ - {name: "plain", input: "hello", want: "'hello'"}, - {name: "spaces", input: "hello world", want: "'hello world'"}, - {name: "single quote", input: "can't", want: "'can'\\''t'"}, - {name: "empty", input: "", want: "''"}, + {"", "''"}, + {"plain", "'plain'"}, + {"with space", "'with space'"}, + {"it's", `'it'\''s'`}, } + for _, tt := range tests { + if got := QuotePOSIX(tt.in); got != tt.want { + t.Errorf("QuotePOSIX(%q) = %q, want %q", tt.in, got, tt.want) + } + } +} +func TestQuoteWindows(t *testing.T) { + tests := []struct { + name, in, want string + }{ + {"empty", "", `""`}, + // QuoteWindows always wraps in double quotes so bash (which strips + // unquoted backslashes) does not mangle paths like `C:\tmp\file`. + {"plain", "plain", `"plain"`}, + {"backslash path no space", `C:\tmp\s.ps1`, `"C:\tmp\s.ps1"`}, + {"space", `C:\Program Files\s.exe`, `"C:\Program Files\s.exe"`}, + {"interior quote", `a"b`, `"a\"b"`}, + {"trailing backslash with space", `a b\`, `"a b\\"`}, + {"backslash before quote", `a\"b`, `"a\\\"b"`}, + {"cmd metachar", "a&b", `"a&b"`}, + } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := Quote(tt.input); got != tt.want { - t.Fatalf("Quote(%q) = %q, want %q", tt.input, got, tt.want) + if got := QuoteWindows(tt.in); got != tt.want { + t.Errorf("QuoteWindows(%q) = %q, want %q", tt.in, got, tt.want) } }) } diff --git a/scripts/windows/hooks.ps1 b/scripts/windows/hooks.ps1 new file mode 100644 index 0000000..a42fb8f --- /dev/null +++ b/scripts/windows/hooks.ps1 @@ -0,0 +1,11 @@ +#!/usr/bin/env pwsh +# Point Git at this repo's hooks (run once per clone). +# PowerShell counterpart of the `hooks` target in the Makefile, for Windows +# contributors who do not have GNU make available. +$ErrorActionPreference = 'Stop' + +Set-Location (git rev-parse --show-toplevel) +if ((git config core.hooksPath) -ne '.githooks') { + git config core.hooksPath .githooks + Write-Host 'git hooks installed (.githooks)' +} diff --git a/scripts/windows/lint-tools.ps1 b/scripts/windows/lint-tools.ps1 new file mode 100644 index 0000000..4c66d7f --- /dev/null +++ b/scripts/windows/lint-tools.ps1 @@ -0,0 +1,23 @@ +#!/usr/bin/env pwsh +# Install the pinned lint tools into .tools/bin. +# PowerShell counterpart of the `lint-tools` target in the Makefile. The tool +# versions are kept in sync with the Makefile (GOLANGCI_LINT_VERSION / +# REVIVE_VERSION). +$ErrorActionPreference = 'Stop' + +$golangciLintVersion = 'v2.11.4' +$reviveVersion = 'v1.10.0' + +$repoRoot = (git rev-parse --show-toplevel) +$toolsBin = Join-Path $repoRoot '.tools\bin' +New-Item -ItemType Directory -Force -Path $toolsBin | Out-Null + +$env:GOBIN = $toolsBin +$env:GOFLAGS = '-buildvcs=false' + +go install "github.com/golangci/golangci-lint/v2/cmd/golangci-lint@$golangciLintVersion" +if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } +go install "github.com/mgechev/revive@$reviveVersion" +if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +Write-Host "lint tools installed into $toolsBin" diff --git a/scripts/windows/verify.ps1 b/scripts/windows/verify.ps1 new file mode 100644 index 0000000..3bb0ce8 --- /dev/null +++ b/scripts/windows/verify.ps1 @@ -0,0 +1,33 @@ +#!/usr/bin/env pwsh +# Format check, vet, revive, and golangci-lint. +# PowerShell counterpart of the `verify` target in the Makefile. +$ErrorActionPreference = 'Stop' + +$repoRoot = (git rev-parse --show-toplevel) +Set-Location $repoRoot +$toolsBin = Join-Path $repoRoot '.tools\bin' + +# fmt-check: fail if any .go file is not gofmt-formatted. +$unformatted = (gofmt -l .) +if ($unformatted) { + Write-Host 'These files need gofmt (run: gofmt -w .):' + Write-Host $unformatted + exit 1 +} + +go vet ./... +if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +$revive = Join-Path $toolsBin 'revive.exe' +$golangciLint = Join-Path $toolsBin 'golangci-lint.exe' +if (-not (Test-Path $revive) -or -not (Test-Path $golangciLint)) { + & (Join-Path $PSScriptRoot 'lint-tools.ps1') +} + +& $revive -config revive.toml ./... +if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +& $golangciLint run ./... +if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +Write-Host 'verify passed'