diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..952a2ef
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,16 @@
+# Keep line endings deterministic regardless of the contributor's OS or
+# core.autocrlf setting.
+
+# Go source must be LF: gofmt and the toolchain expect Unix line endings.
+*.go   text eol=lf
+
+# Shell scripts must be LF: a CRLF shebang line (e.g. "#!/bin/sh\r") makes
+# the kernel fail to locate the interpreter.
+*.sh   text eol=lf
+
+# PowerShell handles LF on every platform; keep it consistent with .editorconfig.
+*.ps1  text eol=lf
+
+# Windows batch scripts use CRLF.
+*.cmd  text eol=crlf
+*.bat  text eol=crlf
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4f22f32..a159c3d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,9 +24,13 @@ permissions:
 jobs:
   build:
     name: Build & Test
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
     strategy:
+      # Surface failures on every OS independently instead of cancelling the
+      # whole matrix when one runner fails.
+      fail-fast: false
       matrix:
+        os: [ubuntu-latest, windows-latest]
         go-version: ["1.25.x"]
 
     steps:
@@ -43,14 +47,22 @@ jobs:
       - name: Build
         run: go build ./...
 
+      # Force bash on Windows runners too: pwsh's legacy native-argument
+      # passing splits `-coverprofile=coverage.out` and feeds `.out` to go
+      # as a package import path, producing `FAIL .out [setup failed]`.
+      # Git Bash is preinstalled on windows-latest and parses args verbatim.
       - name: Test (with race detector and coverage)
+        shell: bash
         run: go test -race -timeout 120s -covermode=atomic -coverpkg=./... -coverprofile=coverage.out ./...
 
       # Self-hosted coverage badge: parse the total from `go tool cover -func`
       # and rewrite .github/badges/coverage.json. The shields.io endpoint badge
       # in README.md reads this JSON via raw.githubusercontent.com.
+      # Ubuntu-only: the Windows leg of the matrix uses a different shell
+      # toolchain and we only need one canonical coverage number.
       - name: Compute coverage and update badge
         id: coverage
+        if: matrix.os == 'ubuntu-latest'
         run: |
           pct=$(go tool cover -func=coverage.out | awk '/^total:/ {gsub("%","",$3); print $3}')
           if [ -z "$pct" ]; then
@@ -79,7 +91,7 @@ jobs:
       # upload would silently match nothing and, combined with
       # if-no-files-found: error, fail the build job on every push to main.
       - name: Upload coverage badge artifact
-        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+        if: matrix.os == 'ubuntu-latest' && github.event_name == 'push' && github.ref == 'refs/heads/main'
         uses: actions/upload-artifact@v5
         with:
           name: coverage-badge
@@ -121,6 +133,43 @@ jobs:
           git commit -m "chore(ci): update coverage badge to ${pct} [skip ci]"
           git push origin badges
 
+  e2e-windows:
+    # Windows e2e is intentionally narrower than the Linux e2e: it does not
+    # set SKILL_UP_FULL_E2E, so the LLM-dependent tests in e2e/cli_test.go,
+    # e2e/agent_test.go and e2e/mcp_test.go self-skip. The mock-engine and
+    # script-judge contract tests still exercise the Windows-specific code
+    # paths added by issue #31 — shell selection, MSYS argv handling,
+    # QuoteWindows, NewShellCmd's WaitDelay, the script-judge interpreter
+    # dispatch — through the full CLI pipeline. Real-LLM coverage stays on
+    # the Linux e2e job below.
+    name: E2E (none runtime, Windows)
+    runs-on: windows-latest
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: actions/setup-go@v6
+        with:
+          go-version: "1.25.x"
+          cache: true
+
+      # Match the Build & Test step's shell choice so pwsh's legacy argv
+      # passing does not split `-coverprofile`-style flags.
+      - name: Run e2e tests (none runtime, quick mode)
+        shell: bash
+        run: go test -tags e2e -timeout 1200s -count=1 -v ./e2e
+        env:
+          SKILL_UP_E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts
+
+      - name: Upload e2e workspace artifacts
+        if: always() && hashFiles('e2e-artifacts/**') != ''
+        uses: actions/upload-artifact@v5
+        with:
+          name: e2e-windows-workspaces
+          path: e2e-artifacts/
+          if-no-files-found: ignore
+          retention-days: 14
+
   e2e:
     name: E2E (none runtime)
     runs-on: ubuntu-latest
@@ -213,7 +262,7 @@ jobs:
 
       - name: Upload e2e workspace artifacts
         if: always() && steps.secrets.outputs.available == 'true' && hashFiles('e2e-artifacts/**') != ''
-        uses: actions/upload-artifact@v7
+        uses: actions/upload-artifact@v5
         with:
           name: e2e-workspaces
           path: e2e-artifacts/
@@ -336,7 +385,7 @@ jobs:
 
       - name: Upload OpenSandbox server log
         if: always() && steps.secrets.outputs.available == 'true'
-        uses: actions/upload-artifact@v7
+        uses: actions/upload-artifact@v5
         with:
           name: opensandbox-server-log
           path: ${{ runner.temp }}/opensandbox-server.log
@@ -345,7 +394,7 @@ jobs:
 
       - name: Upload opensandbox e2e workspace artifacts
         if: always() && steps.secrets.outputs.available == 'true' && hashFiles('e2e-opensandbox-artifacts/**') != ''
-        uses: actions/upload-artifact@v7
+        uses: actions/upload-artifact@v5
         with:
           name: e2e-opensandbox-workspaces
           path: e2e-opensandbox-artifacts/
diff --git a/AGENTS.md b/AGENTS.md
index 22fe952..c2fa654 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -34,6 +34,11 @@ make lint-tools
 If you are in mainland China and `go install` is slow, set
 `GOPROXY=https://goproxy.cn,direct` before running the commands above.
 
+On Windows, `make` is unavailable by default; use the PowerShell equivalents
+in `scripts/windows/` (`hooks.ps1`, `lint-tools.ps1`, `verify.ps1`). See the
+[Windows support guide](docs/guide/windows.md) for supported features and
+known limitations.
+
 ## Build & run
 
 ```bash
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8d1de89..8d179a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,39 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.3.0] - 2026-05-27
+
+### Added
+- **First-class Windows support** for the CLI, the `none` runtime, and the
+  script judge. Native Windows builds run all unit tests, the script judge
+  routes `.ps1`/`.cmd`/`.bat` directly and `.sh` through Git Bash when
+  available, and CI gains a `windows-latest` build/test matrix plus a
+  dedicated `E2E (none runtime, Windows)` contract job.
+  See [Windows Support](docs/guide/windows.md) for the full guide.
+- `SKILL_UP_BASH` environment variable: explicit path to a `bash`
+  executable for skill-up's `none` runtime to use. Honored on every
+  platform (read once at startup, takes precedence over `PATH`).
+- PowerShell tooling under `scripts/windows/`: `hooks.ps1`,
+  `lint-tools.ps1`, and `verify.ps1` mirror the Makefile targets for
+  contributors on Windows; `examples/judge-debug-eval.ps1` provides a
+  runnable PowerShell script-judge example.
+- `.gitattributes` pins line endings (LF for `*.sh`, CRLF for `*.ps1` /
+  `*.cmd` / `*.bat`) so Git checkout on Windows does not break scripts.
+
+### Changed
+- Agent CLIs (Claude Code, Codex, Qoder CLI) now hard-fail on Windows
+  hosts without a discoverable bash, with a clear error pointing at
+  Git Bash or `SKILL_UP_BASH`. Previously the cmd.exe fallback would
+  accept agent commands but leak shell metacharacters from instructions
+  into the host shell.
+- `internal/platform` centralizes host shell, quoter, and bash discovery
+  behind a single `platform.Host()` (cached for the process lifetime).
+  Replaces the previous ad-hoc platform branching in `NoneRuntime.Exec`
+  and the script-judge planner.
+- `Runtime.TargetGOOS() string` is now a required interface method so
+  future runtimes get a compile-time error rather than silently
+  defaulting to `"linux"`.
+
 ## [0.2.3] - 2026-05-27
 
 ### Added
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 333cad4..be383d9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -20,6 +20,12 @@ We welcome bug reports, feature requests, documentation improvements, and code c
    # If you touched anything under e2e/ or internal/runner/, also run:
    make e2e
    ```
+   On Windows, `make` is unavailable by default — use the PowerShell scripts
+   in `scripts/windows/` (`verify.ps1`, `lint-tools.ps1`, `hooks.ps1`) and the
+   standard `go build` / `go test -race ./...` commands. The `make e2e`
+   equivalent is `go test -tags e2e -v ./e2e` (with the same env vars the
+   Makefile target sets). See the
+   [Windows support guide](docs/guide/windows.md).
 5. Commit using **Conventional Commits** (enforced by `.githooks/commit-msg`). See the *Commit Message* section below for the allowed types and examples.
 6. Push your branch to your fork and open a Pull Request against `main`. Fill out the PR template, link any related issues, and describe the user-visible impact.
 7. Update [`CHANGELOG.md`](CHANGELOG.md) in the same PR if your change is user-visible.
diff --git a/README.md b/README.md
index 696d32f..72e2e24 100644
--- a/README.md
+++ b/README.md
@@ -144,6 +144,11 @@ make build
 go build -o bin/skill-up ./cmd/skill-up
 ```
 
+**Windows users**: skill-up runs natively on Windows. See
+[Windows Support](docs/guide/windows.md) for the recommended workflow,
+known limitations (notably: native agent CLI execution requires Git
+Bash), and the PowerShell tooling under `scripts/windows/`.
+
 ## Quick Start
 
 ### 1. Create Eval Config
diff --git a/README.zh.md b/README.zh.md
index bb05f0a..baeca7d 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -141,6 +141,11 @@ make build
 go build -o bin/skill-up ./cmd/skill-up
 ```
 
+**Windows 用户**：skill-up 原生支持 Windows。请参阅
+[Windows 支持指南](docs/zh/guide/windows.md) 了解推荐工作流、已知限制
+（特别是：原生运行 agent CLI 需要 Git Bash）以及 `scripts/windows/`
+下的 PowerShell 工具脚本。
+
 ## 快速上手
 
 ### 第一步：创建评测配置
diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts
index 4f5f343..5f77440 100644
--- a/docs/.vitepress/config.ts
+++ b/docs/.vitepress/config.ts
@@ -51,6 +51,7 @@ export default defineConfig({
               text: 'Introduction',
               items: [
                 { text: 'Getting Started', link: '/guide/getting-started' },
+                { text: 'Windows Support', link: '/guide/windows' },
               ],
             },
             {
@@ -106,6 +107,7 @@ export default defineConfig({
               text: '入门',
               items: [
                 { text: '快速上手', link: '/zh/guide/getting-started' },
+                { text: 'Windows 支持', link: '/zh/guide/windows' },
               ],
             },
             {
diff --git a/docs/guide/windows.md b/docs/guide/windows.md
new file mode 100644
index 0000000..1112a0e
--- /dev/null
+++ b/docs/guide/windows.md
@@ -0,0 +1,109 @@
+# Windows Support
+
+skill-up runs natively on Windows. This page covers what works, the current
+limitations, and the recommended workflow.
+
+---
+
+## Supported
+
+- **Build and unit tests** — `go build ./...` and `go test ./...` pass on
+  Windows. CI exercises a `windows-latest` runner alongside Linux.
+- **The `none` runtime** — commands run on the host through `cmd.exe`.
+- **The `opensandbox` runtime** — unaffected by the host OS; it always
+  executes inside a Linux sandbox.
+- **The script judge** — dispatches by file extension (or shebang):
+
+  | Script            | Interpreter on Windows                      |
+  | ----------------- | ------------------------------------------- |
+  | `.ps1`            | PowerShell                                  |
+  | `.cmd` / `.bat`   | `cmd.exe`                                   |
+  | `.sh`             | bash (Git Bash; see below)                  |
+
+## Running `.sh` script judges on Windows
+
+A `.sh` script judge needs a `bash` interpreter. skill-up looks for one in
+this order:
+
+1. the `SKILL_UP_BASH` environment variable (an explicit path to `bash.exe`);
+2. `bash` on `PATH`;
+3. well-known Git Bash install locations —
+   `C:\Program Files\Git\bin\bash.exe` and
+   `C:\Program Files (x86)\Git\bin\bash.exe`.
+
+If none is found the script judge fails with a clear error. Install
+[Git for Windows](https://git-scm.com/download/win) or set `SKILL_UP_BASH`.
+
+The WSL shim at `C:\Windows\System32\bash.exe` is intentionally rejected at
+all three steps (override, PATH, well-known) because it expects Linux-format
+`/mnt/c/...` paths and silently fails on the Windows-style paths skill-up
+generates. Users who want to drive script judges through WSL must arrange
+path translation upstream and point `SKILL_UP_BASH` at a non-WSL bash — or
+simply run skill-up inside WSL itself (see "Recommended workflow" below).
+
+## OpenSandbox runtime on Windows
+
+The `opensandbox` runtime talks to a remote OpenSandbox server over HTTP and
+never spawns a host shell. Running `skill-up.exe` on native Windows against a
+remote sandbox works today: all host-side path handling already crosses the
+host→sandbox boundary through `filepath.ToSlash`, and the sandbox itself is a
+Linux container, so the script judge and any agent run inside it behave
+exactly as they do on Linux.
+
+OpenSandbox also offers a [**Windows guest profile**](https://github.com/alibaba/OpenSandbox/blob/main/docs/windows-sandbox.md):
+the server runs `dockur/windows` (Windows in KVM/QEMU inside a Linux container)
+and the API accepts `platform: {"os": "windows", "arch": "amd64"}` on create.
+At the time of writing the Go SDK does not yet expose the `Platform` field, so
+driving a Windows-guest sandbox from skill-up is blocked on an upstream Go SDK
+update — tracked separately.
+
+For a Windows machine that needs the full agent workflow **without** a remote
+sandbox, run skill-up inside **WSL2**. WSL2 is a Linux environment, so both the
+`none` and `opensandbox` runtimes — including the agent Node/nvm bootstrap —
+work without limitation.
+
+## Contributor tooling
+
+`make` is not available on Windows by default. Use the PowerShell scripts
+under `scripts/windows/` instead:
+
+```powershell
+# Install git hooks (equivalent to `make hooks`)
+pwsh scripts/windows/hooks.ps1
+
+# Install pinned lint tools into .tools/bin (equivalent to `make lint-tools`)
+pwsh scripts/windows/lint-tools.ps1
+
+# fmt-check + vet + revive + golangci-lint (equivalent to `make verify`)
+pwsh scripts/windows/verify.ps1
+```
+
+Build and test use the standard Go toolchain, which is cross-platform:
+
+```powershell
+go build -o bin/skill-up.exe ./cmd/skill-up
+go test -race ./...
+```
+
+## Known limitations
+
+- **Running real agents natively** — Claude Code / Codex / Qoder CLI are
+  launched through a bash-based Node/nvm bootstrap. That bootstrap does not
+  run under `cmd.exe`. To run full agent evals on Windows, either install
+  Node.js and the agent CLIs yourself beforehand, or use WSL2.
+- **`.ps1` script judges require a Windows target** — when the runtime target
+  is POSIX (for example the `opensandbox` Linux sandbox), only `.sh` scripts
+  are supported.
+- **`cmd.exe` expands `%VAR%` inside arguments** — when no bash is discovered
+  and the `cmd /d /s /c` fallback shell runs, literal `%NAME%` substrings
+  inside command arguments are still expanded by cmd. There is no reliable
+  command-line escape for this. Do not interpolate untrusted strings into
+  shell commands. Install Git Bash (which skill-up auto-discovers) to avoid
+  the cmd fallback entirely.
+
+## Recommended workflow
+
+- **Authoring and running script-judge evals** — native Windows works well.
+  Prefer `.ps1` script judges, or install Git for Windows for `.sh` support.
+- **Running full agent evals** — use **WSL2**, so the evaluator and the agent
+  CLIs share one POSIX environment and avoid path/credential friction.
diff --git a/docs/zh/guide/windows.md b/docs/zh/guide/windows.md
new file mode 100644
index 0000000..2d78826
--- /dev/null
+++ b/docs/zh/guide/windows.md
@@ -0,0 +1,96 @@
+# Windows 支持
+
+skill-up 原生支持 Windows。本页说明哪些功能可用、当前的限制，以及推荐的工作流。
+
+---
+
+## 已支持
+
+- **构建与单元测试** —— `go build ./...` 和 `go test ./...` 在 Windows 上通过。
+  CI 在 Linux 之外额外运行 `windows-latest` runner。
+- **`none` runtime** —— 命令通过 `cmd.exe` 在宿主机上执行。
+- **`opensandbox` runtime** —— 不受宿主机 OS 影响，始终在 Linux 沙箱内执行。
+- **script judge** —— 按文件扩展名（或 shebang）分派解释器：
+
+  | 脚本              | Windows 上的解释器                          |
+  | ----------------- | ------------------------------------------- |
+  | `.ps1`            | PowerShell                                  |
+  | `.cmd` / `.bat`   | `cmd.exe`                                   |
+  | `.sh`             | bash（Git Bash，见下文）                    |
+
+## 在 Windows 上运行 `.sh` script judge
+
+`.sh` script judge 需要一个 `bash` 解释器。skill-up 按以下顺序查找：
+
+1. `SKILL_UP_BASH` 环境变量（指向 `bash.exe` 的明确路径）；
+2. `PATH` 上的 `bash`；
+3. 知名 Git Bash 安装位置 —— `C:\Program Files\Git\bin\bash.exe` 与
+   `C:\Program Files (x86)\Git\bin\bash.exe`。
+
+若都找不到，script judge 会以明确的错误失败。请安装
+[Git for Windows](https://git-scm.com/download/win) 或设置 `SKILL_UP_BASH`。
+
+`C:\Windows\System32\bash.exe`（WSL shim）会在三个步骤里都被主动忽略 ——
+即使通过 `SKILL_UP_BASH` 显式指向或它在 PATH 上排在前面，因为它期望 Linux
+风格的 `/mnt/c/...` 路径，而 skill-up 传入的是 Windows 路径，会静默失败。
+需要走 WSL 的用户请自行处理路径翻译并把 `SKILL_UP_BASH` 指向非 WSL 的 bash，
+或者直接在 WSL 内运行 skill-up（见下文「推荐工作流」）。
+
+## Windows 上的 OpenSandbox runtime
+
+`opensandbox` runtime 通过 HTTP 与远程 OpenSandbox 服务器通信，不会启动任何
+宿主机 shell。在原生 Windows 上运行 `skill-up.exe` 连接远程 sandbox 当前即可
+工作：所有宿主机侧的路径处理都已通过 `filepath.ToSlash` 跨越「宿主机→sandbox」
+边界，而 sandbox 本身是 Linux 容器，因此其中的 script judge 和 agent 行为与在
+Linux 上完全一致。
+
+OpenSandbox 也提供 [**Windows guest profile**](https://github.com/alibaba/OpenSandbox/blob/main/docs/windows-sandbox.md)：
+服务端在 Linux 容器里通过 KVM/QEMU 运行 `dockur/windows`，创建 API 接受
+`platform: {"os": "windows", "arch": "amd64"}`。撰写本文时 Go SDK 尚未暴露
+`Platform` 字段，因此从 skill-up 驱动 Windows guest sandbox 依赖上游 Go SDK
+补齐 —— 单独跟进。
+
+如果某台 Windows 机器需要在**没有**远程 sandbox 的情况下使用完整的 agent
+工作流，请在 **WSL2** 中运行 skill-up。WSL2 是 Linux 环境，因此 `none` 与
+`opensandbox` 两种 runtime —— 包括 agent 的 Node/nvm 引导 —— 都能无限制工作。
+
+## 贡献者工具
+
+Windows 默认没有 `make`。请改用 `scripts/windows/` 下的 PowerShell 脚本：
+
+```powershell
+# 安装 git hooks（等价于 `make hooks`）
+pwsh scripts/windows/hooks.ps1
+
+# 将固定版本的 lint 工具装入 .tools/bin（等价于 `make lint-tools`）
+pwsh scripts/windows/lint-tools.ps1
+
+# fmt-check + vet + revive + golangci-lint（等价于 `make verify`）
+pwsh scripts/windows/verify.ps1
+```
+
+构建和测试使用标准的 Go 工具链，本身就是跨平台的：
+
+```powershell
+go build -o bin/skill-up.exe ./cmd/skill-up
+go test -race ./...
+```
+
+## 已知限制
+
+- **原生运行真实 agent** —— Claude Code / Codex / Qoder CLI 通过基于 bash 的
+  Node/nvm 引导脚本启动，该脚本无法在 `cmd.exe` 下运行。要在 Windows 上运行
+  完整的 agent 评测，请预先自行安装 Node.js 和对应的 agent CLI，或使用 WSL2。
+- **`.ps1` script judge 需要 Windows 目标** —— 当 runtime 目标是 POSIX
+  （例如 `opensandbox` 的 Linux 沙箱）时，仅支持 `.sh` 脚本。
+- **`cmd.exe` 会展开参数里的 `%VAR%`** —— 当宿主未发现 bash、回退到
+  `cmd /d /s /c` 时，参数中的 `%NAME%` 子串仍会被 cmd 展开，命令行层面没有
+  可靠的转义办法。不要把不可信字符串拼接到 shell 命令中。安装 Git Bash
+  （skill-up 会自动发现）可完全避开此回退路径。
+
+## 推荐工作流
+
+- **编写并运行 script-judge 评测** —— 原生 Windows 即可。优先使用 `.ps1`
+  script judge，或安装 Git for Windows 以支持 `.sh`。
+- **运行完整的 agent 评测** —— 使用 **WSL2**，让评测器与 agent CLI 共享同一个
+  POSIX 环境，避免路径与凭据的摩擦。
diff --git a/e2e/main_test.go b/e2e/main_test.go
index 21ea5b5..f4682b2 100644
--- a/e2e/main_test.go
+++ b/e2e/main_test.go
@@ -24,7 +24,14 @@ func mustCompile() (string, func()) {
 		panic("creating temp dir: " + err.Error())
 	}
 
-	binPath := filepath.Join(dir, "skill-up")
+	binName := "skill-up"
+	if runtime.GOOS == "windows" {
+		// Windows refuses to execute a file without a recognized extension,
+		// so go build's -o output must end in .exe or every later
+		// exec.Command(binaryPath) fails with "executable file not found".
+		binName += ".exe"
+	}
+	binPath := filepath.Join(dir, binName)
 
 	_, testFile, _, _ := runtime.Caller(0)
 	projectRoot := filepath.Dir(filepath.Dir(testFile))
diff --git a/examples/judge-debug-eval.ps1 b/examples/judge-debug-eval.ps1
new file mode 100644
index 0000000..b7b5fd0
--- /dev/null
+++ b/examples/judge-debug-eval.ps1
@@ -0,0 +1,17 @@
+#!/usr/bin/env pwsh
+# Evaluation script: check whether the agent output identified a bug and
+# provided a fix suggestion. PowerShell counterpart of judge-debug-eval.sh,
+# for use as a script judge on Windows hosts.
+$message = $env:EVAL_FINAL_MESSAGE
+
+if ($message -match 'bug') {
+    if ($message -match 'fix|nil') {
+        Write-Output 'Agent identified the bug AND provided a fix'
+        exit 0
+    }
+    Write-Output 'Agent identified the bug but did NOT provide a fix'
+    exit 1
+}
+
+Write-Output 'Agent failed to identify any bug'
+exit 1
diff --git a/internal/agent/agent.go b/internal/agent/agent.go
index 0411650..c9e22dc 100644
--- a/internal/agent/agent.go
+++ b/internal/agent/agent.go
@@ -14,13 +14,37 @@ import (
 	"github.com/alibaba/skill-up/internal/credential"
 	"github.com/alibaba/skill-up/internal/logging"
 	"github.com/alibaba/skill-up/internal/observability"
+	"github.com/alibaba/skill-up/internal/platform"
 	"github.com/alibaba/skill-up/internal/runtime"
+	"github.com/alibaba/skill-up/internal/shellquote"
 	"github.com/alibaba/skill-up/pkg/transcript"
 )
 
 // ErrAgentNotFound is returned when the agent executable is not found.
 var ErrAgentNotFound = errors.New("agent not found in PATH")
 
+// ErrAgentRequiresBash is returned when an agent CLI is invoked on a Windows
+// host where Git Bash (or another bash discoverable by platform.DiscoverBash)
+// is not available. Agent commands are POSIX-quoted and assume a bash
+// interpreter; running them through the cmd.exe fallback would let metachars
+// (`& | " %VAR%`) in the instruction reach the shell unprotected. See
+// docs/guide/windows.md for the documented limitation.
+var ErrAgentRequiresBash = errors.New("agent CLI execution on Windows requires bash; install Git for Windows or set SKILL_UP_BASH")
+
+// requireBashOnWindowsHost rejects agent execution when the runtime's target
+// is Windows but the host shell would be cmd.exe. We only enforce this for
+// runtimes whose target matches the host (NoneRuntime today); sandboxed
+// runtimes target a non-Windows guest and never go through platform.Host().
+func requireBashOnWindowsHost(rt Runtime) error {
+	if rt.TargetGOOS() != platform.GOOSWindows {
+		return nil
+	}
+	if platform.Host().IsBash {
+		return nil
+	}
+	return ErrAgentRequiresBash
+}
+
 // ErrAgentInstallFailed is returned when agent installation fails.
 var ErrAgentInstallFailed = errors.New("agent installation failed")
 
@@ -114,9 +138,13 @@ const ExitCodeSignalKilled = -1
 const (
 	agentProviderOpenAI    = "openai"
 	agentProviderAnthropic = "anthropic"
-	agentExecutablePath    = "$HOME/.local/bin:$HOME/.nvm/current/bin:$PATH"
 )
 
+// agentExecutablePath is defined per host OS in path_{windows,other}.go: a
+// POSIX PATH override pointing at the nvm/Node bootstrap install locations on
+// POSIX hosts, and an empty string on Windows so the host's native PATH (and
+// case-insensitive `Path`) reach `where` lookups untouched.
+
 // NewBaseAgent creates a new BaseAgent with the given config.
 // It preserves the resolved config passed from the caller.
 func NewBaseAgent(cfg Config) BaseAgent {
@@ -169,8 +197,7 @@ func (a *BaseAgent) logCredentialStatus(ctx context.Context, apiKeyEnv, baseURLE
 	// This does not scan unrelated providers.
 	if apiKey := os.Getenv(apiKeyEnv); apiKey != "" {
 		logging.DebugContextf(ctx, "%s detected for %s (source=process_env)", apiKeyEnv, a.Name())
-		if baseURL := os.Getenv(baseURLEnv); baseURLEnv != "" && baseURL != "" {
-			_ = baseURL
+		if baseURLEnv != "" && os.Getenv(baseURLEnv) != "" {
 			logging.DebugContextf(ctx, "%s detected for %s (source=process_env)", baseURLEnv, a.Name())
 		}
 		return
@@ -258,7 +285,10 @@ func downloadSessionArtifact(ctx context.Context, rt Runtime, artifactDir, sessi
 }
 
 func (a *BaseAgent) mergeExecOptionsEnv(ctx context.Context, opts ExecOptions, envVars map[string]string, attrs map[string]string) ExecOptions {
-	merged := map[string]string{"PATH": agentExecutablePath}
+	merged := map[string]string{}
+	if agentExecutablePath != "" {
+		merged["PATH"] = agentExecutablePath
+	}
 	maps.Copy(merged, envVars)
 	maps.Copy(merged, opts.Env)
 	maps.Copy(merged, observability.AgentEnv(ctx, merged, attrs))
@@ -290,22 +320,13 @@ func formatAgentModel(provider, model string) string {
 	return provider + "/" + model
 }
 
-// shellQuote quotes a string for safe shell usage.
+// shellQuote quotes a string for safe POSIX shell usage. Agent commands are
+// always composed for and executed by bash (via the Node/nvm bootstrap), so
+// POSIX quoting is correct even when skill-up itself runs on a Windows host.
+// It delegates to internal/shellquote so the project keeps a single quoting
+// implementation.
 func shellQuote(s string) string {
-	if s == "" {
-		return "''"
-	}
-	var result strings.Builder
-	result.WriteByte('\'')
-	for _, c := range s {
-		if c == '\'' {
-			result.WriteString(`'\''`)
-		} else {
-			result.WriteRune(c)
-		}
-	}
-	result.WriteByte('\'')
-	return result.String()
+	return shellquote.QuotePOSIX(s)
 }
 
 // BuildInstructionFromMessages converts messages to a single instruction string.
diff --git a/internal/agent/claude_code.go b/internal/agent/claude_code.go
index 651700d..1dc13a9 100644
--- a/internal/agent/claude_code.go
+++ b/internal/agent/claude_code.go
@@ -96,6 +96,9 @@ func (a *ClaudeCodeAgent) CheckCredentials(ctx context.Context) error {
 // Run executes the claude-code agent with the given messages via stream-json.
 // It streams messages to stdin and parses stream-json output to build the transcript.
 func (a *ClaudeCodeAgent) Run(ctx context.Context, rt Runtime, opts ExecOptions, messages []transcript.Message) (*SessionResult, error) {
+	if err := requireBashOnWindowsHost(rt); err != nil {
+		return nil, fmt.Errorf("%s: %w", a.Name(), err)
+	}
 	start := time.Now()
 
 	sessionID := uuid.New().String()
diff --git a/internal/agent/claude_code_test.go b/internal/agent/claude_code_test.go
index 72c93dd..60fb3a7 100644
--- a/internal/agent/claude_code_test.go
+++ b/internal/agent/claude_code_test.go
@@ -741,3 +741,4 @@ func (r *claudeCodeTestRuntime) Workspace() string { return r.workspace }
 func (r *claudeCodeTestRuntime) RequiresProcessSandbox() bool {
 	return true
 }
+func (r *claudeCodeTestRuntime) TargetGOOS() string { return "linux" }
diff --git a/internal/agent/cli.go b/internal/agent/cli.go
index 4bdd270..7545e77 100644
--- a/internal/agent/cli.go
+++ b/internal/agent/cli.go
@@ -5,10 +5,13 @@ import (
 	"context"
 	"fmt"
 	"path/filepath"
+	"regexp"
+	"strings"
 	"text/template"
 	"time"
 
 	"github.com/alibaba/skill-up/internal/observability"
+	"github.com/alibaba/skill-up/internal/platform"
 	"github.com/alibaba/skill-up/internal/runtime"
 	"github.com/alibaba/skill-up/pkg/transcript"
 )
@@ -121,6 +124,9 @@ func (a *CLIAgent) InstallSkill(ctx context.Context, rt Runtime, skillCfg runtim
 
 // Run executes the agent with the given messages and returns the session result.
 func (a *CLIAgent) Run(ctx context.Context, rt Runtime, opts ExecOptions, messages []transcript.Message) (*SessionResult, error) {
+	if err := requireBashOnWindowsHost(rt); err != nil {
+		return nil, fmt.Errorf("%s: %w", a.Name(), err)
+	}
 	start := time.Now()
 
 	instruction := BuildInstructionFromMessages(messages)
@@ -151,12 +157,42 @@ func (a *CLIAgent) Run(ctx context.Context, rt Runtime, opts ExecOptions, messag
 	return sessionResult, nil
 }
 
+// commandVRegexp matches a POSIX `command -v <binary> [rest]` check. The
+// regex form (vs strings.CutPrefix) lets us capture the binary separately
+// from any trailing redirect or pipe, and supports surrounding whitespace
+// the way a real shell would.
+var commandVRegexp = regexp.MustCompile(`^\s*command\s+-v\s+(\S+)(\s.*)?$`)
+
+// checkCommandForOS adapts a POSIX `command -v X` availability check to the
+// target OS. Windows cmd.exe has no `command` builtin; `where` is the
+// equivalent. Common POSIX-only redirect targets (`/dev/null`) are rewritten
+// to their cmd equivalent (`nul`) so a quiet probe like
+// `command -v codex >/dev/null 2>&1` continues to silence its output
+// instead of failing to open the missing /dev/null path. Other command
+// forms are returned unchanged.
+func checkCommandForOS(checkCmd, goos string) string {
+	if goos != platform.GOOSWindows {
+		return checkCmd
+	}
+	m := commandVRegexp.FindStringSubmatch(checkCmd)
+	if m == nil {
+		return checkCmd
+	}
+	binary, rest := m[1], m[2]
+	rest = strings.ReplaceAll(rest, "/dev/null", "nul")
+	return "where " + binary + rest
+}
+
 // Check verifies the agent executable is available.
 func (a *CLIAgent) Check(ctx context.Context, rt Runtime) error {
+	if err := requireBashOnWindowsHost(rt); err != nil {
+		return fmt.Errorf("%s: %w", a.Name(), err)
+	}
 	checkCmd := a.Cfg.CheckCmd
 	if checkCmd == "" {
 		return fmt.Errorf("CheckCmd not configured for agent %s", a.Name())
 	}
+	checkCmd = checkCommandForOS(checkCmd, rt.TargetGOOS())
 
 	result, err := rt.Exec(ctx, checkCmd, a.mergeExecOptionsEnv(ctx, ExecOptions{}, nil, nil))
 	if err != nil {
diff --git a/internal/agent/cli_test.go b/internal/agent/cli_test.go
index 7eedc70..beffd90 100644
--- a/internal/agent/cli_test.go
+++ b/internal/agent/cli_test.go
@@ -4,13 +4,28 @@ import (
 	"context"
 	"os"
 	"path/filepath"
+	goruntime "runtime"
 	"testing"
 
+	"github.com/alibaba/skill-up/internal/platform"
 	"github.com/alibaba/skill-up/internal/runtime"
 	"github.com/alibaba/skill-up/pkg/transcript"
 )
 
+// skipIfNoPOSIXShell skips tests that rely on a POSIX shell at the host
+// (RunCmd/InstallSkillCmd/InstallMCPCmd templates baked with bash builtins,
+// `&&` pipelines, `$VAR` expansion, etc.). On Windows skill-up's none runtime
+// uses cmd.exe, which can't interpret those constructs — agent execution on
+// native Windows is intentionally out of scope; users go through WSL2.
+func skipIfNoPOSIXShell(t *testing.T) {
+	t.Helper()
+	if goruntime.GOOS == platform.GOOSWindows {
+		t.Skip("POSIX-shell agent template; native Windows agent execution is unsupported (use WSL2)")
+	}
+}
+
 func TestCLIAgent_Run(t *testing.T) {
+	skipIfNoPOSIXShell(t)
 	t.Parallel()
 
 	// Use NoneRuntime as the test runtime
@@ -41,6 +56,7 @@ func TestCLIAgent_Run(t *testing.T) {
 }
 
 func TestCLIAgent_RunExitError(t *testing.T) {
+	skipIfNoPOSIXShell(t)
 	t.Parallel()
 
 	rt := &runtime.NoneRuntime{}
@@ -172,6 +188,7 @@ func TestCLIAgent_InstallSkillDefault(t *testing.T) {
 }
 
 func TestCLIAgent_InstallSkillWithCmd(t *testing.T) {
+	skipIfNoPOSIXShell(t)
 	t.Parallel()
 
 	rt := &runtime.NoneRuntime{}
@@ -259,6 +276,7 @@ func TestCLIAgent_InstallMCPConfiguredServersRequireInstallCommand(t *testing.T)
 }
 
 func TestCLIAgent_InstallMCPUsesResolvedEndpointConfigRefAndEnv(t *testing.T) {
+	skipIfNoPOSIXShell(t)
 	t.Parallel()
 
 	rt := &runtime.NoneRuntime{}
@@ -298,3 +316,23 @@ func TestCLIAgent_InstallMCPUsesResolvedEndpointConfigRefAndEnv(t *testing.T) {
 		t.Fatalf("marker content: got %q, want %q", string(data), want)
 	}
 }
+
+func TestCheckCommandForOS(t *testing.T) {
+	tests := []struct {
+		name, in, goos, want string
+	}{
+		{"posix unchanged", "command -v codex", "linux", "command -v codex"},
+		{"darwin unchanged", "command -v claude", "darwin", "command -v claude"},
+		{"windows translates", "command -v codex", platform.GOOSWindows, "where codex"},
+		{"windows redirects /dev/null", "command -v codex >/dev/null 2>&1", platform.GOOSWindows, "where codex >nul 2>&1"},
+		{"windows stderr /dev/null", "command -v claude 2>/dev/null", platform.GOOSWindows, "where claude 2>nul"},
+		{"windows non-command form unchanged", "codex --version", platform.GOOSWindows, "codex --version"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := checkCommandForOS(tt.in, tt.goos); got != tt.want {
+				t.Fatalf("checkCommandForOS(%q, %q) = %q, want %q", tt.in, tt.goos, got, tt.want)
+			}
+		})
+	}
+}
diff --git a/internal/agent/codex.go b/internal/agent/codex.go
index df6eb3a..3c9623f 100644
--- a/internal/agent/codex.go
+++ b/internal/agent/codex.go
@@ -224,6 +224,9 @@ func (a *CodexAgent) CheckCredentials(ctx context.Context) error {
 //
 //nolint:dupl
 func (a *CodexAgent) Run(ctx context.Context, rt Runtime, opts ExecOptions, messages []transcript.Message) (*SessionResult, error) {
+	if err := requireBashOnWindowsHost(rt); err != nil {
+		return nil, fmt.Errorf("%s: %w", a.Name(), err)
+	}
 	start := time.Now()
 
 	instruction := BuildInstructionFromMessages(messages)
diff --git a/internal/agent/codex_test.go b/internal/agent/codex_test.go
index 49053e2..e2f6b73 100644
--- a/internal/agent/codex_test.go
+++ b/internal/agent/codex_test.go
@@ -1006,3 +1006,4 @@ func (r *codexTestRuntime) Workspace() string { return r.workspace }
 func (r *codexTestRuntime) RequiresProcessSandbox() bool {
 	return r.workspace != "opensandbox"
 }
+func (r *codexTestRuntime) TargetGOOS() string { return "linux" }
diff --git a/internal/agent/path_other.go b/internal/agent/path_other.go
new file mode 100644
index 0000000..073bc2d
--- /dev/null
+++ b/internal/agent/path_other.go
@@ -0,0 +1,8 @@
+//go:build !windows
+
+package agent
+
+// agentExecutablePath prepends the agent CLI's installed-from-bootstrap
+// locations onto PATH so the nvm-managed node and ~/.local/bin shims win over
+// any older system installs.
+var agentExecutablePath = "$HOME/.local/bin:$HOME/.nvm/current/bin:$PATH"
diff --git a/internal/agent/path_windows.go b/internal/agent/path_windows.go
new file mode 100644
index 0000000..860a1aa
--- /dev/null
+++ b/internal/agent/path_windows.go
@@ -0,0 +1,10 @@
+//go:build windows
+
+package agent
+
+// agentExecutablePath is empty on Windows: the POSIX bootstrap does not run
+// natively, and overriding PATH with a colon-separated string would break
+// the host's `where` lookups (and conflict with case-insensitive `Path`).
+// Letting the inherited environment flow through keeps preinstalled CLIs
+// reachable.
+var agentExecutablePath = ""
diff --git a/internal/agent/qodercli.go b/internal/agent/qodercli.go
index 08dc2b0..08c398c 100644
--- a/internal/agent/qodercli.go
+++ b/internal/agent/qodercli.go
@@ -65,6 +65,9 @@ func (a *QoderCLIAgent) CheckCredentials(ctx context.Context) error {
 //
 //nolint:dupl
 func (a *QoderCLIAgent) Run(ctx context.Context, rt Runtime, opts ExecOptions, messages []transcript.Message) (*SessionResult, error) {
+	if err := requireBashOnWindowsHost(rt); err != nil {
+		return nil, fmt.Errorf("%s: %w", a.Name(), err)
+	}
 	start := time.Now()
 
 	instruction := BuildInstructionFromMessages(messages)
diff --git a/internal/agent/qodercli_test.go b/internal/agent/qodercli_test.go
index 7294163..994418e 100644
--- a/internal/agent/qodercli_test.go
+++ b/internal/agent/qodercli_test.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"os"
 	"path/filepath"
+	goruntime "runtime"
 	"strings"
 	"testing"
 	"time"
@@ -293,6 +294,13 @@ func TestFindQoderSessionFile(t *testing.T) {
 }
 
 func TestFindQoderSessionFile_SelectsNewestByModTime(t *testing.T) {
+	if goruntime.GOOS == "windows" {
+		// The workspace-key path layout embeds a Linux-style workspace path,
+		// which contains a colon on Windows (e.g. `C:`) and cannot be a
+		// directory component. Qoder native Windows agent execution is out of
+		// scope; this test is exercised on Linux/darwin only.
+		t.Skip("qoder workspace-key path layout is POSIX-only")
+	}
 	tmpHome := t.TempDir()
 	t.Setenv("HOME", tmpHome)
 
@@ -397,3 +405,4 @@ func (r *qoderTestRuntime) Workspace() string { return r.workspace }
 func (r *qoderTestRuntime) RequiresProcessSandbox() bool {
 	return true
 }
+func (r *qoderTestRuntime) TargetGOOS() string { return "linux" }
diff --git a/internal/cli/import.go b/internal/cli/import.go
index 7a78a6b..71bc5e5 100644
--- a/internal/cli/import.go
+++ b/internal/cli/import.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"path"
 	"path/filepath"
 
 	"github.com/spf13/cobra"
@@ -156,7 +157,9 @@ func generateEvalConfig(caseIDs []string, outputDir string) *config.EvalConfig {
 
 	caseFiles := make([]string, len(caseIDs))
 	for i, caseID := range caseIDs {
-		caseFiles[i] = filepath.Join(relPrefix, "cases", caseID+".yaml")
+		// eval.yaml is a portable config consumed by the loader on any OS;
+		// always use forward slashes for case paths regardless of the host.
+		caseFiles[i] = path.Join(filepath.ToSlash(relPrefix), "cases", caseID+".yaml")
 	}
 
 	cfg.Cases.Files = caseFiles
diff --git a/internal/evaluator/evaluator_test.go b/internal/evaluator/evaluator_test.go
index a6e1f9a..279bfc1 100644
--- a/internal/evaluator/evaluator_test.go
+++ b/internal/evaluator/evaluator_test.go
@@ -8,6 +8,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	goruntime "runtime"
 	"strings"
 	"sync"
 	"sync/atomic"
@@ -96,6 +97,7 @@ func (m *mockRuntime) Create(_ context.Context) error                  { return
 func (m *mockRuntime) Close() error                                    { return nil }
 func (m *mockRuntime) Workspace() string                               { return m.workspace }
 func (m *mockRuntime) RequiresProcessSandbox() bool                    { return true }
+func (m *mockRuntime) TargetGOOS() string                              { return "linux" }
 func (m *mockRuntime) Start(_ context.Context) error                   { return nil }
 func (m *mockRuntime) Stop(_ context.Context) error                    { return nil }
 func (m *mockRuntime) UploadFile(_ context.Context, _, _ string) error { return nil }
@@ -2490,7 +2492,7 @@ func TestContextFilesUploader_RejectsUnsafePaths(t *testing.T) {
 	}{
 		{name: "empty", path: ""},
 		{name: "workspace root", path: "."},
-		{name: "absolute", path: filepath.Join(string(filepath.Separator), "tmp", "secret.txt")},
+		{name: "absolute", path: absoluteSecretPath()},
 		{name: "parent traversal", path: "../secret.txt"},
 		{name: "nested parent traversal", path: "fixtures/../../secret.txt"},
 	}
@@ -2709,3 +2711,13 @@ func TestGitInitUploader_InitWithRemotes(t *testing.T) {
 		t.Fatalf("expected remote URL in output, got %s", result.Stdout)
 	}
 }
+
+// absoluteSecretPath returns a path that filepath.IsAbs reports as absolute on
+// the host OS. On Windows that requires a drive letter; `\tmp\secret.txt`
+// alone is considered relative.
+func absoluteSecretPath() string {
+	if goruntime.GOOS == "windows" {
+		return `C:\tmp\secret.txt`
+	}
+	return "/tmp/secret.txt"
+}
diff --git a/internal/evaluator/fixtures.go b/internal/evaluator/fixtures.go
index 4aa46ca..2ac64d2 100644
--- a/internal/evaluator/fixtures.go
+++ b/internal/evaluator/fixtures.go
@@ -129,7 +129,7 @@ func (g *gitInitUploader) Upload(ctx context.Context, rt runtime.Runtime, caseCf
 			return err
 		}
 		fmt.Fprintf(&script, "git remote add %s %s\n",
-			shellquote.Quote(remote.Name), shellquote.Quote(remote.URL))
+			shellquote.QuotePOSIX(remote.Name), shellquote.QuotePOSIX(remote.URL))
 	}
 
 	result, err := rt.Exec(ctx, script.String(), runtime.ExecOptions{
@@ -170,7 +170,10 @@ func (g *gitCheckoutUploader) Upload(ctx context.Context, rt runtime.Runtime, ca
 	// invocations and is never interpolated into the error message (a
 	// double-quoted echo would re-evaluate command substitutions); the Go
 	// error below already reports the branch name safely via %q.
-	quoted := shellquote.Quote(branch)
+	// The script runs in a POSIX shell inside the runtime (either the
+	// `none` runtime on a POSIX host, or the Linux OpenSandbox), so quote
+	// with POSIX rules explicitly rather than the host-aware Quote.
+	quoted := shellquote.QuotePOSIX(branch)
 	script := fmt.Sprintf("set -eu\n"+
 		"if git switch %[1]s 2>/dev/null; then :\n"+
 		"elif ! git rev-parse --verify --quiet HEAD >/dev/null 2>&1; then git switch -c %[1]s\n"+
@@ -222,7 +225,7 @@ func (a *applyDiffUploader) Upload(ctx context.Context, rt runtime.Runtime, case
 	}
 
 	// Quote the tmp path and use `--` to stop git from treating the path as an option.
-	result, err := rt.Exec(ctx, "git apply -- "+shellquote.Quote(tmpPath), runtime.ExecOptions{
+	result, err := rt.Exec(ctx, "git apply -- "+shellquote.QuotePOSIX(tmpPath), runtime.ExecOptions{
 		Cwd: rt.Workspace(),
 	})
 	if err != nil {
diff --git a/internal/judge/e2e_test.go b/internal/judge/e2e_test.go
index 0c17f54..664c214 100644
--- a/internal/judge/e2e_test.go
+++ b/internal/judge/e2e_test.go
@@ -35,6 +35,7 @@ import (
 	"testing"
 
 	"github.com/alibaba/skill-up/internal/config"
+	"github.com/alibaba/skill-up/internal/platform"
 	"github.com/alibaba/skill-up/pkg/transcript"
 )
 
@@ -616,15 +617,12 @@ func TestE2E_GradingJSON_SkipResult_Format(t *testing.T) {
 // ---------------------------------------------------------------------------
 
 func TestE2E_ScriptJudge_FullPipeline(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("skipping script test on windows")
-	}
-
 	dir := t.TempDir()
 
 	// Create the evaluation script: check whether EVAL_FINAL_MESSAGE
-	// contains "bug".
-	scriptContent := `#!/bin/sh
+	// contains "bug". The script judge dispatches by extension, so the host
+	// OS determines whether a POSIX shell or a Windows batch script is used.
+	scriptName, scriptContent := "eval_check.sh", `#!/bin/sh
 # Evaluation script: check whether the agent output identifies the bug.
 # Reads the EVAL_FINAL_MESSAGE env var (injected by ScriptJudge).
 if echo "$EVAL_FINAL_MESSAGE" | grep -q "bug"; then
@@ -635,7 +633,18 @@ else
   exit 1
 fi
 `
-	scriptPath := writeScript(t, dir, "eval_check.sh", scriptContent)
+	if runtime.GOOS == platform.GOOSWindows {
+		scriptName, scriptContent = "eval_check.cmd", "@echo off\r\n"+
+			"echo %EVAL_FINAL_MESSAGE% | findstr /C:\"bug\" >nul\r\n"+
+			"if %errorlevel%==0 (\r\n"+
+			"  echo Agent correctly identified the bug\r\n"+
+			"  exit /b 0\r\n"+
+			") else (\r\n"+
+			"  echo Agent failed to identify the bug\r\n"+
+			"  exit /b 1\r\n"+
+			")\r\n"
+	}
+	scriptPath := writeScript(t, dir, scriptName, scriptContent)
 
 	expectCfg := &config.Expect{}
 
diff --git a/internal/judge/helpers_test.go b/internal/judge/helpers_test.go
index 5b3fc0f..75f1c40 100644
--- a/internal/judge/helpers_test.go
+++ b/internal/judge/helpers_test.go
@@ -120,6 +120,7 @@ func (m *mockJudgeTestRuntime) Create(_ context.Context) error
 func (m *mockJudgeTestRuntime) Close() error                                      { return nil }
 func (m *mockJudgeTestRuntime) Workspace() string                                 { return "/tmp/test" }
 func (m *mockJudgeTestRuntime) RequiresProcessSandbox() bool                      { return true }
+func (m *mockJudgeTestRuntime) TargetGOOS() string                                { return "linux" }
 func (m *mockJudgeTestRuntime) Start(_ context.Context) error                     { return nil }
 func (m *mockJudgeTestRuntime) Stop(_ context.Context) error                      { return nil }
 func (m *mockJudgeTestRuntime) UploadFile(_ context.Context, _, _ string) error   { return nil }
diff --git a/internal/judge/interpreter.go b/internal/judge/interpreter.go
new file mode 100644
index 0000000..f5af1c2
--- /dev/null
+++ b/internal/judge/interpreter.go
@@ -0,0 +1,552 @@
+package judge
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/alibaba/skill-up/internal/platform"
+	"github.com/alibaba/skill-up/internal/shellquote"
+)
+
+// scriptPlan describes how the script judge uploads and runs an evaluation
+// script in a runtime whose commands execute on a particular target OS.
+type scriptPlan struct {
+	// uploadName is the basename the script is uploaded as. The extension is
+	// preserved so the interpreter dispatch stays unambiguous.
+	uploadName string
+	// command builds the runtime Exec command string for the uploaded script
+	// at remoteScript (its path inside the runtime).
+	command func(remoteScript string) string
+	// cleanupCommand builds the command that recursively removes the
+	// per-judge temp dir on the target OS, using the same quoting rules as
+	// command so the same shell ultimately interprets it.
+	cleanupCommand func(dir string) string
+	// envPath translates a runtime-side path (e.g. EVAL_TRANSCRIPT_PATH)
+	// into the form the script's interpreter will accept. Identity for most
+	// targets; the Windows `.sh` plan converts to forward slashes so POSIX
+	// tools running inside Git Bash can open the file.
+	envPath func(p string) string
+}
+
+// identityEnvPath is the default envPath used by plans that need no
+// translation between the runtime-side path and the script's view of it.
+func identityEnvPath(p string) string { return p }
+
+// planScript determines how to execute scriptPath in a runtime whose commands
+// run on targetGOOS.
+//
+// POSIX targets keep the original behavior: the script is uploaded verbatim
+// and run via its own shebang. Windows targets dispatch to an interpreter
+// based on the file extension (or shebang when the extension is absent).
+func planScript(scriptPath, targetGOOS string) (scriptPlan, error) {
+	if targetGOOS != platform.GOOSWindows {
+		return scriptPlan{
+			uploadName: "script",
+			command: func(remoteScript string) string {
+				q := shellquote.QuotePOSIX(remoteScript)
+				return "chmod 700 " + q + " && " + q
+			},
+			cleanupCommand: func(dir string) string {
+				return "rm -rf " + shellquote.QuotePOSIX(dir)
+			},
+			envPath: identityEnvPath,
+		}, nil
+	}
+	return planWindowsScript(scriptPath, platform.Host())
+}
+
+func planWindowsScript(scriptPath string, shell platform.HostShell) (scriptPlan, error) {
+	// Read and classify the shebang once: both the .ps1 (pwsh-vs-powershell
+	// selection, option forwarding) and .sh (bash strict-mode flags) plans
+	// consume the result. shebangInterp is "" when no recognized shebang
+	// was found.
+	shebangInterp, shebangOpts := parseShebang(readShebang(scriptPath))
+
+	ext := strings.ToLower(filepath.Ext(scriptPath))
+	if ext == "" {
+		ext = extensionForShebangInterpreter(shebangInterp)
+	}
+
+	// Every command we emit (script run + cleanup) is quoted with the host
+	// shell's own quoter so the same shell semantics applies end to end.
+	// shell.Quote already accounts for bash-vs-cmd selection -- there is no
+	// second discovery here, ruling out the chance of the two decisions
+	// disagreeing.
+	quote := shell.Quote
+	// .ps1 / .cmd / .bat all execute through cmd.exe, so cleanup through cmd
+	// keeps quoting / strip semantics inside one shell. The `/d /s /c` flags
+	// match the cmd fallback in platform.Host so the strip rule behaves the
+	// same way for the inner command.
+	winCleanup := func(dir string) string {
+		return "cmd /d /s /c rd /s /q " + quote(dir)
+	}
+
+	switch ext {
+	case ".ps1":
+		// Pick pwsh.exe (PowerShell Core 7+) when the shebang explicitly
+		// asks for it; default to powershell.exe (Windows PowerShell 5.x)
+		// otherwise. Forward any shebang-encoded interpreter flags so
+		// `#!/usr/bin/env -S pwsh -NoLogo` does not silently lose -NoLogo
+		// when we re-invoke the interpreter ourselves. -NoProfile and
+		// -ExecutionPolicy Bypass are always appended because Windows
+		// PowerShell's default Restricted policy blocks unsigned scripts;
+		// the duplicate -NoProfile case is harmless if the shebang also
+		// sets it.
+		psBinary := psInterpLegacy
+		if shebangInterp == psInterpCore {
+			psBinary = psInterpCore
+		}
+		psArgs := []string{psBinary}
+		// Only forward shebang opts when the shebang itself names a
+		// PowerShell interpreter -- a `.ps1` file with a bogus
+		// `#!/bin/bash -eu` shebang otherwise leaks bash flags into the
+		// powershell invocation.
+		if shebangInterp == psInterpCore || shebangInterp == psInterpLegacy {
+			for _, o := range shebangOpts {
+				psArgs = append(psArgs, quote(o))
+			}
+		}
+		psArgs = append(psArgs, "-NoProfile", "-ExecutionPolicy", "Bypass", "-File")
+		return scriptPlan{
+			uploadName: "script.ps1",
+			command: func(remoteScript string) string {
+				// Defensive copy: appending to the captured psArgs
+				// directly would mutate its backing array if cap
+				// exceeds len. The .sh branch below does the same.
+				args := append([]string{}, psArgs...)
+				args = append(args, quote(remoteScript))
+				return strings.Join(args, " ")
+			},
+			cleanupCommand: winCleanup,
+			envPath:        identityEnvPath,
+		}, nil
+	case ".cmd", ".bat":
+		return scriptPlan{
+			uploadName: "script" + ext,
+			command: func(remoteScript string) string {
+				return "cmd /d /s /c " + quote(remoteScript)
+			},
+			cleanupCommand: winCleanup,
+			envPath:        identityEnvPath,
+		}, nil
+	case ".sh", ".bash":
+		if !shell.IsBash {
+			return scriptPlan{}, fmt.Errorf(
+				"script judge: .sh script requires bash on Windows; install Git Bash or set %s",
+				platform.BashEnvOverride)
+		}
+		// Forward shebang-encoded options (`#!/bin/bash -eu`,
+		// `#!/usr/bin/env -S bash -eu`, ...) so strict-mode flags that
+		// POSIX honors via shebang aren't silently dropped when we invoke
+		// bash explicitly on Windows.
+		bashArgs := []string{quote(shell.Bash)}
+		// Forward only when the shebang actually names a POSIX shell, so a
+		// `.sh` file with an unrelated shebang (e.g. `#!/usr/bin/env pwsh`
+		// renamed to .sh) does not feed PowerShell flags to bash.
+		if shebangPOSIXShells[shebangInterp] {
+			for _, o := range shebangOpts {
+				bashArgs = append(bashArgs, quote(o))
+			}
+		}
+		return scriptPlan{
+			uploadName: "script.sh",
+			command: func(remoteScript string) string {
+				// bash on Windows accepts forward-slash paths; we also
+				// keep EVAL_TRANSCRIPT_PATH in that form (see envPath
+				// below) so POSIX tools inside the script can `cat` it.
+				args := append([]string{}, bashArgs...)
+				args = append(args, quote(filepath.ToSlash(remoteScript)))
+				return strings.Join(args, " ")
+			},
+			// Stay inside bash for cleanup too. The .sh case already runs
+			// through bash -c, and Git Bash's rm understands the
+			// forward-slash form of the Windows temp dir natively, so
+			// `rm -rf <forward-slash dir>` avoids the bash -> cmd hop the
+			// other extensions go through.
+			cleanupCommand: func(dir string) string {
+				return "rm -rf " + quote(filepath.ToSlash(dir))
+			},
+			envPath: filepath.ToSlash,
+		}, nil
+	default:
+		return scriptPlan{}, fmt.Errorf(
+			"script judge: cannot determine interpreter for %s on Windows: "+
+				"add a .sh, .ps1, or .cmd extension or a shebang",
+			filepath.Base(scriptPath))
+	}
+}
+
+// judgeTempDir returns an absolute temporary directory for a single script
+// judge run, appropriate for the target OS.
+func judgeTempDir(targetGOOS string) string {
+	name := fmt.Sprintf("skill-up-judge-%d", time.Now().UnixNano())
+	if targetGOOS == platform.GOOSWindows {
+		return filepath.Join(os.TempDir(), name)
+	}
+	return path.Join("/tmp", name)
+}
+
+// joinForGOOS joins path elements using the separator of the target OS.
+func joinForGOOS(targetGOOS string, elem ...string) string {
+	if targetGOOS == platform.GOOSWindows {
+		return filepath.Join(elem...)
+	}
+	return path.Join(elem...)
+}
+
+// shebangPOSIXShells lists interpreter basenames that the Windows planner
+// is willing to dispatch through Git Bash. Only `sh` and `bash` are listed:
+// the Windows `.sh` runner always invokes the discovered bash, so dropping
+// a `#!/usr/bin/env zsh` script into bash would silently change semantics.
+// Other POSIX shells (dash, ksh, zsh, ash) are intentionally rejected so
+// the planner returns "cannot determine interpreter" rather than mis-routing.
+// On POSIX targets this list is irrelevant: planScript ignores extension
+// and lets the kernel honor the script's actual shebang.
+var shebangPOSIXShells = map[string]bool{
+	"sh": true, "bash": true,
+}
+
+// PowerShell interpreter basenames recognized by the Windows planner.
+// `pwsh` is PowerShell Core 7+; `powershell` is the legacy Windows
+// PowerShell 5.x. They differ in syntax and module set, so we track them
+// separately and pick the binary that matches the shebang when present.
+const (
+	psInterpCore   = "pwsh"
+	psInterpLegacy = "powershell"
+)
+
+// extensionForShebangInterpreter maps a parsed shebang interpreter basename
+// to the synthetic file extension planWindowsScript dispatches on. Returns
+// "" for empty / unrecognized interpreters so the planner reports
+// "cannot determine interpreter" rather than mis-routing the script.
+func extensionForShebangInterpreter(interp string) string {
+	if interp == "" {
+		return ""
+	}
+	switch interp {
+	case psInterpCore, psInterpLegacy:
+		return ".ps1"
+	}
+	if shebangPOSIXShells[interp] {
+		return ".sh"
+	}
+	return ""
+}
+
+// shebangExtension reads scriptPath's first line and returns the synthetic
+// extension implied by its shebang. Kept as a thin wrapper for tests that
+// exercise the full path → extension flow.
+func shebangExtension(scriptPath string) string {
+	interp, _ := parseShebang(readShebang(scriptPath))
+	return extensionForShebangInterpreter(interp)
+}
+
+// readShebang returns the body of scriptPath's first line when it is a
+// shebang (everything after `#!`), or "" when there is no recognizable
+// shebang or the file cannot be opened.
+func readShebang(scriptPath string) string {
+	f, err := os.Open(scriptPath) //nolint:gosec // scriptPath is a caller-provided evaluation script
+	if err != nil {
+		return ""
+	}
+	defer func() { _ = f.Close() }()
+
+	sc := bufio.NewScanner(f)
+	if !sc.Scan() {
+		return ""
+	}
+	line := strings.TrimSpace(sc.Text())
+	if !strings.HasPrefix(line, "#!") {
+		return ""
+	}
+	return line[2:]
+}
+
+// parseShebang splits a shebang body into (interpreter basename, options
+// passed through to the interpreter). It understands direct paths
+// (`#!/bin/bash -eu`), the `/usr/bin/env <name>` form, and both the
+// split `env -S <body>` and compact `env -S<body>` GNU extensions.
+//
+// The `-S <body>` body is treated as whitespace-separated tokens; nested
+// shell quoting inside -S (e.g. `-S bash -c "echo hi"`) is not parsed and
+// the embedded quotes survive in the returned options. Real-world judge
+// shebangs use only flag-style options, where this approximation is fine.
+//
+// Returns ("", nil) when the body has no usable interpreter.
+func parseShebang(body string) (string, []string) {
+	fields := strings.Fields(body)
+	if len(fields) == 0 {
+		return "", nil
+	}
+	if filepath.Base(fields[0]) == "env" {
+		return parseEnvShebang(fields[1:])
+	}
+	return filepath.Base(fields[0]), append([]string{}, fields[1:]...)
+}
+
+// envValueTakingShortFlags lists GNU env short flags that consume a separate
+// value token after them in a shebang.
+var envValueTakingShortFlags = map[string]bool{
+	"-u": true, // --unset NAME
+	"-C": true, // --chdir DIR
+	"-a": true, // --argv0 ARG (override argv[0])
+}
+
+// envValueTakingLongFlags lists GNU env long flags that accept their value
+// as the next token (the `--name=value` form is self-contained and handled
+// by the generic `strings.HasPrefix(f, "-")` skip arm).
+//
+// Intentionally omitted: --block-signal, --default-signal, --ignore-signal.
+// GNU env documents those as OPTIONAL-argument flags (`[=sig]`) -- without
+// the `=sig` suffix they take no value, so consuming the next token would
+// swallow the interpreter (e.g. `env --ignore-signal bash` would otherwise
+// be parsed as flag `--ignore-signal` with value `bash` and no interpreter
+// remaining).
+var envValueTakingLongFlags = map[string]bool{
+	"--unset": true,
+	"--chdir": true,
+	"--argv0": true,
+}
+
+// isEnvAssignment reports whether tok has the env `NAME=VALUE` shape that
+// GNU env accepts in front of the command. The portable form is
+// `[A-Za-z_][A-Za-z0-9_]*=...`; everything before the first `=` must be a
+// valid C-identifier byte sequence.
+func isEnvAssignment(tok string) bool {
+	eq := strings.IndexByte(tok, '=')
+	if eq <= 0 {
+		return false
+	}
+	for i := range eq {
+		c := tok[i]
+		switch {
+		case c == '_':
+		case c >= 'A' && c <= 'Z':
+		case c >= 'a' && c <= 'z':
+		case i > 0 && c >= '0' && c <= '9':
+		default:
+			return false
+		}
+	}
+	return true
+}
+
+// parseEnvShebang processes the args after `env` in a shebang line.
+func parseEnvShebang(args []string) (string, []string) {
+	for i := 0; i < len(args); { //nolint:intrange // we conditionally advance i by 2 when consuming flag values
+		f := args[i]
+		switch {
+		case f == "-S" || f == "--split-string":
+			// Split form: -S takes everything that follows as one logical
+			// string. On a kernel shebang only one arg reaches env, so
+			// rejoining with spaces matches what env would receive.
+			return splitStringInterpreter(strings.Join(args[i+1:], " "))
+		case strings.HasPrefix(f, "--split-string="):
+			// Long compact: --split-string=<body>; any trailing tokens
+			// were whitespace-split out of the same logical arg, so
+			// rejoin them like the kernel-shebang one-arg rule.
+			rest := strings.TrimPrefix(f, "--split-string=")
+			if i+1 < len(args) {
+				if rest != "" {
+					rest += " "
+				}
+				rest += strings.Join(args[i+1:], " ")
+			}
+			return splitStringInterpreter(rest)
+		case strings.HasPrefix(f, "-S"):
+			// Compact form: -S<body> packs the split-string into the same
+			// token. Strip the -S prefix and concatenate any trailing
+			// tokens with a space (mirroring the kernel-shebang one-arg
+			// rule).
+			rest := strings.TrimPrefix(f, "-S")
+			if i+1 < len(args) {
+				if rest != "" {
+					rest += " "
+				}
+				rest += strings.Join(args[i+1:], " ")
+			}
+			return splitStringInterpreter(rest)
+		case envValueTakingShortFlags[f], envValueTakingLongFlags[f]:
+			// Consume the flag and its separate value token (e.g.
+			// `-u VAR`, `-C DIR`, `--unset FOO`, `--chdir /tmp`) so the
+			// value token does not get mistaken for the interpreter.
+			i += 2
+		case strings.HasPrefix(f, "-"):
+			// Self-contained env flag (-i, --ignore-environment,
+			// --chdir=DIR, --unset=VAR, ...). Skip the single token.
+			i++
+		default:
+			// First non-flag token. GNU env allows leading `NAME=VALUE`
+			// assignments before the command (e.g.
+			// `env PYTHONPATH=/opt python3`); the helper skips those so we
+			// land on the real interpreter.
+			return interpreterFromArgs(args[i:])
+		}
+	}
+	return "", nil
+}
+
+// interpreterFromArgs returns the interpreter and trailing args after
+// skipping leading GNU env `NAME=VALUE` assignments. The first non-
+// assignment token is the interpreter; everything after it is forwarded.
+func interpreterFromArgs(args []string) (string, []string) {
+	i := 0
+	for i < len(args) && isEnvAssignment(args[i]) {
+		i++
+	}
+	if i >= len(args) {
+		return "", nil
+	}
+	return filepath.Base(args[i]), append([]string{}, args[i+1:]...)
+}
+
+// splitStringInterpreter parses the body that env's -S would split. The
+// tokenizer respects single and double quotes plus backslash escapes so
+// shebangs like `#!/usr/bin/env -S bash -c "echo ok"` produce the same argv
+// as a POSIX kernel-shebang dispatch. Full env-S escape sequences (\xHH,
+// \n, \t, etc.) are not decoded -- they are rarely used in real script-judge
+// shebangs and decoding them would not survive in the cmd-fallback path
+// anyway. Unterminated quotes return ("", nil) which the planner converts
+// into a "cannot determine interpreter" error.
+func splitStringInterpreter(body string) (string, []string) {
+	tokens, err := tokenizeShebangSplitString(body)
+	if err != nil || len(tokens) == 0 {
+		return "", nil
+	}
+	// GNU env allows leading `NAME=VALUE` assignments inside -S too
+	// (e.g. `env -S PYTHONPATH=/opt python3`); reuse the same skip helper.
+	return interpreterFromArgs(tokens)
+}
+
+// tokenizerState carries the small bit of state the tokenizer mutates as it
+// walks body. Keeping it in a struct lets the per-state handlers stay small
+// enough that the top-level loop stays under gocyclo's threshold.
+type tokenizerState struct {
+	cur      strings.Builder
+	tokens   []string
+	inSingle bool
+	inDouble bool
+	started  bool
+	skipNext bool // when true, the next byte was consumed as an escape
+	done     bool // set by `\c` in unquoted context: ignore the rest of body
+}
+
+func (s *tokenizerState) flush() {
+	if s.started {
+		s.tokens = append(s.tokens, s.cur.String())
+		s.cur.Reset()
+		s.started = false
+	}
+}
+
+// tokenizeShebangSplitString splits body into shell-style tokens used by
+// env -S: whitespace separates tokens; single quotes preserve every
+// character literally until the next single quote; double quotes preserve
+// characters with `\"` and `\\` decoded; outside quotes a backslash escapes
+// the next character. Returns an error when a quote is left unterminated.
+func tokenizeShebangSplitString(body string) ([]string, error) {
+	s := &tokenizerState{}
+	for i := 0; i < len(body); i++ { //nolint:intrange // we conditionally advance i to consume escape sequences
+		if s.skipNext {
+			s.skipNext = false
+			continue
+		}
+		next := byte(0)
+		if i+1 < len(body) {
+			next = body[i+1]
+		}
+		switch {
+		case s.inSingle:
+			tokenizeStepSingle(s, body[i])
+		case s.inDouble:
+			tokenizeStepDouble(s, body[i], next)
+		default:
+			tokenizeStepUnquoted(s, body[i], next)
+		}
+		if s.done {
+			break
+		}
+	}
+	if s.inSingle || s.inDouble {
+		return nil, fmt.Errorf("unterminated quote in shebang -S body: %q", body)
+	}
+	s.flush()
+	return s.tokens, nil
+}
+
+func tokenizeStepSingle(s *tokenizerState, c byte) {
+	if c == '\'' {
+		s.inSingle = false
+	} else {
+		s.cur.WriteByte(c)
+	}
+	s.started = true
+}
+
+func tokenizeStepDouble(s *tokenizerState, c, next byte) {
+	switch {
+	case c == '"':
+		s.inDouble = false
+	case c == '\\' && (next == '"' || next == '\\'):
+		s.cur.WriteByte(next)
+		s.skipNext = true
+	default:
+		s.cur.WriteByte(c)
+	}
+	s.started = true
+}
+
+// envSWhitespaceEscapes lists the env -S backslash escapes that decode to a
+// whitespace character. In unquoted context they act as token separators --
+// most importantly `\_`, which is the documented way to embed a space
+// inside an env -S body without surrounding quotes (e.g. `bash\_-eu`).
+var envSWhitespaceEscapes = map[byte]bool{
+	'_': true, // space
+	't': true,
+	'n': true,
+	'r': true,
+	'v': true,
+	'f': true,
+}
+
+func tokenizeStepUnquoted(s *tokenizerState, c, next byte) {
+	switch c {
+	case ' ', '\t', '\n', '\v', '\r', '\f':
+		s.flush()
+	case '\'':
+		s.inSingle = true
+		s.started = true
+	case '"':
+		s.inDouble = true
+		s.started = true
+	case '\\':
+		if next == 0 {
+			return
+		}
+		switch {
+		case next == 'c':
+			// GNU env -S documents `\c` (unquoted) as "ignore the rest
+			// of the split-string body". Stop tokenization right here so
+			// the planner sees only the tokens emitted so far.
+			s.flush()
+			s.skipNext = true
+			s.done = true
+			return
+		case envSWhitespaceEscapes[next]:
+			// Whitespace escape: ends the current token without writing.
+			s.flush()
+		default:
+			s.cur.WriteByte(next)
+			s.started = true
+		}
+		s.skipNext = true
+	default:
+		s.cur.WriteByte(c)
+		s.started = true
+	}
+}
diff --git a/internal/judge/interpreter_test.go b/internal/judge/interpreter_test.go
new file mode 100644
index 0000000..5d4f843
--- /dev/null
+++ b/internal/judge/interpreter_test.go
@@ -0,0 +1,349 @@
+package judge
+
+import (
+	"os"
+	"path/filepath"
+	goruntime "runtime"
+	"strings"
+	"testing"
+
+	"github.com/alibaba/skill-up/internal/platform"
+)
+
+func TestPlanScript_POSIXTarget(t *testing.T) {
+	plan, err := planScript("/skill/evals/check.sh", "linux")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if plan.uploadName != "script" {
+		t.Fatalf("uploadName = %q, want \"script\"", plan.uploadName)
+	}
+	got := plan.command("/tmp/d/script")
+	want := "chmod 700 '/tmp/d/script' && '/tmp/d/script'"
+	if got != want {
+		t.Fatalf("command = %q, want %q", got, want)
+	}
+	if got := plan.envPath("/tmp/d/transcript.json"); got != "/tmp/d/transcript.json" {
+		t.Fatalf("POSIX envPath should be identity, got %q", got)
+	}
+}
+
+// TestHostShellQuote verifies the quoter selected by platform.Host() per
+// host OS. POSIX hosts always use QuotePOSIX (single quotes). Windows hosts
+// use bash-double-quote escapes when bash is discoverable (every \ / " /
+// $ / backtick doubled so bash decodes them back to the original byte) and
+// plain QuoteWindows otherwise.
+func TestHostShellQuote(t *testing.T) {
+	shell := platform.Host()
+	got := shell.Quote(`C:\tmp\$VAR\script.cmd`)
+	if goruntime.GOOS != "windows" {
+		want := `'C:\tmp\$VAR\script.cmd'`
+		if got != want {
+			t.Fatalf("posix shell.Quote = %q, want %q", got, want)
+		}
+		return
+	}
+	if shell.IsBash {
+		want := `"C:\\tmp\\\$VAR\\script.cmd"`
+		if got != want {
+			t.Fatalf("windows+bash shell.Quote = %q, want %q", got, want)
+		}
+	} else {
+		want := `"C:\tmp\$VAR\script.cmd"`
+		if got != want {
+			t.Fatalf("windows+cmd shell.Quote = %q, want %q", got, want)
+		}
+	}
+}
+
+func TestParseShebang(t *testing.T) {
+	tests := []struct {
+		name, body string
+		wantInt    string
+		wantOpts   []string
+	}{
+		{"empty", "", "", nil},
+		{"posix sh", "/bin/sh", "sh", []string{}},
+		{"bash with opts", "/bin/bash -eu", "bash", []string{"-eu"}},
+		{"env bash", "/usr/bin/env bash", "bash", []string{}},
+		{"env -S split", "/usr/bin/env -S bash -eu", "bash", []string{"-eu"}},
+		{"env -S compact", "/usr/bin/env -Sbash -eu", "bash", []string{"-eu"}},
+		{"env -S compact full", "/usr/bin/env -Sbash\t-eu", "bash", []string{"-eu"}},
+		{"env --split-string=", "/usr/bin/env --split-string=bash -eu", "bash", []string{"-eu"}},
+		{"env -i python", "/usr/bin/env -i python3", "python3", []string{}},
+		// Value-taking short flags (-u NAME, -C DIR) must consume their
+		// value token so it does not get mistaken for the interpreter.
+		{"env -u VAR bash", "/usr/bin/env -u FOO bash -eu", "bash", []string{"-eu"}},
+		{"env -C DIR bash", "/usr/bin/env -C /tmp bash", "bash", []string{}},
+		// Long-form value-taking flags in split form too (the =NAME form
+		// is already handled by the generic "starts with -" skip arm).
+		{"env --unset split", "/usr/bin/env --unset FOO bash -eu", "bash", []string{"-eu"}},
+		{"env --chdir split", "/usr/bin/env --chdir /tmp bash", "bash", []string{}},
+		// env -S with quoted bash -c "..." must preserve the quoted arg
+		// as one token instead of breaking it on whitespace.
+		{"env -S bash -c quoted", `/usr/bin/env -S bash -c "echo ok"`, "bash", []string{"-c", "echo ok"}},
+		// env -S \_ (backslash-underscore) is the documented separator
+		// for embedding a space inside the split-string body.
+		{"env -S backslash space", `/usr/bin/env -S bash\_-eu`, "bash", []string{"-eu"}},
+		{"env -S backslash tab", `/usr/bin/env -S bash\t-eu`, "bash", []string{"-eu"}},
+		{"only env flags", "/usr/bin/env -S", "", nil},
+		// GNU env -a / --argv0=ARG overrides argv[0] of the executed
+		// command. -a takes a separate token; the parser must consume the
+		// argv0 value so it does not get mistaken for the interpreter.
+		{"env -a argv0 bash", "/usr/bin/env -a myargv0 bash -eu", "bash", []string{"-eu"}},
+		// --default-signal / --ignore-signal / --block-signal are
+		// optional-argument flags (`[=sig]`). Without `=sig` they take no
+		// value, so the next token IS the interpreter.
+		{"env --ignore-signal bash", "/usr/bin/env --ignore-signal bash -eu", "bash", []string{"-eu"}},
+		{"env --default-signal bash", "/usr/bin/env --default-signal bash", "bash", []string{}},
+		// GNU env accepts leading `NAME=VALUE` assignments before the
+		// command. The parser must skip them and pick the first
+		// non-assignment token as the interpreter.
+		{"env NAME=VALUE bash", "/usr/bin/env PYTHONPATH=/opt python3 -V", "python3", []string{"-V"}},
+		{"env -S NAME=VALUE bash", "/usr/bin/env -S PYTHONPATH=/opt python3 -V", "python3", []string{"-V"}},
+		// `\c` in unquoted env -S body terminates tokenization; tokens
+		// after it are ignored entirely.
+		{"env -S backslash c terminator", `/usr/bin/env -S bash -eu\c trailing junk`, "bash", []string{"-eu"}},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotInt, gotOpts := parseShebang(tt.body)
+			if gotInt != tt.wantInt {
+				t.Fatalf("interpreter = %q, want %q", gotInt, tt.wantInt)
+			}
+			if len(gotOpts) != len(tt.wantOpts) {
+				t.Fatalf("opts = %v, want %v", gotOpts, tt.wantOpts)
+			}
+			for i := range gotOpts {
+				if gotOpts[i] != tt.wantOpts[i] {
+					t.Fatalf("opts[%d] = %q, want %q", i, gotOpts[i], tt.wantOpts[i])
+				}
+			}
+		})
+	}
+}
+
+// POSIX targets preserve the original behavior: the file extension is ignored
+// and the script runs via its own shebang.
+func TestPlanScript_POSIXTarget_IgnoresExtension(t *testing.T) {
+	plan, err := planScript("/skill/evals/check.ps1", "darwin")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if plan.uploadName != "script" {
+		t.Fatalf("uploadName = %q, want \"script\"", plan.uploadName)
+	}
+}
+
+func TestPlanWindowsScript(t *testing.T) {
+	tests := []struct {
+		name        string
+		scriptPath  string
+		wantUpload  string
+		wantCmdHead string
+	}{
+		{"powershell", `C:\skill\check.ps1`, "script.ps1", "powershell -NoProfile -ExecutionPolicy Bypass -File "},
+		{"cmd", `C:\skill\check.cmd`, "script.cmd", "cmd /d /s /c "},
+		{"bat", `C:\skill\check.bat`, "script.bat", "cmd /d /s /c "},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			plan, err := planWindowsScript(tt.scriptPath, platform.Host())
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
+			if plan.uploadName != tt.wantUpload {
+				t.Fatalf("uploadName = %q, want %q", plan.uploadName, tt.wantUpload)
+			}
+			if cmd := plan.command(`C:\tmp\d\` + tt.wantUpload); !strings.HasPrefix(cmd, tt.wantCmdHead) {
+				t.Fatalf("command = %q, want prefix %q", cmd, tt.wantCmdHead)
+			}
+		})
+	}
+}
+
+func TestPlanWindowsScript_UnknownInterpreter(t *testing.T) {
+	dir := t.TempDir()
+	scriptPath := filepath.Join(dir, "mystery.txt")
+	if err := os.WriteFile(scriptPath, []byte("echo hi\n"), 0o600); err != nil {
+		t.Fatalf("write: %v", err)
+	}
+	_, err := planWindowsScript(scriptPath, platform.Host())
+	if err == nil || !strings.Contains(err.Error(), "cannot determine interpreter") {
+		t.Fatalf("expected cannot-determine-interpreter error, got: %v", err)
+	}
+}
+
+// .ps1 file with a shebang that names pwsh must dispatch to pwsh.exe, not
+// the legacy powershell.exe.
+func TestPlanWindowsScript_PS1ShebangPwsh(t *testing.T) {
+	dir := t.TempDir()
+	scriptPath := filepath.Join(dir, "check.ps1")
+	if err := os.WriteFile(scriptPath, []byte("#!/usr/bin/env pwsh\nWrite-Host hi\n"), 0o600); err != nil {
+		t.Fatalf("write: %v", err)
+	}
+	plan, err := planWindowsScript(scriptPath, platform.Host())
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	cmd := plan.command(`C:\tmp\d\script.ps1`)
+	if !strings.HasPrefix(cmd, "pwsh ") {
+		t.Fatalf("expected pwsh prefix, got %q", cmd)
+	}
+	if strings.HasPrefix(cmd, "powershell ") {
+		t.Fatalf("pwsh shebang should not route to powershell.exe: %q", cmd)
+	}
+}
+
+// .ps1 file with `#!/usr/bin/env -S pwsh -NoLogo` must forward -NoLogo to
+// the interpreter -- shebang options must not be silently dropped.
+func TestPlanWindowsScript_PS1ShebangForwardsOpts(t *testing.T) {
+	dir := t.TempDir()
+	scriptPath := filepath.Join(dir, "check.ps1")
+	if err := os.WriteFile(scriptPath, []byte("#!/usr/bin/env -S pwsh -NoLogo\nWrite-Host hi\n"), 0o600); err != nil {
+		t.Fatalf("write: %v", err)
+	}
+	plan, err := planWindowsScript(scriptPath, platform.Host())
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	cmd := plan.command(`C:\tmp\d\script.ps1`)
+	if !strings.Contains(cmd, "-NoLogo") {
+		t.Fatalf("expected -NoLogo forwarded from shebang, got %q", cmd)
+	}
+	// Sanity: defaults still get appended.
+	for _, want := range []string{"-NoProfile", "-ExecutionPolicy", "Bypass", "-File"} {
+		if !strings.Contains(cmd, want) {
+			t.Fatalf("expected %q in command, got %q", want, cmd)
+		}
+	}
+}
+
+// .ps1 file with no shebang (or a non-PowerShell shebang) must keep the
+// legacy default: powershell.exe with the standard flag set.
+func TestPlanWindowsScript_PS1NoShebangUsesLegacyDefault(t *testing.T) {
+	dir := t.TempDir()
+	scriptPath := filepath.Join(dir, "check.ps1")
+	if err := os.WriteFile(scriptPath, []byte("Write-Host hi\n"), 0o600); err != nil {
+		t.Fatalf("write: %v", err)
+	}
+	plan, err := planWindowsScript(scriptPath, platform.Host())
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	cmd := plan.command(`C:\tmp\d\script.ps1`)
+	if !strings.HasPrefix(cmd, "powershell -NoProfile -ExecutionPolicy Bypass -File ") {
+		t.Fatalf("expected legacy powershell default, got %q", cmd)
+	}
+}
+
+// TestPlanWindowsScript_ShellScript covers the .sh branch, whose outcome
+// depends on whether bash is discoverable on the host running the test.
+func TestPlanWindowsScript_ShellScript(t *testing.T) {
+	plan, err := planWindowsScript(`C:\skill\check.sh`, platform.Host())
+	if _, ok := platform.DiscoverBash(); ok {
+		if err != nil {
+			t.Fatalf("bash is available but planning failed: %v", err)
+		}
+		if plan.uploadName != "script.sh" {
+			t.Fatalf("uploadName = %q, want \"script.sh\"", plan.uploadName)
+		}
+		return
+	}
+	if err == nil || !strings.Contains(err.Error(), "requires bash on Windows") {
+		t.Fatalf("expected bash-required error, got: %v", err)
+	}
+}
+
+func TestShebangExtension(t *testing.T) {
+	tests := []struct {
+		name    string
+		content string
+		want    string
+	}{
+		{"posix sh", "#!/bin/sh\necho hi\n", ".sh"},
+		{"env bash", "#!/usr/bin/env bash\necho hi\n", ".sh"},
+		{"env -S bash", "#!/usr/bin/env -S bash -eu\necho hi\n", ".sh"},
+		{"pwsh", "#!/usr/bin/env pwsh\nWrite-Host hi\n", ".ps1"},
+		{"powershell direct", "#!/usr/local/bin/powershell\nWrite-Host hi\n", ".ps1"},
+		{"no shebang", "echo hi\n", ""},
+		{"empty", "", ""},
+		{"unrecognized ruby", "#!/usr/bin/env ruby\nputs 1\n", ""},
+		// fish, ksh-suffixed names etc. must not be misclassified as `.sh`
+		// just because their name contains the letters "sh".
+		{"fish not sh", "#!/usr/bin/env fish\necho hi\n", ""},
+		{"python not sh", "#!/usr/bin/env python3\nprint(1)\n", ""},
+		{"swish not sh", "#!/usr/local/bin/swish\n", ""},
+		// On Windows the .sh runner always invokes bash; non-bash POSIX
+		// shells must not get silently routed to it. Reject so the planner
+		// reports "cannot determine interpreter".
+		{"zsh rejected", "#!/usr/bin/env zsh\necho hi\n", ""},
+		{"dash rejected", "#!/bin/dash\necho hi\n", ""},
+		{"ksh rejected", "#!/usr/bin/env ksh\necho hi\n", ""},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			dir := t.TempDir()
+			p := filepath.Join(dir, "script")
+			if err := os.WriteFile(p, []byte(tt.content), 0o600); err != nil {
+				t.Fatalf("write: %v", err)
+			}
+			if got := shebangExtension(p); got != tt.want {
+				t.Fatalf("shebangExtension = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestCleanupCommand_POSIX(t *testing.T) {
+	plan, err := planScript("/skill/check.sh", "linux")
+	if err != nil {
+		t.Fatalf("planScript: %v", err)
+	}
+	if got, want := plan.cleanupCommand("/tmp/d"), "rm -rf '/tmp/d'"; got != want {
+		t.Fatalf("posix cleanupCommand = %q, want %q", got, want)
+	}
+}
+
+func TestCleanupCommand_Windows(t *testing.T) {
+	plan, err := planWindowsScript(`C:\skill\check.ps1`, platform.Host())
+	if err != nil {
+		t.Fatalf("planWindowsScript: %v", err)
+	}
+	got := plan.cleanupCommand(`C:\tmp\d`)
+	if !strings.HasPrefix(got, "cmd /d /s /c rd /s /q ") {
+		t.Fatalf("windows cleanupCommand = %q, want prefix %q", got, "cmd /d /s /c rd /s /q ")
+	}
+}
+
+// .sh on Windows cleans up via bash `rm -rf` rather than dispatching to cmd,
+// avoiding the bash -> cmd hop that the .ps1/.cmd/.bat paths necessarily take.
+func TestCleanupCommand_Windows_ShellScriptUsesBashRm(t *testing.T) {
+	if _, ok := platform.DiscoverBash(); !ok {
+		t.Skip("requires bash to plan a .sh Windows script")
+	}
+	plan, err := planWindowsScript(`C:\skill\check.sh`, platform.Host())
+	if err != nil {
+		t.Fatalf("planWindowsScript: %v", err)
+	}
+	got := plan.cleanupCommand(`C:\tmp\d`)
+	if !strings.HasPrefix(got, "rm -rf ") {
+		t.Fatalf("windows .sh cleanupCommand = %q, want prefix %q", got, "rm -rf ")
+	}
+	if strings.Contains(got, "cmd /") {
+		t.Fatalf("windows .sh cleanupCommand should not invoke cmd, got %q", got)
+	}
+	// Note: filepath.ToSlash is a no-op when this test runs on a POSIX host
+	// (separator is `/`, no backslashes to convert), so we cannot assert the
+	// forward-slash form here. On a real Windows host the production code
+	// path converts the backslashes; behaviour is exercised by Windows CI.
+}
+
+func TestJudgeTempDir(t *testing.T) {
+	if d := judgeTempDir("linux"); !strings.HasPrefix(d, "/tmp/skill-up-judge-") {
+		t.Fatalf("posix judgeTempDir = %q, want /tmp/skill-up-judge- prefix", d)
+	}
+	if d := judgeTempDir("windows"); !strings.Contains(d, "skill-up-judge-") {
+		t.Fatalf("windows judgeTempDir = %q, want skill-up-judge- substring", d)
+	}
+}
diff --git a/internal/judge/script.go b/internal/judge/script.go
index eeeeb6e..441737b 100644
--- a/internal/judge/script.go
+++ b/internal/judge/script.go
@@ -4,13 +4,11 @@ import (
 	"context"
 	"fmt"
 	"log"
-	"path"
 	"strconv"
 	"strings"
 	"time"
 
 	evalruntime "github.com/alibaba/skill-up/internal/runtime"
-	"github.com/alibaba/skill-up/internal/shellquote"
 )
 
 // DefaultScriptTimeout is the default timeout for script execution (30s).
@@ -76,33 +74,48 @@ func (j *ScriptJudge) runtime(ctx context.Context) (evalruntime.Runtime, func(),
 }
 
 func (j *ScriptJudge) evaluateInRuntime(ctx context.Context, rt evalruntime.Runtime, in Input, timeout time.Duration) (*Result, error) {
-	remoteDir := path.Join("/tmp", fmt.Sprintf("skill-up-judge-%d", time.Now().UnixNano()))
-	remoteScript := path.Join(remoteDir, "script")
+	targetGOOS := rt.TargetGOOS()
+	plan, err := planScript(j.ScriptPath, targetGOOS)
+	if err != nil {
+		return nil, fmt.Errorf("script execution failed: %w", err)
+	}
+
+	remoteDir := judgeTempDir(targetGOOS)
+	remoteScript := joinForGOOS(targetGOOS, remoteDir, plan.uploadName)
 	if err := rt.UploadFile(ctx, j.ScriptPath, remoteScript); err != nil {
 		return nil, fmt.Errorf("script execution failed: upload script judge: %w", err)
 	}
 	defer func() {
-		_, _ = rt.Exec(context.WithoutCancel(ctx), "rm -rf "+shellquote.Quote(remoteDir), evalruntime.ExecOptions{})
+		_, _ = rt.Exec(context.WithoutCancel(ctx), plan.cleanupCommand(remoteDir), evalruntime.ExecOptions{})
 	}()
 
 	remoteTranscript := ""
 	if j.TranscriptPath != "" {
-		remoteTranscript = path.Join(remoteDir, "transcript.json")
+		remoteTranscript = joinForGOOS(targetGOOS, remoteDir, "transcript.json")
 		if err := rt.UploadFile(ctx, j.TranscriptPath, remoteTranscript); err != nil {
 			return nil, fmt.Errorf("script execution failed: upload script judge transcript: %w", err)
 		}
 	}
 
-	command := "chmod 700 " + shellquote.Quote(remoteScript) + " && " + shellquote.Quote(remoteScript)
+	command := plan.command(remoteScript)
 	cwd := in.WorkspacePath
 	if cwd == "" {
 		cwd = rt.Workspace()
 	}
+	// Translate the transcript path into the script interpreter's preferred
+	// form (e.g. forward slashes for `.sh` running under Git Bash so POSIX
+	// tools can `cat "$EVAL_TRANSCRIPT_PATH"`). Every plan returned by
+	// planScript sets envPath (identity on POSIX, ToSlash on Windows .sh),
+	// so the function is always safe to invoke.
+	transcriptEnv := remoteTranscript
+	if remoteTranscript != "" {
+		transcriptEnv = plan.envPath(remoteTranscript)
+	}
 	result, err := rt.Exec(ctx, command, evalruntime.ExecOptions{
 		Cwd:        cwd,
 		TimeoutSec: int(timeout.Seconds()),
 		Env: map[string]string{
-			"EVAL_TRANSCRIPT_PATH": remoteTranscript,
+			"EVAL_TRANSCRIPT_PATH": transcriptEnv,
 			"EVAL_FINAL_MESSAGE":   in.FinalMessage,
 			"EVAL_EXIT_CODE":       strconv.Itoa(in.ExitCode),
 		},
diff --git a/internal/judge/script_test.go b/internal/judge/script_test.go
index 7c4dce8..279655a 100644
--- a/internal/judge/script_test.go
+++ b/internal/judge/script_test.go
@@ -8,29 +8,36 @@ import (
 	"strings"
 	"testing"
 
+	"github.com/alibaba/skill-up/internal/platform"
 	evalruntime "github.com/alibaba/skill-up/internal/runtime"
 )
 
-const osWindows = "windows"
-
-// scriptJudgeCase parametrises the common "write script → run judge →
-// assert status & evidence" flow so individual test bodies stay focused on
-// what's unique. Extracted to silence dupl on near-identical pass/fail cases.
+// scriptJudgeCase parametrises the common "write script → run judge → assert
+// status & evidence" flow so individual test bodies stay focused on what's
+// unique. Each case carries both a POSIX (.sh) and a Windows (.cmd) script
+// body; runScriptJudgeCase picks the one matching the host OS, since these
+// tests exercise the none runtime, which executes on the host.
 type scriptJudgeCase struct {
-	scriptName    string
-	scriptBody    string
+	posixScript   string
+	windowsScript string
 	input         Input
 	expectStatus  Status
 	evidenceMatch string
 }
 
+// script returns the upload name and body appropriate for the host OS.
+func (tc scriptJudgeCase) script() (name, body string) {
+	if runtime.GOOS == platform.GOOSWindows {
+		return "check.cmd", tc.windowsScript
+	}
+	return "check.sh", tc.posixScript
+}
+
 func runScriptJudgeCase(t *testing.T, tc scriptJudgeCase) {
 	t.Helper()
-	if runtime.GOOS == osWindows {
-		t.Skip("skipping on windows")
-	}
 	dir := t.TempDir()
-	script := writeScript(t, dir, tc.scriptName, tc.scriptBody)
+	name, body := tc.script()
+	script := writeScript(t, dir, name, body)
 	in := tc.input
 	if in.WorkspacePath == "" {
 		in.WorkspacePath = dir
@@ -50,8 +57,8 @@ func runScriptJudgeCase(t *testing.T, tc scriptJudgeCase) {
 
 func TestScriptJudge_Pass(t *testing.T) {
 	runScriptJudgeCase(t, scriptJudgeCase{
-		scriptName: "check.sh",
-		scriptBody: "#!/bin/sh\necho \"all checks passed\"\nexit 0\n",
+		posixScript:   "#!/bin/sh\necho \"all checks passed\"\nexit 0\n",
+		windowsScript: "@echo off\r\necho all checks passed\r\nexit /b 0\r\n",
 		input: Input{
 			FinalMessage: "test output",
 			ExitCode:     0,
@@ -63,8 +70,8 @@ func TestScriptJudge_Pass(t *testing.T) {
 
 func TestScriptJudge_Pass_EmptyStdout(t *testing.T) {
 	runScriptJudgeCase(t, scriptJudgeCase{
-		scriptName:    "check.sh",
-		scriptBody:    "#!/bin/sh\nexit 0\n",
+		posixScript:   "#!/bin/sh\nexit 0\n",
+		windowsScript: "@echo off\r\nexit /b 0\r\n",
 		expectStatus:  StatusPass,
 		evidenceMatch: "script passed",
 	})
@@ -76,8 +83,8 @@ func TestScriptJudge_Pass_EmptyStdout(t *testing.T) {
 
 func TestScriptJudge_Fail_NonZeroExit(t *testing.T) {
 	runScriptJudgeCase(t, scriptJudgeCase{
-		scriptName:    "check.sh",
-		scriptBody:    "#!/bin/sh\necho \"review quality below threshold\"\nexit 1\n",
+		posixScript:   "#!/bin/sh\necho \"review quality below threshold\"\nexit 1\n",
+		windowsScript: "@echo off\r\necho review quality below threshold\r\nexit /b 1\r\n",
 		expectStatus:  StatusFail,
 		evidenceMatch: "review quality below threshold",
 	})
@@ -85,8 +92,8 @@ func TestScriptJudge_Fail_NonZeroExit(t *testing.T) {
 
 func TestScriptJudge_Fail_NonZeroExit_EmptyStdout(t *testing.T) {
 	runScriptJudgeCase(t, scriptJudgeCase{
-		scriptName:    "check.sh",
-		scriptBody:    "#!/bin/sh\nexit 2\n",
+		posixScript:   "#!/bin/sh\nexit 2\n",
+		windowsScript: "@echo off\r\nexit /b 2\r\n",
 		expectStatus:  StatusFail,
 		evidenceMatch: "exited with code 2",
 	})
@@ -98,8 +105,8 @@ func TestScriptJudge_Fail_NonZeroExit_EmptyStdout(t *testing.T) {
 
 func TestScriptJudge_StderrCaptured(t *testing.T) {
 	runScriptJudgeCase(t, scriptJudgeCase{
-		scriptName:    "check.sh",
-		scriptBody:    "#!/bin/sh\necho \"passed\"\necho \"debug info here\" >&2\nexit 0\n",
+		posixScript:   "#!/bin/sh\necho \"passed\"\necho \"debug info here\" >&2\nexit 0\n",
+		windowsScript: "@echo off\r\necho passed\r\necho debug info here 1>&2\r\nexit /b 0\r\n",
 		expectStatus:  StatusPass,
 		evidenceMatch: "debug info here",
 	})
@@ -110,11 +117,14 @@ func TestScriptJudge_StderrCaptured(t *testing.T) {
 // ---------------------------------------------------------------------------
 
 func TestScriptJudge_Timeout(t *testing.T) {
-	if runtime.GOOS == osWindows {
-		t.Skip("skipping on windows")
-	}
 	dir := t.TempDir()
-	script := writeScript(t, dir, "slow.sh", "#!/bin/sh\nsleep 10\nexit 0\n")
+	name, body := "slow.sh", "#!/bin/sh\nsleep 10\nexit 0\n"
+	if runtime.GOOS == platform.GOOSWindows {
+		// ping -n 11 to a local address waits ~10s without needing console
+		// input (unlike `timeout`).
+		name, body = "slow.cmd", "@echo off\r\nping -n 11 127.0.0.1 >nul\r\nexit /b 0\r\n"
+	}
+	script := writeScript(t, dir, name, body)
 	j := &ScriptJudge{
 		ScriptPath:     script,
 		TimeoutSeconds: 1,
@@ -148,14 +158,16 @@ func TestScriptJudge_ScriptNotFound(t *testing.T) {
 // ---------------------------------------------------------------------------
 
 func TestScriptJudge_EnvVarsInjected(t *testing.T) {
-	if runtime.GOOS == osWindows {
-		t.Skip("skipping on windows")
-	}
 	dir := t.TempDir()
-	script := writeScript(t, dir, "check_env.sh", `#!/bin/sh
-echo "transcript=$EVAL_TRANSCRIPT_PATH final=$EVAL_FINAL_MESSAGE exit=$EVAL_EXIT_CODE"
-exit 0
-`)
+	name, body := "check_env.sh", "#!/bin/sh\n"+
+		"echo \"transcript=$EVAL_TRANSCRIPT_PATH final=$EVAL_FINAL_MESSAGE exit=$EVAL_EXIT_CODE\"\n"+
+		"exit 0\n"
+	if runtime.GOOS == platform.GOOSWindows {
+		name, body = "check_env.cmd", "@echo off\r\n"+
+			"echo transcript=%EVAL_TRANSCRIPT_PATH% final=%EVAL_FINAL_MESSAGE% exit=%EVAL_EXIT_CODE%\r\n"+
+			"exit /b 0\r\n"
+	}
+	script := writeScript(t, dir, name, body)
 	j := &ScriptJudge{
 		ScriptPath:     script,
 		TranscriptPath: filepath.Join(dir, "transcript.json"),
@@ -171,7 +183,7 @@ exit 0
 	assertNoError(t, err)
 	assertStatus(t, r, StatusPass)
 	ev := r.AssertionResults[0].Evidence
-	if !strings.Contains(ev, "transcript=/tmp/skill-up-judge-") {
+	if !strings.Contains(ev, "transcript=") || !strings.Contains(ev, "skill-up-judge-") {
 		t.Fatalf("EVAL_TRANSCRIPT_PATH not injected, evidence: %s", ev)
 	}
 	if !strings.Contains(ev, "final=hello world") {
@@ -183,9 +195,6 @@ exit 0
 }
 
 func TestScriptJudge_EvaluatesInRuntime(t *testing.T) {
-	if runtime.GOOS == osWindows {
-		t.Skip("skipping on windows")
-	}
 	dir := t.TempDir()
 	script := writeScript(t, dir, "check_runtime.sh", `#!/bin/sh
 echo "cwd=$(pwd) transcript=$EVAL_TRANSCRIPT_PATH final=$EVAL_FINAL_MESSAGE exit=$EVAL_EXIT_CODE"
@@ -247,6 +256,7 @@ func (r *scriptJudgeRuntime) Start(context.Context) error  { return nil }
 func (r *scriptJudgeRuntime) Stop(context.Context) error   { return nil }
 func (r *scriptJudgeRuntime) Workspace() string            { return r.workspace }
 func (r *scriptJudgeRuntime) RequiresProcessSandbox() bool { return true }
+func (r *scriptJudgeRuntime) TargetGOOS() string           { return "linux" }
 
 func (r *scriptJudgeRuntime) UploadFile(_ context.Context, _, targetPath string) error {
 	r.uploads = append(r.uploads, targetPath)
diff --git a/internal/mcp/mock_server_test.go b/internal/mcp/mock_server_test.go
index 4ec6813..7c2c01c 100644
--- a/internal/mcp/mock_server_test.go
+++ b/internal/mcp/mock_server_test.go
@@ -9,6 +9,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	goruntime "runtime"
 	"strconv"
 	"strings"
 	"testing"
@@ -22,6 +23,16 @@ type mockProc struct {
 
 func startMockServer(t *testing.T, script, dir string) *mockProc {
 	t.Helper()
+	if goruntime.GOOS == "windows" {
+		// Every mocked MCP server test spawns a child Node process and
+		// exchanges Content-Length-framed JSON-RPC over its stdout. Node on
+		// default Windows applies codepage / CRLF translation to that
+		// stdout, which corrupts the framed byte stream and makes the
+		// reader hang on the response header until the 15s ctx timeout
+		// fires. Verifying the framing/transport on POSIX is sufficient;
+		// the framing logic itself is platform-independent Go code.
+		t.Skip("Node stdio codepage/CRLF translation corrupts framed binary on Windows")
+	}
 	if _, err := exec.LookPath("node"); err != nil {
 		t.Skip("node is required for mock server tests")
 	}
diff --git a/internal/platform/bash_other.go b/internal/platform/bash_other.go
new file mode 100644
index 0000000..38ee53e
--- /dev/null
+++ b/internal/platform/bash_other.go
@@ -0,0 +1,24 @@
+//go:build !windows
+
+package platform
+
+import (
+	"os"
+	"os/exec"
+)
+
+// DiscoverBash locates a bash interpreter on non-Windows hosts: the
+// SKILL_UP_BASH override first, then PATH.
+func DiscoverBash() (string, bool) {
+	if v := os.Getenv(BashEnvOverride); v != "" {
+		//nolint:gosec // v is an explicit user-supplied bash override path
+		if info, err := os.Stat(v); err == nil && !info.IsDir() {
+			return v, true
+		}
+	}
+	p, err := exec.LookPath("bash")
+	if err != nil {
+		return "", false
+	}
+	return p, true
+}
diff --git a/internal/platform/bash_windows.go b/internal/platform/bash_windows.go
new file mode 100644
index 0000000..a40842e
--- /dev/null
+++ b/internal/platform/bash_windows.go
@@ -0,0 +1,75 @@
+//go:build windows
+
+package platform
+
+import (
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+)
+
+// knownWindowsBashPaths lists the default Git Bash install locations checked
+// after BashEnvOverride and PATH.
+//
+// WSL's C:\Windows\System32\bash.exe is intentionally excluded: it expects
+// Linux-format paths (/mnt/c/...), so script-judge commands built from
+// Windows host paths would fail with file-not-found even though discovery
+// succeeded. Users who want to drive skill-up through WSL bash can point
+// SKILL_UP_BASH at it explicitly after arranging path translation upstream.
+var knownWindowsBashPaths = []string{
+	`C:\Program Files\Git\bin\bash.exe`,
+	`C:\Program Files (x86)\Git\bin\bash.exe`,
+}
+
+// DiscoverBash locates a bash interpreter on Windows. It checks, in order:
+// the SKILL_UP_BASH override, PATH (excluding the WSL shim under System32),
+// then well-known Git Bash locations.
+func DiscoverBash() (string, bool) {
+	if v := os.Getenv(BashEnvOverride); v != "" {
+		if isRegularFile(v) && !isWSLBash(v) {
+			return v, true
+		}
+	}
+	if p, err := exec.LookPath("bash"); err == nil && !isWSLBash(p) {
+		return p, true
+	}
+	for _, p := range knownWindowsBashPaths {
+		if isRegularFile(p) {
+			return p, true
+		}
+	}
+	return "", false
+}
+
+// isWSLBash reports whether p is the WSL bash shim shipped with Windows. The
+// shim lives under %SystemRoot%\System32, which is on PATH by default on every
+// Windows host, so PATH-based discovery would otherwise prefer it. The shim
+// expects Linux-format paths (`/mnt/<drive>/...`) and silently fails on the
+// Windows host paths we pass through, so we treat it as "no bash found" and
+// fall through to the known Git Bash locations.
+func isWSLBash(p string) bool {
+	abs, err := filepath.Abs(p)
+	if err != nil {
+		return false
+	}
+	system32 := windowsSystem32Dir()
+	if system32 == "" {
+		return false
+	}
+	return strings.EqualFold(filepath.Dir(abs), system32)
+}
+
+func windowsSystem32Dir() string {
+	if root := os.Getenv("SystemRoot"); root != "" {
+		return filepath.Join(root, "System32")
+	}
+	// Fall back to the convention; SystemRoot is set on every modern Windows.
+	return `C:\Windows\System32`
+}
+
+func isRegularFile(p string) bool {
+	//nolint:gosec // p is a known bash install location or a user-supplied override
+	info, err := os.Stat(p)
+	return err == nil && !info.IsDir()
+}
diff --git a/internal/platform/platform.go b/internal/platform/platform.go
new file mode 100644
index 0000000..944502e
--- /dev/null
+++ b/internal/platform/platform.go
@@ -0,0 +1,48 @@
+// Package platform centralizes OS-conditional process, shell, and executable
+// discovery so the rest of skill-up does not scatter runtime.GOOS branches
+// across packages.
+package platform
+
+import (
+	"context"
+	"os/exec"
+)
+
+// BashEnvOverride is the environment variable a user may set to point at a
+// specific bash interpreter, taking precedence over PATH and well-known
+// install locations.
+const BashEnvOverride = "SKILL_UP_BASH"
+
+// GOOS-value constants that callers across packages compare against
+// runtime.GOOS / Runtime.TargetGOOS(). Centralizing them avoids string
+// literals duplicated across the OS-dispatch sites.
+const (
+	GOOSWindows = "windows"
+	GOOSLinux   = "linux"
+	GOOSDarwin  = "darwin"
+)
+
+// HostShell describes the shell that NoneRuntime.Exec will use on the
+// current host. Callers that need to quote arguments for the same shell or
+// inject extra environment variables it depends on should reach for Quote
+// and Env rather than re-deriving them.
+type HostShell struct {
+	// Cmd builds an exec.Cmd configured to run `command` through the host
+	// shell. The caller still sets Dir, Env (which must be merged with
+	// HostShell.Env), Stdout, and Stderr.
+	Cmd func(ctx context.Context, command string) *exec.Cmd
+	// Quote returns the argument-quoting suitable for the host shell.
+	Quote func(s string) string
+	// Env lists extra environment variables the shell needs to behave
+	// predictably (for example MSYS_NO_PATHCONV for Git Bash on Windows).
+	// Callers append it to their own env list; nil means "no extras".
+	Env []string
+	// IsBash reports whether the chosen shell is bash. POSIX hosts are
+	// always true; Windows is true only when DiscoverBash succeeded.
+	IsBash bool
+	// Bash is the discovered bash interpreter path, populated when IsBash
+	// is true. Callers building "bash invokes bash" pipelines (the .sh
+	// script-judge plan on Windows) read it from here so the choice of
+	// bash binary matches what Cmd will launch.
+	Bash string
+}
diff --git a/internal/platform/platform_test.go b/internal/platform/platform_test.go
new file mode 100644
index 0000000..b793c1d
--- /dev/null
+++ b/internal/platform/platform_test.go
@@ -0,0 +1,28 @@
+package platform
+
+import (
+	"context"
+	"testing"
+)
+
+func TestHost(t *testing.T) {
+	shell := Host()
+	if shell.Cmd == nil {
+		t.Fatal("Host returned a HostShell with nil Cmd")
+	}
+	if shell.Quote == nil {
+		t.Fatal("Host returned a HostShell with nil Quote")
+	}
+	cmd := shell.Cmd(context.Background(), "echo hi")
+	if cmd == nil {
+		t.Fatal("HostShell.Cmd returned nil")
+	}
+	if cmd.Path == "" {
+		t.Fatal("HostShell.Cmd produced a command with no executable path")
+	}
+	// On POSIX hosts bash should be discoverable (PATH has it); IsBash
+	// being false would point at a misconfigured runner.
+	if !shell.IsBash {
+		t.Logf("note: HostShell.IsBash is false (no bash discovered); the cmd fallback is exercised")
+	}
+}
diff --git a/internal/platform/shell_other.go b/internal/platform/shell_other.go
new file mode 100644
index 0000000..bb7bd6f
--- /dev/null
+++ b/internal/platform/shell_other.go
@@ -0,0 +1,46 @@
+//go:build !windows
+
+package platform
+
+import (
+	"context"
+	"os/exec"
+	"sync"
+
+	"github.com/alibaba/skill-up/internal/shellquote"
+)
+
+// Host returns the descriptor of the shell NoneRuntime.Exec will use on the
+// current host: how to launch a command, how to quote an argument for that
+// shell, and any extra environment variables the shell needs to behave
+// predictably.
+//
+// Centralizing all three lets callers (the script-judge planner especially)
+// pick a quoter that matches the shell actually launched, without
+// re-deriving the shell choice independently.
+//
+// On POSIX the shell is bash when discoverable, sh otherwise. POSIX
+// single-quoting is correct in both cases.
+//
+// The result is cached for the process lifetime; see shell_windows.go for
+// the rationale.
+var hostShell = sync.OnceValue(buildHostShell)
+
+// Host returns the cached HostShell descriptor for the current POSIX host.
+func Host() HostShell { return hostShell() }
+
+func buildHostShell() HostShell {
+	bash, hasBash := DiscoverBash()
+	shell := "sh"
+	if hasBash {
+		shell = bash
+	}
+	return HostShell{
+		Cmd: func(ctx context.Context, command string) *exec.Cmd {
+			return exec.CommandContext(ctx, shell, "-c", command)
+		},
+		Quote:  shellquote.QuotePOSIX,
+		IsBash: hasBash,
+		Bash:   bash,
+	}
+}
diff --git a/internal/platform/shell_windows.go b/internal/platform/shell_windows.go
new file mode 100644
index 0000000..6c0b5cf
--- /dev/null
+++ b/internal/platform/shell_windows.go
@@ -0,0 +1,91 @@
+//go:build windows
+
+package platform
+
+import (
+	"context"
+	"os/exec"
+	"strings"
+	"sync"
+	"syscall"
+
+	"github.com/alibaba/skill-up/internal/shellquote"
+)
+
+// Host returns the descriptor of the shell NoneRuntime.Exec will use on the
+// current Windows host.
+//
+// We prefer a discoverable bash (Git Bash via DiscoverBash) and fall back to
+// cmd.exe when none is available. Choosing bash whenever possible keeps the
+// many internal POSIX command strings -- agent CLI templates with single
+// quotes, `set -eu` git fixtures, workspace-diff `if ...; then` pipelines --
+// working on a Windows host. The bash leg also injects MSYS_NO_PATHCONV /
+// MSYS2_ARG_CONV_EXCL so MSYS does not rewrite `/x`-shaped argv entries
+// before they reach native Windows binaries like cmd.exe or powershell.
+//
+// The cmd fallback uses SysProcAttr.CmdLine with `cmd /d /s /c "<command>"`:
+// `/d` disables HKLM/HKCU AutoRun and `/s` forces the deterministic
+// strip-first-and-last-quote rule for the wrapping. Inside the wrapping the
+// command must be CommandLineToArgvW-quoted, hence the QuoteWindows quoter.
+// cmd.exe is not a POSIX shell, so bash-style command strings (the agent
+// nvm/Node bootstrap) do not run natively on Windows under the cmd
+// fallback; that remains a documented limitation in docs/guide/windows.md.
+//
+// The result is cached for the process lifetime: PATH / SKILL_UP_BASH are
+// documented as read-once at startup (see BashEnvOverride), and the host's
+// bash install does not change while skill-up is running. Caching avoids
+// repeating LookPath/Stat work on every NoneRuntime.Exec.
+var hostShell = sync.OnceValue(buildHostShell)
+
+// Host returns the cached HostShell descriptor for the current Windows host.
+func Host() HostShell { return hostShell() }
+
+func buildHostShell() HostShell {
+	bash, hasBash := DiscoverBash()
+	if hasBash {
+		return HostShell{
+			Cmd: func(ctx context.Context, command string) *exec.Cmd {
+				return exec.CommandContext(ctx, bash, "-c", command)
+			},
+			Quote: quoteForBashDoubleQuote,
+			Env: []string{
+				"MSYS_NO_PATHCONV=1",
+				"MSYS2_ARG_CONV_EXCL=*",
+			},
+			IsBash: true,
+			Bash:   bash,
+		}
+	}
+	return HostShell{
+		Cmd: func(ctx context.Context, command string) *exec.Cmd {
+			cmd := exec.CommandContext(ctx, "cmd")
+			cmd.SysProcAttr = &syscall.SysProcAttr{CmdLine: `cmd /d /s /c "` + command + `"`}
+			return cmd
+		},
+		Quote:  shellquote.QuoteWindows,
+		IsBash: false,
+	}
+}
+
+// quoteForBashDoubleQuote returns s wrapped in double quotes with every
+// character that bash treats as active inside double quotes escaped with a
+// backslash. The four actives are \, ", $, `. After bash decodes the
+// resulting string each of those bytes is delivered intact to the program
+// bash spawns (cmd / powershell / a second bash), so a path like
+// `C:\tmp\$foo\script.ps1` survives the bash -c hop without losing the
+// backslash before `$`. cmd.exe never sees this encoding because we only
+// pick it when bash is the chosen shell.
+func quoteForBashDoubleQuote(s string) string {
+	var b strings.Builder
+	b.Grow(len(s) + 2)
+	b.WriteByte('"')
+	for i := range len(s) {
+		c := s[i]
+		if c == '\\' || c == '"' || c == '$' || c == '`' {
+			b.WriteByte('\\')
+		}
+		b.WriteByte(c)
+	}
+	b.WriteByte('"')
+	return b.String()
+}
diff --git a/internal/report/workspace.go b/internal/report/workspace.go
index 96d436f..cbd4d72 100644
--- a/internal/report/workspace.go
+++ b/internal/report/workspace.go
@@ -32,6 +32,15 @@ const (
 	filePerm = 0o644
 )
 
+// isRootedPath reports whether p begins with either path separator. On
+// Windows filepath.IsAbs requires a volume name (e.g. `C:\…`), so a POSIX-
+// style "/abs.txt" passed in from user-supplied YAML would otherwise be
+// accepted as a relative path; treat any leading `/` or `\` as rooted to
+// keep the path-traversal guard OS-independent.
+func isRootedPath(p string) bool {
+	return strings.HasPrefix(p, "/") || strings.HasPrefix(p, `\`)
+}
+
 // validateCaseID checks that caseID does not contain path traversal sequences.
 func validateCaseID(caseID string) error {
 	cleaned := filepath.Clean(caseID)
@@ -204,9 +213,12 @@ func (w *IterationWorkspace) WriteBenchmarkMD(bm *AnthropicBenchmark) error {
 
 // WriteFile writes arbitrary content to a file in the iteration directory.
 func (w *IterationWorkspace) WriteFile(relPath string, data []byte) error {
-	// Prevent path traversal attacks.
+	// Prevent path traversal attacks. On Windows filepath.IsAbs requires a
+	// volume name, so a POSIX-style "/abs.txt" passed in by an eval case
+	// would slip through; reject rooted paths (leading separator) as well
+	// to keep the security check OS-independent.
 	cleaned := filepath.Clean(relPath)
-	if filepath.IsAbs(cleaned) || strings.HasPrefix(cleaned, "..") {
+	if filepath.IsAbs(cleaned) || isRootedPath(cleaned) || strings.HasPrefix(cleaned, "..") {
 		return fmt.Errorf("invalid relative path: %s", relPath)
 	}
 
diff --git a/internal/report/workspace_test.go b/internal/report/workspace_test.go
index 9685efe..0163279 100644
--- a/internal/report/workspace_test.go
+++ b/internal/report/workspace_test.go
@@ -46,21 +46,26 @@ func TestNewIterationWorkspace_DefaultAndInvalidIteration(t *testing.T) {
 
 func TestIterationWorkspace_Paths(t *testing.T) {
 	t.Parallel()
-	ws, err := NewIterationWorkspace("/tmp/test-workspace", "test-skill", 1)
+	// Use filepath.Join so the test root and the values produced by
+	// IterationDir/CaseDir/etc. share the host's path separator.
+	root := filepath.Join(string(filepath.Separator), "tmp", "test-workspace")
+	iter := filepath.Join(root, "iteration-1")
+	caseDir := filepath.Join(iter, "case-1")
+	ws, err := NewIterationWorkspace(root, "test-skill", 1)
 	if err != nil {
 		t.Fatalf("NewIterationWorkspace error: %v", err)
 	}
 
-	if ws.IterationDir() != "/tmp/test-workspace/iteration-1" {
+	if ws.IterationDir() != iter {
 		t.Errorf("unexpected iteration dir: %s", ws.IterationDir())
 	}
-	if ws.CaseDir("case-1") != "/tmp/test-workspace/iteration-1/case-1" {
+	if ws.CaseDir("case-1") != caseDir {
 		t.Errorf("unexpected case dir: %s", ws.CaseDir("case-1"))
 	}
-	if ws.WithSkillDir("case-1") != "/tmp/test-workspace/iteration-1/case-1/with_skill" {
+	if ws.WithSkillDir("case-1") != filepath.Join(caseDir, "with_skill") {
 		t.Errorf("unexpected with_skill dir: %s", ws.WithSkillDir("case-1"))
 	}
-	if ws.WithoutSkillDir("case-1") != "/tmp/test-workspace/iteration-1/case-1/without_skill" {
+	if ws.WithoutSkillDir("case-1") != filepath.Join(caseDir, "without_skill") {
 		t.Errorf("unexpected without_skill dir: %s", ws.WithoutSkillDir("case-1"))
 	}
 }
diff --git a/internal/runtime/docker.go b/internal/runtime/docker.go
index 5b19f07..205141d 100644
--- a/internal/runtime/docker.go
+++ b/internal/runtime/docker.go
@@ -19,6 +19,7 @@ import (
 
 	"github.com/alibaba/skill-up/internal/logging"
 	"github.com/alibaba/skill-up/internal/observability"
+	"github.com/alibaba/skill-up/internal/platform"
 )
 
 const (
@@ -249,6 +250,11 @@ func (r *DockerRuntime) Close() error {
 	return nil
 }
 
+// TargetGOOS reports the GOOS of the container's guest OS. skill-up's
+// docker runtime currently provisions a Linux image, so commands executed
+// via `docker exec` run on a Linux guest regardless of the host platform.
+func (r *DockerRuntime) TargetGOOS() string { return platform.GOOSLinux }
+
 // Start starts the container if it is not already running.
 func (r *DockerRuntime) Start(ctx context.Context) error {
 	r.mu.Lock()
diff --git a/internal/runtime/none.go b/internal/runtime/none.go
index caf498e..45e9c50 100644
--- a/internal/runtime/none.go
+++ b/internal/runtime/none.go
@@ -8,17 +8,30 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	goruntime "runtime"
 	"time"
 
 	"go.opentelemetry.io/otel/attribute"
 
 	"github.com/alibaba/skill-up/internal/logging"
 	"github.com/alibaba/skill-up/internal/observability"
+	"github.com/alibaba/skill-up/internal/platform"
 )
 
 const (
 	noneDirMode  = 0o755
 	noneFileMode = 0o600
+
+	// execContextGracePeriod bounds how long Exec waits after ctx-cancel
+	// before forcibly closing the child's stdio pipes. Applied on every
+	// platform: the original motivation is Windows under Git Bash, where a
+	// killed parent's grandchild (ping/sleep/git) can still hold the stderr
+	// pipe and block pipe-reader goroutines forever; on POSIX it adds the
+	// same upper bound on shutdown for the rarer case of a child that
+	// ignores SIGTERM and keeps stdio open. 10s is comfortably above
+	// observed Windows grandchild teardown latency while still surfacing a
+	// hang quickly in tests.
+	execContextGracePeriod = 10 * time.Second
 )
 
 // pathInWorkspaceOrAbs returns p if it is an absolute host path, otherwise filepath.Join(r.workspace, p).
@@ -168,7 +181,12 @@ func (r *NoneRuntime) Exec(ctx context.Context, command string, opts ExecOptions
 	defer span.End()
 	startTime := time.Now()
 
-	cmd := exec.CommandContext(ctx, "bash", "-c", command)
+	shell := platform.Host()
+	cmd := shell.Cmd(ctx, command)
+	// Bound the grace window between ctx-cancel and Wait returning so the
+	// pipe-reader goroutine can unblock. See execContextGracePeriod above
+	// for the reasoning.
+	cmd.WaitDelay = execContextGracePeriod
 	if opts.Cwd != "" {
 		cmd.Dir = opts.Cwd
 	} else {
@@ -180,6 +198,10 @@ func (r *NoneRuntime) Exec(ctx context.Context, command string, opts ExecOptions
 	)
 
 	env := mergeEnv(r.cfg.Env, opts.Env)
+	// The shell descriptor may need its own env tweaks (MSYS_NO_PATHCONV on
+	// Windows-bash, ...) — append them once here so the same "use bash →
+	// disable MSYS argv rewrite" decision lives in a single place.
+	env = append(env, shell.Env...)
 	cmd.Env = env
 
 	var stdout, stderr bytes.Buffer
@@ -236,24 +258,27 @@ func (r *NoneRuntime) Exec(ctx context.Context, command string, opts ExecOptions
 // classifyExecError translates a *exec.Cmd Run error into the (exitCode, error)
 // pair that callers expose through ExecResult.
 //
-// Precedence (matches the legacy inline behaviour):
+// Precedence:
 //
 //	nil err                            → (0, nil)
-//	*exec.ExitError + ctx.Err() == nil → (exitCode, nil)
-//	*exec.ExitError + ctx.Err() != nil → (exitCode, ctxErr)   // process was killed by ctx
-//	non-ExitError                      → (-1, ctxErr or err)
+//	ctx.Err() != nil (any cause)       → (-1, ctxErr)   // process was killed by ctx
+//	*exec.ExitError                    → (exitCode, nil)
+//	non-ExitError                      → (-1, runErr)
+//
+// When the context terminated the process we always report -1 instead of the
+// OS-reported exit code: on Windows a killed cmd.exe surfaces 1, which would
+// otherwise be indistinguishable from a normal failure.
 func classifyExecError(ctx context.Context, runErr error) (int, error) {
 	if runErr == nil {
 		return 0, nil
 	}
-	ctxErr := ctx.Err()
+	if ctxErr := ctx.Err(); ctxErr != nil {
+		return -1, ctxErr
+	}
 
 	var exitErr *exec.ExitError
 	if errors.As(runErr, &exitErr) {
-		return exitErr.ExitCode(), ctxErr
-	}
-	if ctxErr != nil {
-		return -1, ctxErr
+		return exitErr.ExitCode(), nil
 	}
 	return -1, runErr
 }
@@ -305,3 +330,9 @@ func (r *NoneRuntime) Workspace() string {
 func (r *NoneRuntime) RequiresProcessSandbox() bool {
 	return true
 }
+
+// TargetGOOS reports the host OS, since NoneRuntime executes commands directly
+// on the host.
+func (r *NoneRuntime) TargetGOOS() string {
+	return goruntime.GOOS
+}
diff --git a/internal/runtime/none_test.go b/internal/runtime/none_test.go
index 03d856b..af882e2 100644
--- a/internal/runtime/none_test.go
+++ b/internal/runtime/none_test.go
@@ -7,6 +7,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	goruntime "runtime"
 	"strings"
 	"sync"
 	"testing"
@@ -225,7 +226,13 @@ func TestNoneRuntime_ExecReturnsContextErrorOnTimeout(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
 	defer cancel()
 
-	result, err := rt.Exec(ctx, "sleep 1", ExecOptions{})
+	// `sleep 1` is POSIX; on Windows cmd.exe falls back to a long ping
+	// so the process actually outlives the deadline and gets killed.
+	sleepCmd := "sleep 1"
+	if goruntime.GOOS == "windows" {
+		sleepCmd = "ping -n 3 127.0.0.1 > nul"
+	}
+	result, err := rt.Exec(ctx, sleepCmd, ExecOptions{})
 	if !errors.Is(err, context.DeadlineExceeded) {
 		t.Fatalf("expected context deadline exceeded, got %v", err)
 	}
@@ -404,6 +411,11 @@ func TestNoneRuntime_ExecWithEnv(t *testing.T) {
 }
 
 func TestNoneRuntime_ExecExpandsPathFromRuntimeEnv(t *testing.T) {
+	if goruntime.GOOS == "windows" {
+		// The test asserts POSIX `printf "$PATH"` expansion; on Windows the
+		// host shell is cmd.exe, which neither has printf nor uses `$PATH`.
+		t.Skip("POSIX PATH expansion test; Windows has no equivalent")
+	}
 	bashPath, err := exec.LookPath("bash")
 	if err != nil {
 		t.Fatal(err)
diff --git a/internal/runtime/opensandbox.go b/internal/runtime/opensandbox.go
index 9f13e5c..a1e1e5b 100644
--- a/internal/runtime/opensandbox.go
+++ b/internal/runtime/opensandbox.go
@@ -578,6 +578,12 @@ func (r *OpenSandboxRuntime) RequiresProcessSandbox() bool {
 	return false
 }
 
+// TargetGOOS reports "linux": OpenSandbox always executes commands inside a
+// Linux sandbox regardless of the host OS.
+func (r *OpenSandboxRuntime) TargetGOOS() string {
+	return "linux"
+}
+
 func (r *OpenSandboxRuntime) connectionConfig() opensandbox.ConnectionConfig {
 	return opensandbox.ConnectionConfig{
 		Domain:         r.baseURL,
@@ -688,7 +694,7 @@ func (r *OpenSandboxRuntime) ensureDirectory(ctx context.Context, dir string, mo
 	if err == nil {
 		return nil
 	}
-	quoted := shellquote.Quote(dir)
+	quoted := shellquote.QuotePOSIX(dir)
 	result, execErr := r.runCommand(ctx, "/", "mkdir -p "+quoted+" && test -d "+quoted+" && test -w "+quoted, 30)
 	if execErr != nil {
 		return err
@@ -711,7 +717,7 @@ func (r *OpenSandboxRuntime) ensureDirectories(ctx context.Context, dirs []strin
 	command.WriteString("mkdir -p")
 	for _, dir := range dirs {
 		command.WriteByte(' ')
-		command.WriteString(shellquote.Quote(dir))
+		command.WriteString(shellquote.QuotePOSIX(dir))
 	}
 	result, err := r.runCommand(ctx, "/", command.String(), 30)
 	if err != nil {
@@ -816,7 +822,12 @@ func safeLocalTarget(root, rel string) (string, error) {
 	if clean == "." {
 		return root, nil
 	}
-	if filepath.IsAbs(clean) || clean == ".." || strings.HasPrefix(clean, ".."+string(filepath.Separator)) {
+	// On Windows filepath.IsAbs requires a volume name, so a POSIX-style
+	// "/absolute" supplied via the SDK would otherwise slip through; also
+	// reject any rooted path (leading separator) so the guard behaves the
+	// same on both OSes.
+	rooted := strings.HasPrefix(clean, "/") || strings.HasPrefix(clean, `\`)
+	if filepath.IsAbs(clean) || rooted || clean == ".." || strings.HasPrefix(clean, ".."+string(filepath.Separator)) {
 		return "", fmt.Errorf("unsafe sandbox file path: %s", rel)
 	}
 	return filepath.Join(root, clean), nil
diff --git a/internal/runtime/runtime.go b/internal/runtime/runtime.go
index 5042a0c..829bf73 100644
--- a/internal/runtime/runtime.go
+++ b/internal/runtime/runtime.go
@@ -112,6 +112,15 @@ type Runtime interface {
 	Workspace() string
 	// RequiresProcessSandbox reports whether agents should enable their own process sandbox.
 	RequiresProcessSandbox() bool
+	// TargetGOOS reports the GOOS value of the environment where Exec
+	// runs commands. NoneRuntime executes on the host so it returns
+	// runtime.GOOS; OpenSandboxRuntime always returns "linux" because
+	// it executes inside a Linux sandbox. Implementations must return a
+	// non-empty value -- callers (most importantly the script-judge
+	// planner) use it to choose between POSIX and Windows command shapes,
+	// and silently defaulting to "linux" would mask configuration
+	// mistakes in any future Windows-targeting runtime.
+	TargetGOOS() string
 }
 
 // FileReadSeeker combines io.ReadSeeker for file access.
diff --git a/internal/shellquote/shellquote.go b/internal/shellquote/shellquote.go
index f927ec2..7814bca 100644
--- a/internal/shellquote/shellquote.go
+++ b/internal/shellquote/shellquote.go
@@ -1,8 +1,49 @@
+// Package shellquote quotes strings for safe inclusion in shell command lines.
 package shellquote
 
 import "strings"
 
-// Quote returns a POSIX shell single-quoted representation of s.
-func Quote(s string) string {
+// QuotePOSIX returns a POSIX shell single-quoted representation of s.
+func QuotePOSIX(s string) string {
 	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
 }
+
+// QuoteWindows returns a representation of s safe to pass as a single argument
+// on a Windows command line, following the CommandLineToArgvW parsing rules:
+// the argument is wrapped in double quotes; any run of backslashes immediately
+// preceding a double quote (or the closing quote) is doubled; interior double
+// quotes are escaped as \".
+//
+// The result is always wrapped, even when s has no whitespace or metacharacter
+// triggers: when NoneRuntime.Exec routes through bash on Windows (the default
+// when Git Bash is discoverable), an unquoted backslash-bearing path such as
+// `C:\tmp\script.cmd` would have its backslashes stripped by bash and reach
+// the downstream `cmd /c` / `powershell -File` as `C:tmpscript.cmd`. Wrapping
+// in double quotes keeps backslashes literal under both bash and cmd, and is
+// equally safe for CommandLineToArgvW consumers.
+func QuoteWindows(s string) string {
+	if s == "" {
+		return `""`
+	}
+	var b strings.Builder
+	b.WriteByte('"')
+	backslashes := 0
+	for i := range len(s) {
+		switch c := s[i]; c {
+		case '\\':
+			backslashes++
+		case '"':
+			b.WriteString(strings.Repeat(`\`, backslashes*2+1))
+			b.WriteByte('"')
+			backslashes = 0
+		default:
+			b.WriteString(strings.Repeat(`\`, backslashes))
+			backslashes = 0
+			b.WriteByte(c)
+		}
+	}
+	// Double trailing backslashes so they do not escape the closing quote.
+	b.WriteString(strings.Repeat(`\`, backslashes*2))
+	b.WriteByte('"')
+	return b.String()
+}
diff --git a/internal/shellquote/shellquote_test.go b/internal/shellquote/shellquote_test.go
index 697cc8e..e621d56 100644
--- a/internal/shellquote/shellquote_test.go
+++ b/internal/shellquote/shellquote_test.go
@@ -2,22 +2,41 @@ package shellquote
 
 import "testing"
 
-func TestQuote(t *testing.T) {
+func TestQuotePOSIX(t *testing.T) {
 	tests := []struct {
-		name  string
-		input string
-		want  string
+		in, want string
 	}{
-		{name: "plain", input: "hello", want: "'hello'"},
-		{name: "spaces", input: "hello world", want: "'hello world'"},
-		{name: "single quote", input: "can't", want: "'can'\\''t'"},
-		{name: "empty", input: "", want: "''"},
+		{"", "''"},
+		{"plain", "'plain'"},
+		{"with space", "'with space'"},
+		{"it's", `'it'\''s'`},
 	}
+	for _, tt := range tests {
+		if got := QuotePOSIX(tt.in); got != tt.want {
+			t.Errorf("QuotePOSIX(%q) = %q, want %q", tt.in, got, tt.want)
+		}
+	}
+}
 
+func TestQuoteWindows(t *testing.T) {
+	tests := []struct {
+		name, in, want string
+	}{
+		{"empty", "", `""`},
+		// QuoteWindows always wraps in double quotes so bash (which strips
+		// unquoted backslashes) does not mangle paths like `C:\tmp\file`.
+		{"plain", "plain", `"plain"`},
+		{"backslash path no space", `C:\tmp\s.ps1`, `"C:\tmp\s.ps1"`},
+		{"space", `C:\Program Files\s.exe`, `"C:\Program Files\s.exe"`},
+		{"interior quote", `a"b`, `"a\"b"`},
+		{"trailing backslash with space", `a b\`, `"a b\\"`},
+		{"backslash before quote", `a\"b`, `"a\\\"b"`},
+		{"cmd metachar", "a&b", `"a&b"`},
+	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			if got := Quote(tt.input); got != tt.want {
-				t.Fatalf("Quote(%q) = %q, want %q", tt.input, got, tt.want)
+			if got := QuoteWindows(tt.in); got != tt.want {
+				t.Errorf("QuoteWindows(%q) = %q, want %q", tt.in, got, tt.want)
 			}
 		})
 	}
diff --git a/scripts/windows/hooks.ps1 b/scripts/windows/hooks.ps1
new file mode 100644
index 0000000..a42fb8f
--- /dev/null
+++ b/scripts/windows/hooks.ps1
@@ -0,0 +1,11 @@
+#!/usr/bin/env pwsh
+# Point Git at this repo's hooks (run once per clone).
+# PowerShell counterpart of the `hooks` target in the Makefile, for Windows
+# contributors who do not have GNU make available.
+$ErrorActionPreference = 'Stop'
+
+Set-Location (git rev-parse --show-toplevel)
+if ((git config core.hooksPath) -ne '.githooks') {
+    git config core.hooksPath .githooks
+    Write-Host 'git hooks installed (.githooks)'
+}
diff --git a/scripts/windows/lint-tools.ps1 b/scripts/windows/lint-tools.ps1
new file mode 100644
index 0000000..4c66d7f
--- /dev/null
+++ b/scripts/windows/lint-tools.ps1
@@ -0,0 +1,23 @@
+#!/usr/bin/env pwsh
+# Install the pinned lint tools into .tools/bin.
+# PowerShell counterpart of the `lint-tools` target in the Makefile. The tool
+# versions are kept in sync with the Makefile (GOLANGCI_LINT_VERSION /
+# REVIVE_VERSION).
+$ErrorActionPreference = 'Stop'
+
+$golangciLintVersion = 'v2.11.4'
+$reviveVersion = 'v1.10.0'
+
+$repoRoot = (git rev-parse --show-toplevel)
+$toolsBin = Join-Path $repoRoot '.tools\bin'
+New-Item -ItemType Directory -Force -Path $toolsBin | Out-Null
+
+$env:GOBIN = $toolsBin
+$env:GOFLAGS = '-buildvcs=false'
+
+go install "github.com/golangci/golangci-lint/v2/cmd/golangci-lint@$golangciLintVersion"
+if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
+go install "github.com/mgechev/revive@$reviveVersion"
+if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
+
+Write-Host "lint tools installed into $toolsBin"
diff --git a/scripts/windows/verify.ps1 b/scripts/windows/verify.ps1
new file mode 100644
index 0000000..3bb0ce8
--- /dev/null
+++ b/scripts/windows/verify.ps1
@@ -0,0 +1,33 @@
+#!/usr/bin/env pwsh
+# Format check, vet, revive, and golangci-lint.
+# PowerShell counterpart of the `verify` target in the Makefile.
+$ErrorActionPreference = 'Stop'
+
+$repoRoot = (git rev-parse --show-toplevel)
+Set-Location $repoRoot
+$toolsBin = Join-Path $repoRoot '.tools\bin'
+
+# fmt-check: fail if any .go file is not gofmt-formatted.
+$unformatted = (gofmt -l .)
+if ($unformatted) {
+    Write-Host 'These files need gofmt (run: gofmt -w .):'
+    Write-Host $unformatted
+    exit 1
+}
+
+go vet ./...
+if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
+
+$revive = Join-Path $toolsBin 'revive.exe'
+$golangciLint = Join-Path $toolsBin 'golangci-lint.exe'
+if (-not (Test-Path $revive) -or -not (Test-Path $golangciLint)) {
+    & (Join-Path $PSScriptRoot 'lint-tools.ps1')
+}
+
+& $revive -config revive.toml ./...
+if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
+
+& $golangciLint run ./...
+if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
+
+Write-Host 'verify passed'