Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 48 additions & 4 deletions claudecode/evals/eval_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sys
import subprocess
import shutil
import tempfile
import time
import threading
from typing import Dict, Any, Optional, Tuple, List
Expand Down Expand Up @@ -197,17 +198,39 @@ def _clean_worktrees(self, repo_path: str, branch_pattern: str = None) -> None:

def _get_eval_branch_name(self, test_case: EvalCase) -> str:
    """Generate a branch name for evaluation.

    The name has the form ``eval-pr-<repo>-<pr>-<YYYYmmdd-HHMMSS>``,
    where ``<repo>`` is ``test_case.repo_name`` with every ``/`` and
    ``.`` replaced by ``-`` and the trailing timestamp is taken from
    the local clock at call time (one-second resolution).

    Args:
        test_case: Test case being evaluated; its ``repo_name`` and
            ``pr_number`` attributes are read.

    Returns:
        Branch name for the evaluation
    """
    # Create a safe branch name from repo and PR
    safe_repo = test_case.repo_name.replace('/', '-').replace('.', '-')
    timestamp = time.strftime('%Y%m%d-%H%M%S')
    return f"eval-pr-{safe_repo}-{test_case.pr_number}-{timestamp}"

def _make_askpass_helper(self) -> str:
    """Write a temp shell script that prints $GIT_AUTH_TOKEN.

    Used by ``git clone`` via the ``GIT_ASKPASS`` environment variable
    so the GitHub token never has to live in argv (where other local
    users could see it via ``ps``) or in the cloned repo's
    ``.git/config``. The helper file is created with mode 0o700 and
    is unlinked by the caller in a ``finally`` block.

    The script itself contains no secret: it only echoes whatever
    ``GIT_AUTH_TOKEN`` holds in the environment of the process that
    runs it.

    Returns:
        Filesystem path of the newly created helper script.

    Raises:
        Any exception raised while writing or chmod-ing the script is
        propagated after the partially created file has been removed.
    """
    fd, path = tempfile.mkstemp(prefix='gitaskpass-', suffix='.sh')
    try:
        with os.fdopen(fd, 'w') as f:
            f.write('#!/bin/sh\nprintf %s "$GIT_AUTH_TOKEN"\n')
        # mkstemp creates the file owner-read/write only; add the
        # owner-execute bit so git can run it as the askpass program.
        os.chmod(path, 0o700)
        return path
    except BaseException:
        # BaseException (not just Exception) so an interrupt such as
        # KeyboardInterrupt mid-write does not leave a stray script
        # behind. Removal is best-effort; the original error wins.
        try:
            os.unlink(path)
        except OSError:
            pass
        raise

def _setup_repository(self, test_case: EvalCase) -> Tuple[bool, str, str]:
"""Set up repository worktree for PR evaluation.
Expand All @@ -233,16 +256,37 @@ def _setup_repository(self, test_case: EvalCase) -> Tuple[bool, str, str]:
if not os.path.exists(base_repo_path):
self.log(f"Cloning {repo_name} to {base_repo_path}")
clone_url = f"https://github.com/{repo_name}.git"
clone_env = os.environ.copy()
askpass_helper: Optional[str] = None
if self.github_token:
clone_url = f"https://{self.github_token}@github.com/{repo_name}.git"

# Pass the token via GIT_ASKPASS so it never appears in
# argv (visible to other local users via `ps`) or in
# the cloned repo's .git/config. The askpass helper is
# a tiny shell script that prints $GIT_AUTH_TOKEN; the
# token only lives in this process's environment and
# in a mode-0700 helper file we delete in `finally`.
askpass_helper = self._make_askpass_helper()
clone_env['GIT_ASKPASS'] = askpass_helper
clone_env['GIT_AUTH_TOKEN'] = self.github_token
clone_env['GIT_TERMINAL_PROMPT'] = '0'
# Username is required so git skips the "Username for"
# prompt; the password is supplied by the askpass helper.
clone_url = f"https://x-access-token@github.com/{repo_name}.git"

try:
subprocess.run(['git', 'clone', '--filter=blob:none', clone_url, base_repo_path],
env=clone_env,
check=True, capture_output=True, timeout=TIMEOUT_CLONE)
except subprocess.CalledProcessError as e:
error_msg = f"Failed to clone repository: {e.stderr.decode()}"
self.log(error_msg)
return False, "", error_msg
finally:
if askpass_helper is not None:
try:
os.unlink(askpass_helper)
except OSError:
pass

# Clean up any stale worktrees for this evaluation
eval_branch_prefix = f"eval-pr-{safe_repo_name}-{pr_number}"
Expand Down
50 changes: 50 additions & 0 deletions claudecode/test_eval_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,56 @@ def test_setup_repository_clone(self, mock_exists, mock_run):
assert worktree_path != ""
assert error == ""

@patch('subprocess.run')
@patch('os.path.exists')
def test_setup_repository_clone_does_not_leak_token_in_argv(self, mock_exists, mock_run):
    """Regression test: when ``github_token`` is set, the token must
    NOT appear in the argv of any ``subprocess.run`` call. argv is
    visible to every other user on the host via ``ps``, so embedding
    the token there is a credential exposure. The fix uses
    ``GIT_ASKPASS`` + an env var so the token only lives in this
    process's environment and a mode-0700 helper file.

    Also checks that interactive prompting is disabled for the clone
    and that the temporary askpass helper is gone once setup returns.
    """
    mock_exists.return_value = False  # Repository doesn't exist
    secret_token = 'ghp_secret_token_must_not_appear_in_argv_AAAAAAAAAAAAAAAAAAAA'
    mock_run.side_effect = [
        Mock(returncode=0, stdout=secret_token + '\n'),  # gh auth token
        Mock(returncode=0),  # git clone
        Mock(returncode=0),  # git fetch
        Mock(returncode=0),  # git worktree add
    ]
    # NOTE(review): the extent of this ``with`` block is reconstructed
    # from the blank line that follows the token assertion — confirm
    # against the upstream file.
    with patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'}, clear=False):
        # Drop any ambient GITHUB_TOKEN so the engine has to take the
        # token from the mocked ``gh auth token`` call above.
        os.environ.pop('GITHUB_TOKEN', None)
        engine = EvaluationEngine()
        assert engine.github_token == secret_token

    case = EvalCase("owner/repo", 123)
    success, _, _ = engine._setup_repository(case)
    assert success is True

    # Walk every recorded subprocess.run call and confirm the token
    # never appears in any positional or keyword argv.
    for call in mock_run.call_args_list:
        argv = call.args[0] if call.args else call.kwargs.get('args', [])
        for arg in argv:
            assert secret_token not in str(arg), (
                f"github token leaked into subprocess argv: {arg!r}"
            )

    # The clone URL should use the ``x-access-token`` placeholder
    # username rather than embedding the token. argv[1] is the git
    # subcommand; the URL is the second-to-last positional arg.
    clone_call = mock_run.call_args_list[1]
    clone_argv = clone_call.args[0]
    assert any('x-access-token@github.com' in str(a) for a in clone_argv), (
        f"expected x-access-token placeholder in clone argv, got {clone_argv!r}"
    )
    # GIT_ASKPASS must be set in the env passed to the clone subprocess.
    clone_env = clone_call.kwargs.get('env') or {}
    assert clone_env.get('GIT_ASKPASS'), (
        "GIT_ASKPASS must be set on the clone subprocess env"
    )
    assert clone_env.get('GIT_AUTH_TOKEN') == secret_token
    # git must fail fast instead of falling back to a terminal prompt.
    assert clone_env.get('GIT_TERMINAL_PROMPT') == '0'
    # The helper script is temporary and must be removed after the
    # clone. ``os.path.exists`` is mocked in this test, so use
    # ``isfile`` to look at the real filesystem.
    assert not os.path.isfile(clone_env['GIT_ASKPASS']), (
        "askpass helper script was not cleaned up after clone"
    )

@patch('subprocess.run')
@patch('os.path.exists')
def test_setup_repository_existing(self, mock_exists, mock_run):
Expand Down