diff --git a/.gitignore b/.gitignore index 45dc651..f3c3ed5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,25 @@ +# Runtime artifacts +.supervisor.log +.supervisor.lock +.supervisor_snapshot/ +.logger_runs/ +.log/ +.mle_log.jsonl +gym_log.json + +# Python/editor cruft __pycache__/ -*.egg-info/ -dist/ -build/ -.DS_Store *.pyc +.DS_Store + +# gym-environment +.claudeignore +.copilotignore +.cursorignore +.cursorrules +.geminiignore +.github +.gitignore +AGENTS.md +CLAUDE.md +GEMINI.md diff --git a/README.md b/README.md index 0fdb41a..bfb6d22 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # aicodinggym-cli CLI tool for the [AI Coding Gym](https://aicodinggym.com) platform. -Supports two benchmarks: **SWE-bench** (code bug fixes) and **MLE-bench** (ML competitions). +Supports three benchmarks: **SWE-bench** (code bug fixes), **MLE-bench** (ML competitions), and **Code Review** challenges. **Install:** `pip install aicodinggym-cli` **Entry point:** `aicodinggym` @@ -24,6 +24,11 @@ aicodinggym swe submit django__django-10097 aicodinggym mle download spaceship-titanic # ... train model, generate predictions ... aicodinggym mle submit spaceship-titanic -F predictions.csv + +# 4. Code Review: fetch, review, submit +aicodinggym cr fetch keycloak-0008 +# ... read diff.patch, write your review in review.md ... +aicodinggym cr submit keycloak-0008 -f review.md ``` --- @@ -49,7 +54,10 @@ aicodinggym configure --user-id USER_ID [--workspace-dir DIR] #### `aicodinggym swe fetch PROBLEM_ID` -Fetch a problem and clone the repo locally. +Fetch a problem and clone the repo locally. After a successful `swe fetch`, `mle download`, or `cr fetch`, the CLI downloads agent instruction files from [AICodingGym/gym-environment](https://github.com/AICodingGym/gym-environment) via the GitHub Contents API. By default it uses the **`test` branch**. Override with environment variables: + +- `AICODINGGYM_GYM_ENV_REPO` — `owner/repo` (default: `AICodingGym/gym-environment`) +- `AICODINGGYM_GYM_ENV_REF` — branch, tag, or commit SHA for `?ref=` (default: `test` when unset) ``` aicodinggym swe fetch PROBLEM_ID [--user-id ID] [--workspace-dir DIR] @@ -135,6 +143,38 @@ aicodinggym mle submit COMPETITION_ID -F FILE [--user-id ID] [--message MSG] --- +### `aicodinggym cr` — Code Review Commands + +#### `aicodinggym cr fetch PROBLEM_ID` + +Download the PR diff and create a `review.md` template. + +``` +aicodinggym cr fetch PROBLEM_ID [--user-id ID] [--workspace-dir DIR] +``` + +Creates in `//`: +- `diff.patch` — the full diff between base and head branches +- `review.md` — template to fill in your review (only created if not already present) + +#### `aicodinggym cr submit PROBLEM_ID` + +Submit your code review. + +``` +aicodinggym cr submit PROBLEM_ID -f review.md [--user-id ID] +aicodinggym cr submit PROBLEM_ID -m "Inline review text" +echo "My review" | aicodinggym cr submit PROBLEM_ID +``` + +| Option | Description | +|---|---| +| `-f, --file` | Path to a file containing your review (e.g. `review.md`) | +| `-m, --message` | Inline review text | +| stdin | Pipe review text from stdin | + +--- + ## File Structure ``` diff --git a/__init__.py b/__init__.py index d79e232..40ec2de 100644 --- a/__init__.py +++ b/__init__.py @@ -1,3 +1,44 @@ -"""AI Coding Gym CLI.""" +"""AI Coding Gym CLI. -__version__ = "0.2.0" +Imports are lazy so tooling that loads this file without package context +(e.g. some pytest collection paths) does not fail on relative imports. +""" + +from __future__ import annotations + +import importlib +import importlib.metadata +from typing import TYPE_CHECKING, Any + +try: + __version__ = importlib.metadata.version("aicodinggym-cli") +except importlib.metadata.PackageNotFoundError: # pragma: no cover - dev without install + __version__ = "0.0.0" + +__all__ = [ + "__version__", + "ExperimentLog", + "LogEntry", + "capture_mle_provenance", + "log_entry", + "print_summary", + "set_log_path", + "gym_logger", +] + + +def __getattr__(name: str) -> Any: + if name in ("ExperimentLog", "LogEntry", "capture_mle_provenance"): + m = importlib.import_module("aicodinggym.experiment_log") + return getattr(m, name) + if name in ("log_entry", "print_summary", "set_log_path"): + m = importlib.import_module("aicodinggym.gym_logger") + return getattr(m, name) + if name == "gym_logger": + return importlib.import_module("aicodinggym.gym_logger") + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +if TYPE_CHECKING: + from .experiment_log import ExperimentLog, LogEntry, capture_mle_provenance + from .gym_logger import log_entry, print_summary, set_log_path diff --git a/api.py b/api.py index 3e91442..6b08d03 100644 --- a/api.py +++ b/api.py @@ -1,6 +1,8 @@ """HTTP API client for the AI Coding Gym backend at aicodinggym.com.""" +import gzip import os +from pathlib import Path import requests @@ -86,6 +88,11 @@ def submit_notification(problem_id: str, user_id: str, commit_hash: str, }) +def fetch_pr(user_id: str, problem_id: str) -> dict: + """Fetch CR problem info. Returns {'base_branch': ..., 'head_branch': ..., 'repo_url': ...}.""" + return _post("code-review-fetch", {"user_id": user_id, "problem_id": problem_id}) + + def cr_submit_review(user_id: str, problem_id: str, review: str) -> dict: """Submit a code review.""" return _post("code-review-submit", { @@ -96,8 +103,39 @@ def cr_submit_review(user_id: str, problem_id: str, review: str) -> dict: def mlebench_download_info(user_id: str, competition_id: str, dest_path: str) -> None: - """Download dataset for an MLE-bench competition directly to dest_path.""" - resp = _get(f"competitions/{competition_id}/download", stream=True) + """Download dataset for an MLE-bench competition directly to dest_path. + + Uses a long read timeout: large zips can take many minutes between chunks + over slow links; the default 30s read timeout would abort mid-stream. + """ + read_s = int(os.environ.get("AICODINGGYM_DOWNLOAD_READ_TIMEOUT", "0")) + if read_s <= 0: + read_s = 7200 # seconds between reads; large zips need headroom + url = f"{API_BASE}/competitions/{competition_id}/download" + try: + resp = requests.get( + url, + stream=True, + timeout=(120, read_s), + ) + resp.raise_for_status() + except requests.ConnectionError: + raise APIError( + f"Cannot connect to {API_BASE}.\n" + "Check your internet connection and try again." + ) + except requests.Timeout: + raise APIError(f"Download from {url} timed out.") + except requests.HTTPError as e: + body = "" + try: + body = e.response.json().get("detail", e.response.text) + except Exception: + body = e.response.text + raise APIError(f"API error (HTTP {e.response.status_code}): {body}") + except requests.RequestException as e: + raise APIError(f"Request failed: {e}") + with open(dest_path, "wb") as f: for chunk in resp.iter_content(chunk_size=8192): f.write(chunk) @@ -118,15 +156,17 @@ def mlebench_download_file(url: str, dest_path: str, timeout: int = 300) -> None def mlebench_submit_csv(user_id: str, competition_id: str, csv_path: str) -> dict: """Upload a prediction CSV for an MLE-bench competition.""" try: + csv_name = Path(csv_path).name with open(csv_path, "rb") as f: - resp = requests.post( - f"{API_BASE}/competitions/{competition_id}/submit", - data={"user_id": user_id, "competition_id": competition_id}, - files={"file": (f.name, f, "text/csv")}, - timeout=60, - ) - resp.raise_for_status() - return resp.json() + compressed = gzip.compress(f.read()) + resp = requests.post( + f"{API_BASE}/competitions/{competition_id}/submit", + data={"user_id": user_id, "competition_id": competition_id}, + files={"file": (csv_name + ".gz", compressed, "application/gzip")}, + timeout=120, + ) + resp.raise_for_status() + return resp.json() except requests.ConnectionError: raise APIError( f"Cannot connect to {API_BASE}.\n" diff --git a/cli.py b/cli.py index 23172fe..be93497 100644 --- a/cli.py +++ b/cli.py @@ -1,7 +1,7 @@ """AI Coding Gym CLI - main entry point. A command-line tool for the AI Coding Gym platform (https://aicodinggym.com). -Supports SWE-bench and MLE-bench challenges. +Supports SWE-bench, MLE-bench, and Code Review challenges. SETUP (required before any other command): aicodinggym configure --user-id YOUR_USER_ID @@ -15,15 +15,24 @@ aicodinggym mle download spaceship-titanic # ... train model, generate predictions ... aicodinggym mle submit spaceship-titanic -F submission.csv + +CODE REVIEW WORKFLOW: + aicodinggym cr fetch sentry-0001 + # ... review the diff and write your review ... + aicodinggym cr submit sentry-0001 -f review.md """ +import json import os import platform import re import subprocess import sys +import time +import urllib.request from datetime import datetime from pathlib import Path +from typing import Any import click @@ -32,6 +41,7 @@ APIError, configure as api_configure, cr_submit_review, + fetch_pr as api_fetch_pr, fetch_problem as api_fetch_problem, mlebench_download_file, mlebench_download_info, @@ -51,9 +61,16 @@ clone_repo_cr, generate_ssh_key_pair, reset_to_setup_commit, + run_git_command, ) +def _hyperlink(url: str, text: str | None = None) -> str: + """Return an OSC 8 terminal hyperlink. Falls back to plain URL on unsupported terminals.""" + label = text or url + return f"\033]8;;{url}\033\\{label}\033]8;;\033\\" + + def _error(msg: str) -> None: """Print an error message to stderr and exit.""" click.echo(f"Error: {msg}", err=True) @@ -65,6 +82,215 @@ def _warn(msg: str) -> None: click.echo(f"Warning: {msg}", err=True) +_GYM_ENV_SKIP = {"README.md"} +_GYM_ENV_MLE_ONLY: set[str] = set() + + +def _gym_env_repo() -> str: + """GitHub ``owner/repo`` for gym-environment assets (override with env).""" + return os.environ.get("AICODINGGYM_GYM_ENV_REPO", "").strip() or "AICodingGym/gym-environment" + + +def _gym_env_ref() -> str: + """Git ref (branch, tag, or commit) for Contents API ``?ref=``. + + If ``AICODINGGYM_GYM_ENV_REF`` is unset or empty, defaults to ``test`` so + fetched problems get the same supervisor/dashboard stack as CI/staging. + Set ``AICODINGGYM_GYM_ENV_REF=main`` (or another branch) to override. + """ + ref = os.environ.get("AICODINGGYM_GYM_ENV_REF", "") + ref = ref.strip() + if ref: + return ref + return "test" + + +def _gym_env_contents_api_url(subpath: str = "") -> str: + """GitHub Contents API URL for gym-environment at the configured ref.""" + base = f"https://api.github.com/repos/{_gym_env_repo()}/contents" + subpath = subpath.strip("/") + if subpath: + base = f"{base}/{subpath}" + ref = _gym_env_ref() + return f"{base}?ref={ref}" + + +def _install_gym_environment(dest: Path, challenge: str | None = None) -> None: + """Download gym-environment files from GitHub into dest and add to .gitignore. + + Ref and repo are configurable via ``AICODINGGYM_GYM_ENV_REF`` and + ``AICODINGGYM_GYM_ENV_REPO``. When ref is unset, the ``test`` branch is used. + """ + try: + req = urllib.request.Request( + _gym_env_contents_api_url(), + headers={"Accept": "application/vnd.github.v3+json"}, + ) + with urllib.request.urlopen(req, timeout=15) as resp: + entries = json.loads(resp.read()) + except Exception as e: + _warn(f"Could not fetch gym-environment file list: {e}") + return + + downloaded: list[str] = [] + + for entry in entries: + name = entry.get("name", "") + if name in _GYM_ENV_SKIP: + continue + etype = entry.get("type") + + if etype == "file": + url = entry.get("download_url") + if not url: + continue + try: + with urllib.request.urlopen(url, timeout=15) as r: + (dest / name).write_bytes(r.read()) + downloaded.append(name) + except Exception as e: + _warn(f"Failed to download {name}: {e}") + + elif etype == "dir": + # Fetch subdirectory contents recursively (one level deep) + try: + sub_req = urllib.request.Request( + _gym_env_contents_api_url(name), + headers={"Accept": "application/vnd.github.v3+json"}, + ) + with urllib.request.urlopen(sub_req, timeout=15) as r: + sub_entries = json.loads(r.read()) + except Exception as e: + _warn(f"Failed to list directory {name}: {e}") + continue + + sub_dir = dest / name + sub_dir.mkdir(parents=True, exist_ok=True) + for sub in sub_entries: + sub_name = sub.get("name", "") + sub_url = sub.get("download_url") + if sub.get("type") != "file" or not sub_url: + continue + try: + with urllib.request.urlopen(sub_url, timeout=15) as r: + (sub_dir / sub_name).write_bytes(r.read()) + except Exception as e: + _warn(f"Failed to download {name}/{sub_name}: {e}") + downloaded.append(name) + + # Seed empty solution_log.json if absent (AI agent populates it after each prompt) + log_file = dest / "solution_log.json" + if not log_file.exists(): + log_file.write_text( + '{"version": "1.0", "problem": "", "problem_type": "mle", "prompts": []}\n', + encoding="utf-8", + ) + + # Append to .gitignore + gitignore = dest / ".gitignore" + existing = gitignore.read_text(encoding="utf-8") if gitignore.exists() else "" + existing_lines = set(existing.splitlines()) + gym_artifacts = [".gym_watcher.lock", ".gym_watcher.log", "solution_log.json", ".dashboard.tmp"] + if downloaded: + new_entries = [f for f in downloaded if f not in existing_lines and f"/{f}" not in existing_lines] + new_entries += [a for a in gym_artifacts if a not in existing_lines and f"/{a}" not in existing_lines] + if new_entries: + block = "\n# gym-environment\n" + "\n".join(new_entries) + "\n" + with open(gitignore, "a", encoding="utf-8", newline="\n") as fh: + fh.write(block) + + +def _open_in_browser(path: Path) -> bool: + """Best-effort open a local file in the user's default browser. + + Returns True if the open call was dispatched, False otherwise. Never + raises - a missing display / headless box should not break ``fetch``. + """ + try: + if not path.exists(): + # Create a minimal placeholder so the browser has something to load; + # the watcher will overwrite it moments later. + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("AI Coding Gym

Preparing dashboard\u2026

", encoding="utf-8") + import webbrowser + return bool(webbrowser.open(path.resolve().as_uri())) + except Exception: + return False + + +def _autostart_watcher(problem_dir: Path) -> None: + """Launch gym_watcher.py in background inside problem_dir. Non-fatal.""" + problem_dir = Path(problem_dir) + watcher = problem_dir / "gym_watcher.py" + if not watcher.exists(): + return + lock = problem_dir / ".gym_watcher.lock" + if lock.exists(): + try: + pid = int(lock.read_text(encoding="utf-8").strip()) + except (OSError, ValueError): + pid = None + if pid and _pid_alive(pid): + click.echo("Gym watcher already running; skipping auto-start.") + return + try: + lock.unlink() + except OSError: + pass + log_path = problem_dir / ".gym_watcher.log" + try: + cmd = [sys.executable, str(watcher), str(problem_dir)] + log_fh = open(log_path, "ab", buffering=0) + kwargs: dict[str, Any] = { + "stdout": log_fh, + "stderr": log_fh, + "stdin": subprocess.DEVNULL, + "cwd": str(problem_dir), + } + if platform.system() == "Windows": + DETACHED_PROCESS = 0x00000008 + CREATE_NEW_PROCESS_GROUP = 0x00000200 + kwargs["creationflags"] = DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP + kwargs["close_fds"] = False + else: + kwargs["start_new_session"] = True + subprocess.Popen(cmd, **kwargs) # type: ignore[arg-type] + dashboard = problem_dir / "dashboard.html" + opened = _open_in_browser(dashboard) + msg = "Gym watcher started (logs: .gym_watcher.log)." + if not opened: + msg += f" Open {dashboard} to view dashboard." + click.echo(msg) + except Exception as exc: + _warn(f"Could not auto-start gym_watcher.py: {exc}.") + + +def _pid_alive(pid: int) -> bool: + """Cross-platform ``kill(0)`` equivalent.""" + try: + if platform.system() == "Windows": + # ``tasklist`` is universally available on Windows; short-circuit via signal. + out = subprocess.run( + ["tasklist", "/FI", f"PID eq {pid}", "/NH"], + capture_output=True, text=True, check=False, timeout=5, + ) + return str(pid) in out.stdout + else: + os.kill(pid, 0) + return True + except (ProcessLookupError, PermissionError, subprocess.TimeoutExpired, OSError): + return False + + +def _shquote(text: str) -> str: + """Minimal POSIX-shell quoting sufficient for paths used by the autostart shim.""" + if not text: + return "''" + if all(ch.isalnum() or ch in "@%+=:,./-_" for ch in text): + return text + return "'" + text.replace("'", "'\"'\"'") + "'" + + def _resolve_user_id(config: dict, user_id: str | None) -> str: """Resolve user_id from argument or config, with helpful error.""" if user_id: @@ -230,7 +456,9 @@ def _resolve_key_path(config: dict, creds: dict | None = None) -> Path: " aicodinggym swe fetch django__django-10097\n" " aicodinggym swe submit django__django-10097 --message 'Fix auth bug'\n" " aicodinggym mle download spaceship-titanic\n" - " aicodinggym mle submit spaceship-titanic -F predictions.csv\n\n" + " aicodinggym mle submit spaceship-titanic -F predictions.csv\n" + " aicodinggym cr fetch sentry-0001\n" + " aicodinggym cr submit sentry-0001 -f review.md\n\n" "\b\n" "WEBSITE:\n" " https://aicodinggym.com\n" @@ -319,6 +547,8 @@ def configure(user_id: str, workspace_dir: str | None): } save_config(config) + _install_gym_environment(Path(resolved_workspace)) + click.echo( f"\nConfiguration saved successfully!\n" f"\n" @@ -328,7 +558,7 @@ def configure(user_id: str, workspace_dir: str | None): f" SSH Key: {private_key_path}\n" f" Config: ~/.aicodinggym/config.json\n" f"\n" - f"You can now use 'aicodinggym swe' and 'aicodinggym mle' commands." + f"You can now use 'aicodinggym swe', 'aicodinggym mle', and 'aicodinggym cr' commands." ) except APIError as e: _error(str(e)) @@ -434,6 +664,9 @@ def swe_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): if not success: _error(msg) + _install_gym_environment(workspace / problem_id, "swe") + _autostart_watcher(workspace / problem_id) + click.echo( f"\nSuccessfully fetched problem: {problem_id}\n" f"\n" @@ -550,7 +783,7 @@ def swe_submit(problem_id: str, user_id: str | None, message: str | None, f" Branch: {branch}\n" f" Status: Pushed and backend notified\n" f"\n" - f"Your solution has been submitted for evaluation!" + f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/swe/{problem_id}')}" ) @@ -860,8 +1093,9 @@ def cr(): \b WORKFLOW: - 1. aicodinggym cr fetch CR_PROBLEM_ID # Clone repo with base/head branches - 2. aicodinggym cr submit CR_PROBLEM_ID -f review.md # Submit your review + 1. aicodinggym cr fetch CR_PROBLEM_ID # Download diff + create review.md + 2. (edit review.md with your findings) + 3. aicodinggym cr submit CR_PROBLEM_ID -f review.md # Submit your review """ pass @@ -872,20 +1106,20 @@ def cr(): @click.option("--workspace-dir", default=None, type=click.Path(), help="Directory to clone into. Overrides configured workspace.") def cr_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): - """Fetch a Code Review problem repo with base and head branches. + """Fetch a Code Review problem: downloads the PR diff and creates a review template. - Clones the Problemset-CodeReview repository and checks out both the - base and head branches so you can diff them locally. + Clones the repository, generates diff.patch from base→head, and creates + review.md as a template to fill in your review. \b ARGUMENTS: - PROBLEM_ID The code review problem identifier (e.g., 'cr/sentry-0001'). + PROBLEM_ID The code review problem identifier (e.g., 'keycloak-0008'). \b EXAMPLE: - aicodinggym cr fetch cr/sentry-0001 - cd /cr/sentry-0001 - git diff sentry-0001/base..sentry-0001/head + aicodinggym cr fetch keycloak-0008 + # Edit review.md in the problem directory, then: + aicodinggym cr submit keycloak-0008 -f review.md """ config = load_config() uid = _resolve_user_id(config, user_id) @@ -893,7 +1127,7 @@ def cr_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): try: click.echo(f"Fetching problem '{problem_id}' from server...") - data = api_fetch_problem(uid, problem_id) + data = api_fetch_pr(uid, problem_id) except APIError as e: _error(str(e)) @@ -924,14 +1158,44 @@ def cr_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): if not success: _error(msg) + _install_gym_environment(workspace / problem_id, "cr") + _autostart_watcher(workspace / problem_id) + + problem_dir = workspace / problem_id + + # Generate diff.patch + diff_result = run_git_command( + ["git", "diff", f"{base_branch}..{head_branch}"], str(problem_dir) + ) + diff_path = problem_dir / "diff.patch" + diff_path.write_text(diff_result.stdout) + + # Create review.md template if it doesn't exist yet + review_path = problem_dir / "review.md" + if not review_path.exists(): + review_path.write_text( + f"# Code Review: {problem_id}\n\n" + "## Summary\n\n" + "\n\n" + "## Issues Found\n\n" + "\n\n" + "## Suggestions\n\n" + "\n\n" + "## Verdict\n\n" + "\n" + ) + + cat_cmd = "type" if sys.platform == "win32" else "cat" click.echo( f"\nSuccessfully fetched: {problem_id}\n" f"\n" - f" {msg}\n" + f" Diff saved to: {diff_path}\n" + f" Review template: {review_path}\n" f"\n" - f"To see the diff:\n" - f" cd {workspace / problem_id}\n" - f" git diff {base_branch}..{head_branch}\n" + f"Next steps:\n" + f" 1. Review the diff: {cat_cmd} {diff_path}\n" + f" 2. Write your review in {review_path}\n" + f" 3. Submit: aicodinggym cr submit {problem_id} -f review.md\n" ) @@ -955,13 +1219,13 @@ def cr_submit(problem_id: str, user_id: str | None, review_file: str | None, \b ARGUMENTS: - PROBLEM_ID The code review problem identifier (e.g., 'cr/sentry-0001'). + PROBLEM_ID The code review problem identifier (e.g., 'sentry-0001'). \b EXAMPLE: - aicodinggym cr submit cr/sentry-0001 -f review.md - aicodinggym cr submit cr/sentry-0001 -m "Found a null pointer bug on line 42" - echo "My review" | aicodinggym cr submit cr/sentry-0001 + aicodinggym cr submit sentry-0001 -f review.md + aicodinggym cr submit sentry-0001 -m "Found a null pointer bug on line 42" + echo "My review" | aicodinggym cr submit sentry-0001 """ config = load_config() uid = _resolve_user_id(config, user_id) @@ -996,7 +1260,7 @@ def cr_submit(problem_id: str, user_id: str | None, review_file: str | None, f"\n" f" Status: {result.get('status', 'COMPLETED')}\n" f"\n" - f"View results at: https://aicodinggym.com/challenge/{problem_id}" + f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/cr/{problem_id}')}" ) @@ -1056,6 +1320,9 @@ def mle_download(competition_id: str, user_id: str | None, workspace_dir: str | except APIError as e: _error(str(e)) + _install_gym_environment(workspace / competition_id, "mle") + _autostart_watcher(workspace / competition_id) + click.echo( f"\nDataset downloaded to: {dest_path}\n" f"\nNext step: train your model and submit predictions with:\n" @@ -1118,6 +1385,7 @@ def mle_submit(competition_id: str, csv_path: str, user_id: str | None, except APIError as e: _error(str(e)) + score_msg = result.get("message", "Submission received for scoring.") score = result.get("score") @@ -1129,4 +1397,4 @@ def mle_submit(competition_id: str, csv_path: str, user_id: str | None, ) if score is not None: click.echo(f" Score: {score}\n") - click.echo("Your prediction has been submitted for scoring!") + click.echo(f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/mle/{competition_id}')}") diff --git a/config.py b/config.py index 8b32392..7c5065e 100644 --- a/config.py +++ b/config.py @@ -6,6 +6,9 @@ """ import json +import os +import subprocess +import sys from pathlib import Path from typing import Any @@ -19,8 +22,22 @@ def ensure_config_dir() -> Path: - """Create the config directory with secure permissions if it doesn't exist.""" - CONFIG_DIR.mkdir(mode=0o700, exist_ok=True) + """Create the config directory with secure permissions if it doesn't exist. + + On Unix/macOS: mode 0o700 (owner-only access). + On Windows: removes inherited ACLs and grants full control only to the + current user via icacls. + """ + created = not CONFIG_DIR.exists() + CONFIG_DIR.mkdir(mode=0o700, parents=True, exist_ok=True) + if created and sys.platform == "win32": + username = os.environ.get("USERNAME", "") + if username: + subprocess.run( + ["icacls", str(CONFIG_DIR), "/inheritance:r", + "/grant:r", f"{username}:(OI)(CI)(F)"], + capture_output=True, + ) return CONFIG_DIR diff --git a/git_ops.py b/git_ops.py index 1d95ae8..db742df 100644 --- a/git_ops.py +++ b/git_ops.py @@ -4,18 +4,69 @@ import re import shutil import subprocess +import sys from pathlib import Path from typing import Optional from .config import ensure_config_dir +def _find_git_ssh() -> str | None: + """On Windows, find Git for Windows' bundled ssh.exe. + + Windows may have two SSH binaries on PATH: the built-in OpenSSH + (C:\\Windows\\System32\\OpenSSH\\ssh.exe) and Git for Windows' MSYS2 + ssh (C:\\Program Files\\Git\\usr\\bin\\ssh.exe). System32 is usually + first on PATH, so an unqualified 'ssh' resolves to Windows OpenSSH, + which can trigger GUI credential dialogs or deadlock when stdout is + captured. This function returns the full path to Git's bundled ssh + so we can reference it explicitly in GIT_SSH_COMMAND. + """ + if sys.platform != "win32": + return None + git_path = shutil.which("git") + if not git_path: + return None + # Walk up from git.exe to find the Git root containing usr/bin/ssh.exe. + # Handles cmd/, bin/, and mingw64/bin/ layouts. + candidate = Path(git_path).resolve().parent + for _ in range(4): + ssh = candidate / "usr" / "bin" / "ssh.exe" + if ssh.exists(): + return str(ssh).replace("\\", "/") + candidate = candidate.parent + return None + + def _validate_git_ref(name: str, label: str) -> None: """Raise ValueError if name contains suspicious shell metacharacters.""" if re.search(r'[;&|`$(){}]', name): raise ValueError(f"Invalid {label}: {name!r}") +def _restrict_key_permissions(key_path: Path) -> None: + """Restrict an SSH private key file to owner-only access. + + On Unix/macOS: chmod 600 (read/write owner only). + On Windows: uses icacls to remove inherited permissions and grant + full control only to the current user. SSH clients on both platforms + refuse to use a key whose permissions are too open. + """ + if sys.platform == "win32": + # Remove inherited ACLs, then grant only the current user full control. + # (F) = Full control, matching chmod 0o600 (owner read+write). + key_str = str(key_path) + username = os.environ.get("USERNAME", "") + if username: + subprocess.run( + ["icacls", key_str, "/inheritance:r", + "/grant:r", f"{username}:(F)"], + capture_output=True, + ) + else: + key_path.chmod(0o600) + + def generate_ssh_key_pair(user_id: str) -> tuple[Path, str]: """Generate an SSH key pair for the user. @@ -36,8 +87,14 @@ def generate_ssh_key_pair(user_id: str) -> tuple[Path, str]: if mcp_private.exists() and mcp_public.exists(): shutil.copy2(mcp_private, key_path) shutil.copy2(mcp_public, Path(f"{key_path}.pub")) - key_path.chmod(0o600) + _restrict_key_permissions(key_path) else: + if not shutil.which("ssh-keygen"): + raise RuntimeError( + "ssh-keygen is not installed or not on PATH.\n" + "On Windows, install Git for Windows (https://git-scm.com) " + "which includes ssh-keygen, or use the OpenSSH optional feature." + ) result = subprocess.run( ["ssh-keygen", "-t", "rsa", "-b", "4096", "-f", str(key_path), "-N", "", "-C", f"aicodinggym-{user_id}"], @@ -51,13 +108,34 @@ def generate_ssh_key_pair(user_id: str) -> tuple[Path, str]: return key_path, public_key -def run_git_command(cmd: str, cwd: str, key_path: Optional[Path] = None) -> subprocess.CompletedProcess: - """Execute a git command with optional SSH key configuration.""" +def run_git_command(cmd: list[str], cwd: str, key_path: Optional[Path] = None) -> subprocess.CompletedProcess: + """Execute a git command with optional SSH key configuration. + + cmd must be a list of arguments (e.g. ["git", "status"]). + """ env = os.environ.copy() if key_path: - env["GIT_SSH_COMMAND"] = f"ssh -i {key_path} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" + # Quote the key path in case it contains spaces (common on Windows). + # Use forward slashes — works on all platforms and avoids backslash escaping. + quoted_key = str(key_path).replace("\\", "/") + # On Windows, use Git for Windows' bundled ssh to avoid Windows native + # OpenSSH which can trigger GUI credential dialogs or deadlock when + # stdout is captured. Falls back to bare "ssh" if not found. + ssh_bin = _find_git_ssh() or "ssh" + # Always use /dev/null for UserKnownHostsFile. On macOS/Linux this is + # the native null device. On Windows, Git for Windows bundles MSYS2's + # ssh which translates /dev/null correctly. Using os.devnull ("nul") + # would break MSYS2's ssh which treats "nul" as a literal filename. + # BatchMode=yes prevents any interactive prompts (password, passphrase) + # that would cause a hang when stdout/stderr are captured. + env["GIT_SSH_COMMAND"] = ( + f'"{ssh_bin}" -i "{quoted_key}" ' + f"-o StrictHostKeyChecking=no " + f"-o UserKnownHostsFile=/dev/null " + f"-o BatchMode=yes" + ) - return subprocess.run(cmd, shell=True, cwd=cwd, capture_output=True, text=True, env=env) + return subprocess.run(cmd, cwd=cwd, capture_output=True, text=True, env=env) def clone_repo(repo_url: str, branch: str, dest_name: str, @@ -70,9 +148,9 @@ def clone_repo(repo_url: str, branch: str, dest_name: str, if problem_dir.exists(): # Check if already on the correct branch - result = run_git_command("git rev-parse --abbrev-ref HEAD", str(problem_dir)) + result = run_git_command(["git", "rev-parse", "--abbrev-ref", "HEAD"], str(problem_dir)) if result.returncode == 0 and result.stdout.strip() == branch: - pull = run_git_command(f"git pull origin {branch}", str(problem_dir), key_path) + pull = run_git_command(["git", "pull", "origin", branch], str(problem_dir), key_path) if pull.returncode != 0: return False, f"Git pull failed:\n{pull.stderr}" return True, f"Already exists. Updated to latest version.\nRepository: {problem_dir}\nBranch: {branch}" @@ -81,7 +159,7 @@ def clone_repo(repo_url: str, branch: str, dest_name: str, "Remove it first or use --workspace-dir to specify a different location." ) - cmd = f"git clone --single-branch --branch {branch} --depth 1 {repo_url} {dest_name}" + cmd = ["git", "clone", "--single-branch", "--branch", branch, "--depth", "1", repo_url, dest_name] result = run_git_command(cmd, workspace, key_path) if result.returncode != 0: @@ -108,12 +186,15 @@ def clone_repo_cr(repo_url: str, base_branch: str, head_branch: str, if problem_dir.exists(): # Already cloned — fetch latest for both branches for branch in (base_branch, head_branch): - result = run_git_command(f"git fetch origin {branch}", str(problem_dir), key_path) + result = run_git_command(["git", "fetch", "origin", branch], str(problem_dir), key_path) if result.returncode != 0: return False, f"Git fetch failed for {branch}:\n{result.stderr}" - result = run_git_command(f"git branch -f {branch} FETCH_HEAD", str(problem_dir)) + result = run_git_command(["git", "branch", "-f", branch, "FETCH_HEAD"], str(problem_dir)) if result.returncode != 0: return False, f"Failed to update branch {branch}:\n{result.stderr}" + result = run_git_command(["git", "checkout", head_branch], str(problem_dir)) + if result.returncode != 0: + return False, f"Failed to checkout head branch '{head_branch}':\n{result.stderr}" return True, ( f"Already exists. Updated both branches.\n" f"Repository: {problem_dir}\n" @@ -121,22 +202,26 @@ def clone_repo_cr(repo_url: str, base_branch: str, head_branch: str, ) # Clone base branch (shallow); depth 50 needed for diffing between branches - cmd = f"git clone --single-branch --branch {base_branch} --depth 50 {repo_url} {dest_name}" + cmd = ["git", "clone", "--single-branch", "--branch", base_branch, "--depth", "50", repo_url, dest_name] result = run_git_command(cmd, workspace, key_path) if result.returncode != 0: return False, f"Git clone failed:\n{result.stderr}" # Fetch head branch - fetch_cmd = f"git fetch origin {head_branch}" - result = run_git_command(fetch_cmd, str(problem_dir), key_path) + result = run_git_command(["git", "fetch", "origin", head_branch], str(problem_dir), key_path) if result.returncode != 0: return False, f"Failed to fetch head branch '{head_branch}':\n{result.stderr}" # Create local head branch tracking the fetched ref - result = run_git_command(f"git branch -f {head_branch} FETCH_HEAD", str(problem_dir)) + result = run_git_command(["git", "branch", "-f", head_branch, "FETCH_HEAD"], str(problem_dir)) if result.returncode != 0: return False, f"Failed to create branch {head_branch}:\n{result.stderr}" + # Check out head branch so the user starts on the code being reviewed + result = run_git_command(["git", "checkout", head_branch], str(problem_dir)) + if result.returncode != 0: + return False, f"Failed to checkout head branch '{head_branch}':\n{result.stderr}" + return True, ( f"Cloned to: {problem_dir}\n" f"Branches: {base_branch}, {head_branch}" @@ -151,29 +236,35 @@ def add_commit_push(problem_dir: str, branch: str, key_path: Path, """ pdir = Path(problem_dir) - # Stage all changes except .github - result = run_git_command("git add -A -- . ':(exclude).github'", str(pdir)) + # Stage all changes except dotfiles/dotdirs and markdown files + result = run_git_command([ + "git", "add", "-A", "--", ".", + ":(exclude).*", + ":(exclude)*.md", + ], str(pdir)) if result.returncode != 0: return False, f"Git add failed:\n{result.stderr}", "" # Check for staged changes - status = run_git_command("git diff --cached --name-only", str(pdir)) + status = run_git_command(["git", "diff", "--cached", "--name-only"], str(pdir)) if not status.stdout.strip(): return False, "No changes to commit. Your working directory is clean.", "" - # Commit - safe_msg = message.replace('"', '\\"') - result = run_git_command(f'git commit -m "{safe_msg}"', str(pdir)) + # Commit — pass message directly as a list arg; no shell escaping needed + result = run_git_command(["git", "commit", "-m", message], str(pdir)) if result.returncode != 0: return False, f"Git commit failed:\n{result.stderr}", "" # Get commit hash - hash_result = run_git_command("git rev-parse HEAD", str(pdir)) + hash_result = run_git_command(["git", "rev-parse", "HEAD"], str(pdir)) commit_hash = hash_result.stdout.strip() # Push - push_flag = "--force-with-lease " if force else "" - result = run_git_command(f"git push {push_flag}origin {branch}", str(pdir), key_path) + push_cmd = ["git", "push"] + if force: + push_cmd.append("--force-with-lease") + push_cmd += ["origin", branch] + result = run_git_command(push_cmd, str(pdir), key_path) if result.returncode != 0: return False, f"Git push failed:\n{result.stderr}", commit_hash @@ -185,7 +276,7 @@ def reset_to_setup_commit(problem_dir: str) -> tuple[bool, str]: Returns (success, message). """ - log_result = run_git_command("git log --format=%H:%s --reverse", problem_dir) + log_result = run_git_command(["git", "log", "--format=%H:%s", "--reverse"], problem_dir) if log_result.returncode != 0: return False, f"Git log failed:\n{log_result.stderr}" @@ -206,11 +297,11 @@ def reset_to_setup_commit(problem_dir: str) -> tuple[bool, str]: f"Expected a commit message starting with '{setup_prefix}'." ) - reset = run_git_command(f"git reset --hard {setup_commit}", problem_dir) + reset = run_git_command(["git", "reset", "--hard", setup_commit], problem_dir) if reset.returncode != 0: return False, f"Git reset failed:\n{reset.stderr}" - clean = run_git_command("git clean -fd", problem_dir) + clean = run_git_command(["git", "clean", "-fd"], problem_dir) if clean.returncode != 0: return False, f"Git clean failed:\n{clean.stderr}" diff --git a/pyproject.toml b/pyproject.toml index 9863712..6babeb0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "aicodinggym-cli" -version = "0.2.0" +version = "0.5.1" description = "CLI tool for AI Coding Gym platform" readme = "README.md" requires-python = ">=3.10" @@ -37,3 +37,7 @@ packages = ["aicodinggym"] [tool.setuptools.package-dir] aicodinggym = "." + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"]