From 9c46829497c58e471826304134e00273433a3da4 Mon Sep 17 00:00:00 2001
From: Brent Rager
Date: Sat, 20 Jun 2026 21:57:54 -0400
Subject: [PATCH 1/2] evals: add th config-based eval runner script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

scripts/run-evals.sh fetches the LiteLLM gateway virtual key from
@smooai/config via `th config get liteLLMVirtualKeyAiServer` (env-overridable)
and runs the LLM-as-judge eval suite against the live gateway.

Replaces reading the key out of opencode auth.json — @smooai/config is the
single source of truth, fetched at run time and never printed. Validates the
key is a sk- virtual key (the gateway 401s on non-virtual keys) and fails with
a clear message otherwise.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 scripts/run-evals.sh | 58 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100755 scripts/run-evals.sh

diff --git a/scripts/run-evals.sh b/scripts/run-evals.sh
new file mode 100755
index 0000000..bbf6209
--- /dev/null
+++ b/scripts/run-evals.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# Run the smooth-operator LLM-as-judge eval suite against the live gateway, with
+# the gateway virtual key fetched from @smooai/config via `th config`.
+#
+# This replaces the old "read the key out of opencode auth.json" step: the key is
+# the single source of truth in @smooai/config, fetched at run time and never
+# printed.
+#
+# Usage:
+#   scripts/run-evals.sh                                 # run the default suite
+#   scripts/run-evals.sh --test llm_judge -- --nocapture # pass extra args to cargo test
+#
+# Env overrides:
+#   SMOOAI_GATEWAY_KEY_NAME   th config key holding the sk- virtual key
+#                             (default: liteLLMVirtualKeyAiServer)
+#   SMOOAI_CONFIG_ENV         config environment to read (default: production)
+#   SMOOTH_AGENT_JUDGE_MODEL  judge model (default: the harness CHEAP_MODEL);
+#                             set e.g. claude-sonnet-4-5 for an adversarial grade
+set -euo pipefail
+
+KEY_NAME="${SMOOAI_GATEWAY_KEY_NAME:-liteLLMVirtualKeyAiServer}"
+CONFIG_ENV="${SMOOAI_CONFIG_ENV:-production}"
+
+if ! command -v th >/dev/null 2>&1; then
+  echo "error: 'th' CLI not found — install the smooth CLI to fetch config secrets" >&2
+  exit 1
+fi
+
+# Fetch the gateway virtual key from @smooai/config. Never echoed.
+SMOOAI_GATEWAY_KEY="$(th config get "$KEY_NAME" --environment="$CONFIG_ENV" --json 2>/dev/null \
+  | python3 -c 'import sys,json; print(json.load(sys.stdin).get("value",""))')"
+
+if [ -z "${SMOOAI_GATEWAY_KEY}" ]; then
+  echo "error: th config returned no value for '$KEY_NAME' (env=$CONFIG_ENV)." >&2
+  echo " Check the key name (SMOOAI_GATEWAY_KEY_NAME) and that you're logged in (th auth)." >&2
+  exit 1
+fi
+
+case "$SMOOAI_GATEWAY_KEY" in
+  sk-*) : ;; # ok — a LiteLLM virtual key
+  *)
+    echo "error: '$KEY_NAME' is not a LiteLLM virtual key (expected to start with 'sk-')." >&2
+    echo " The gateway rejects non-virtual keys with 401. Point SMOOAI_GATEWAY_KEY_NAME at a virtual key." >&2
+    exit 1
+    ;;
+esac
+
+export SMOOAI_GATEWAY_KEY
+export SMOOTH_AGENT_E2E=1
+
+echo "[run-evals] gateway key: $KEY_NAME (env=$CONFIG_ENV) loaded; judge=${SMOOTH_AGENT_JUDGE_MODEL:-default}"
+
+# Default to the llm_judge suite; allow callers to override the cargo test args.
+if [ "$#" -eq 0 ]; then
+  set -- -p smooai-smooth-operator-evals --test llm_judge -- --nocapture --test-threads=1
+fi
+
+exec cargo test "$@"

From 0c2ccf129a14cafd13d189d97e57aa73440a2c5f Mon Sep 17 00:00:00 2001
From: Brent Rager
Date: Sat, 20 Jun 2026 22:06:30 -0400
Subject: [PATCH 2/2] =?UTF-8?q?evals:=20fix=20th=20config=20fetch=20?=
 =?UTF-8?q?=E2=80=94=20pin=20prod=20env=20+=20org,=20force=20user-JWT=20au?=
 =?UTF-8?q?th?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three subtle env-inheritance bugs the wrapper hit:

- Read the ambient SMOOAI_CONFIG_ENV (your local working env, usually
  `development`, where the key is the literal placeholder "unset") instead of
  production where the real key lives → now a dedicated
  SMOOAI_EVAL_GATEWAY_ENV defaulting to production.
- Ambient @smooai/config M2M env vars (API_KEY/CLIENT_*/API_URL) override
  --org-id and resolve the wrong org → now unset for the fetch, forcing the
  th user JWT + explicit --org-id (cwd-independent).
- Require SMOOAI_CONFIG_ORG_ID explicitly (the infra-secrets org) with a clear
  error rather than silently resolving a default org.

Also harden the fetch itself: under `set -e` + `pipefail` a failing
`th config get | python3` pipeline aborted the script at the assignment (with
th's stderr discarded), so the "th config returned no value" hint was never
shown when th was logged out. Any pipeline failure now maps to an empty value
and falls through to that error. python3 is checked up front like th.

Verified end-to-end: fetches the prod virtual key via th config and runs the
llm_judge suite green.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
Note: the hunk header and diffstat below are recomputed for the revised
content; the blob ids on the `index` line are carried over from the original
series and will change when the patch is regenerated.

 scripts/run-evals.sh | 41 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/scripts/run-evals.sh b/scripts/run-evals.sh
index bbf6209..a073e0c 100755
--- a/scripts/run-evals.sh
+++ b/scripts/run-evals.sh
@@ -13,21 +13,52 @@
 # Env overrides:
 #   SMOOAI_GATEWAY_KEY_NAME   th config key holding the sk- virtual key
 #                             (default: liteLLMVirtualKeyAiServer)
-#   SMOOAI_CONFIG_ENV         config environment to read (default: production)
+#   SMOOAI_EVAL_GATEWAY_ENV   config environment the gateway key lives in
+#                             (default: production). Deliberately NOT the ambient
+#                             SMOOAI_CONFIG_ENV — that's your local working env
+#                             (often `development`, where the key is just a
+#                             placeholder), unrelated to where the prod key lives.
+#   SMOOAI_CONFIG_ORG_ID      org whose config holds the key. `th config` reads
+#                             the org from this env (set by direnv in the smooai
+#                             monorepo); without it, th resolves a different/default
+#                             org and returns the wrong value. Export it (or source
+#                             the monorepo's .envrc) before running outside that repo.
 #   SMOOTH_AGENT_JUDGE_MODEL  judge model (default: the harness CHEAP_MODEL);
 #                             set e.g. claude-sonnet-4-5 for an adversarial grade
 set -euo pipefail
 
 KEY_NAME="${SMOOAI_GATEWAY_KEY_NAME:-liteLLMVirtualKeyAiServer}"
-CONFIG_ENV="${SMOOAI_CONFIG_ENV:-production}"
+CONFIG_ENV="${SMOOAI_EVAL_GATEWAY_ENV:-production}"
 
 if ! command -v th >/dev/null 2>&1; then
   echo "error: 'th' CLI not found — install the smooth CLI to fetch config secrets" >&2
   exit 1
 fi
+
+if ! command -v python3 >/dev/null 2>&1; then
+  echo "error: 'python3' not found — it is needed to parse the JSON output of 'th config get'" >&2
+  exit 1
+fi
 
-# Fetch the gateway virtual key from @smooai/config. Never echoed.
-SMOOAI_GATEWAY_KEY="$(th config get "$KEY_NAME" --environment="$CONFIG_ENV" --json 2>/dev/null \
-  | python3 -c 'import sys,json; print(json.load(sys.stdin).get("value",""))')"
+# The key lives in a specific org (SmooAI's infra-secrets / master org). We pin it
+# explicitly and authenticate via the `th` user JWT, deliberately UNSETTING any
+# ambient @smooai/config M2M env vars (SMOOAI_CONFIG_API_KEY / CLIENT_*) for the
+# fetch — those are scoped to whatever org the surrounding direnv loaded and would
+# otherwise override --org-id and return the wrong value.
+if [ -z "${SMOOAI_CONFIG_ORG_ID:-}" ]; then
+  echo "error: SMOOAI_CONFIG_ORG_ID is not set." >&2
+  echo " Set it to the org that holds '$KEY_NAME' (SmooAI's infra-secrets org)," >&2
+  echo " e.g. export SMOOAI_CONFIG_ORG_ID=<org-id>, then re-run." >&2
+  exit 1
+fi
+
+# Fetch the gateway virtual key from @smooai/config via the user JWT. Never echoed.
+# Any failure in the pipeline (th not logged in, unknown key, non-JSON output) is
+# mapped to an empty value: under `set -e` + `pipefail` it would otherwise abort
+# the script right here, with stderr discarded, before the clear error below.
+SMOOAI_GATEWAY_KEY="$(env -u SMOOAI_CONFIG_API_KEY -u SMOOAI_CONFIG_CLIENT_ID -u SMOOAI_CONFIG_CLIENT_SECRET -u SMOOAI_CONFIG_API_URL \
+  th config get "$KEY_NAME" --environment="$CONFIG_ENV" --org-id "$SMOOAI_CONFIG_ORG_ID" --json 2>/dev/null \
+  | python3 -c 'import sys,json; print(json.load(sys.stdin).get("value",""))' 2>/dev/null)" \
+  || SMOOAI_GATEWAY_KEY=""
 
 if [ -z "${SMOOAI_GATEWAY_KEY}" ]; then