hotdata-dev · zfarrell · Jun 5, 2026 · Jun 5, 2026 · claude · Jun 5, 2026
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
@@ -0,0 +1,93 @@
+name: Integration Tests
+
+on:
+  push: { branches: [main] }
+  pull_request: { branches: [main] }
+  workflow_dispatch: # manual runs from the Actions tab
+
+permissions:
+  contents: read # least privilege: jobs only read the repo; no write scopes
+jobs:
+  # Parity check: every scenario in www.hotdata.dev/api/test-scenarios.yaml that
+  # is NOT opted out for the CLI (`cli` in optional_for, e.g. datasets/secrets/
+  # saved-queries) must have a matching tests/<name>.rs here. www.hotdata.dev is
+  # private, so the manifest is fetched with a GitHub App token. Fork PRs get no
+  # secrets and cannot mint that token, so the job is skipped for them instead
+  # of failing; same-repo PRs, pushes and manual runs still run the check.
+  scenario-parity:
+    runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
+    steps:
+      - name: Generate GitHub App token
+        id: app-token
+        uses: actions/create-github-app-token@bcd2ba49218906704ab6c1aa796996da409d3eb1 # v3.2.0
+        with:
+          app-id: ${{ secrets.HOTDATA_AUTOMATION_APP_ID }}
+          private-key: ${{ secrets.HOTDATA_AUTOMATION_PRIVATE_KEY }}
+          owner: hotdata-dev
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
+        with:
+          python-version: '3.12'
+      - name: Install PyYAML
+        run: pip install --quiet pyyaml
+      - name: Fetch scenarios manifest
+        env:
+          GH_TOKEN: ${{ steps.app-token.outputs.token }}
+        run: |
+          curl -sS -f -L -H "Accept: application/vnd.github.v3.raw" \
+            -H "Authorization: Bearer $GH_TOKEN" \
+            https://api.github.com/repos/hotdata-dev/www.hotdata.dev/contents/api/test-scenarios.yaml \
+            -o test-scenarios.yaml
+      - name: Check parity
+        run: |
+          python3 - <<'PY'
+          import sys, pathlib, yaml
+          scenarios = yaml.safe_load(pathlib.Path("test-scenarios.yaml").read_text())["scenarios"]
+          missing = []
+          required = 0
+          for s in scenarios:
+              if "cli" in (s.get("optional_for") or []):
+                  continue
+              required += 1
+              expected = pathlib.Path("tests") / f"{s['name']}.rs"
+              if not expected.exists():
+                  missing.append(str(expected))
+          if missing:
+              print(f"::error::hotdata-cli is missing tests for {len(missing)} scenarios:")
+              for m in missing:
+                  print(f"  - {m}")
+              sys.exit(1)
+          print(f"All {required} required scenarios have corresponding test files (of {len(scenarios)} total).")
+          PY
+          rm -f test-scenarios.yaml
+
+  # Integration tests run against production. The shared harness
+  # (tests/common/mod.rs) skips cleanly when HOTDATA_SDK_TEST_API_KEY /
+  # HOTDATA_SDK_TEST_WORKSPACE_ID are absent (e.g. PRs from forks where secrets
+  # aren't injected), so this job stays green without credentials.
+  integration:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable
+      - name: Cache cargo # keyed on Cargo.lock; restore-keys falls back to an earlier cache for this OS
+        uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5
+        with:
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            target
+          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-cargo-
+      - name: Run integration tests
+        env: # only the API key is a secret; the URL and ids are configuration variables
+          HOTDATA_SDK_TEST_API_URL: ${{ vars.HOTDATA_SDK_TEST_API_URL }}
+          HOTDATA_SDK_TEST_API_KEY: ${{ secrets.HOTDATA_SDK_TEST_API_KEY }}
+          HOTDATA_SDK_TEST_WORKSPACE_ID: ${{ vars.HOTDATA_SDK_TEST_WORKSPACE_ID }}
+          HOTDATA_SDK_TEST_CONNECTION_ID: ${{ vars.HOTDATA_SDK_TEST_CONNECTION_ID }}
+        # `--test '*'` runs every integration-test target, --no-fail-fast keeps going
+        # so one red run surfaces all failing scenarios, --nocapture shows output.
+        run: cargo test --test '*' --no-fail-fast -- --nocapture
diff --git a/tests/auth_missing_token_401.rs b/tests/auth_missing_token_401.rs
@@ -0,0 +1,48 @@
+//! Scenario: auth_missing_token_401.
+//!
+//! A request with no credentials must be denied. The SDKs assert a literal 401
+//! from the server because they can construct an unauthenticated client; the
+//! CLI instead refuses *client-side* (it has no session, no api key, and
+//! nothing to mint a JWT from), so the meaningful CLI equivalent is: an
+//! authenticated command run with no credentials exits non-zero, reports an
+//! auth/not-configured error, and never prints a workspace listing.
+//!
+//! Although this scenario sends no credentials, it still gates on the standard
+//! test env (like sdk-python's `env` fixture) so `cargo test` with no secrets
+//! configured does not run a live, misleading path.
+
+mod common;
+
+#[test]
+fn auth_missing_token_401() {
+    // Skip unless the standard test env is present (mirrors the SDK env fixture).
+    let _cli = skip_if_no_creds!();
+    let test_env = common::load_env();
+
+    // Isolated empty config: no api key, no session, no workspace lock.
+    let args = ["workspaces", "list", "-o", "json"];
+    let output = common::unauthenticated_output(&test_env.api_url, &args);
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    assert!(
+        !output.status.success(),
+        "workspaces list without credentials must fail; stdout:\n{}",
+        stdout
+    );
+
+    // Case-insensitive match on any of the auth/not-configured phrasings.
+    let lowered = stderr.to_lowercase();
+    let needles = ["auth", "log in", "not configured"];
+    assert!(
+        needles.iter().any(|needle| lowered.contains(*needle)),
+        "expected an auth/not-configured error on stderr, got:\n{}",
+        stderr
+    );
+
+    // Defensive: a non-empty JSON array on stdout would be a real listing.
+    let leaked = matches!(
+        serde_json::from_str::<serde_json::Value>(stdout.trim()),
+        Ok(serde_json::Value::Array(ref rows)) if !rows.is_empty()
+    );
+    assert!(!leaked, "unauthenticated call leaked a workspace listing:\n{stdout}");
+}
diff --git a/tests/auth_unknown_workspace.rs b/tests/auth_unknown_workspace.rs
@@ -0,0 +1,44 @@
+//! Scenario: auth_unknown_workspace.
+//!
+//! A valid api key combined with a fabricated workspace id must be rejected and
+//! must never leak data from another workspace. The CLI mints a JWT from the
+//! real api key, then sends the fabricated id as the gateway-enforced
+//! `X-Workspace-Id`; the server responds 4xx (403/404). We assert the command
+//! exits non-zero and never prints a successful listing.
+
+mod common;
+
+#[test]
+fn auth_unknown_workspace() {
+    let cli = skip_if_no_creds!();
+
+    // Made-up workspace id: 64 random bits rendered as 16 hex digits.
+    let (hi, lo) = (rand::random::<u32>(), rand::random::<u32>());
+    let fake_workspace = format!("ws_{:08x}{:08x}", hi, lo);
+
+    // Real api key (no HOTDATA_WORKSPACE lock); the fabricated id goes in via -w.
+    let args = ["connections", "list", "-w", fake_workspace.as_str(), "-o", "json"];
+    let output = cli
+        .cmd_unlocked_workspace()
+        .args(args)
+        .output()
+        .expect("failed to spawn hotdata binary");
+    let stdout = String::from_utf8_lossy(&output.stdout);
+
+    assert!(
+        !output.status.success(),
+        "connections list with fabricated workspace {fake_workspace} must fail \
+         (potential cross-workspace leak); stdout:\n{}",
+        stdout
+    );
+
+    // Defensive: a non-empty JSON array on stdout would be a real listing.
+    let leaked = matches!(
+        serde_json::from_str::<serde_json::Value>(stdout.trim()),
+        Ok(serde_json::Value::Array(ref rows)) if !rows.is_empty()
+    );
+    assert!(
+        !leaked,
+        "fabricated workspace {fake_workspace} leaked a connection listing:\n{stdout}"
+    );
+}