From 550d53e7c3153ec08d6f1d77ae0e021420d5dd20 Mon Sep 17 00:00:00 2001
From: Nandana Dileep
Date: Mon, 1 Jun 2026 23:18:52 +0530
Subject: [PATCH] feat: add top-tools command and /api/tool-summary (#7)

Expose aggregated per-tool-type token usage so users can see which tools
consume the most context overall, via SQL aggregation, API, and a Rich CLI
table.

Co-authored-by: Cursor
---
Review notes (ignored by `git am`):
- /api/tool-summary now reports total_tokens across all matching tool
  types, not only the rows kept by `limit`, so the CLI "Share" column is
  not inflated when the ranking is truncated.
- Ranking ties are broken by task_type so the order is deterministic.
- Line breaks and indentation were restored from a whitespace-collapsed
  copy of this patch. Context-line spacing (notably the README column
  alignment) and the post-image blob ids on the `index` lines are best
  effort and may need regenerating before applying.

 CHANGELOG.md                                  |   6 +
 README.md                                     |   1 +
 src/context_analyzer_tool/cli.py              |  67 +++++++++
 src/context_analyzer_tool/collector/models.py |  13 ++
 src/context_analyzer_tool/collector/routes.py |  32 +++++
 src/context_analyzer_tool/db/tasks.py         |  43 ++++++
 tests/test_tool_summary.py                    | 132 ++++++++++++++++++
 7 files changed, 294 insertions(+)
 create mode 100644 tests/test_tool_summary.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0391521..a038114 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## Unreleased
+
+### Added
+
+- **`context-analyzer-tool top-tools`** (#7) — ranked table of token usage by tool type (calls, total, average, share %). Backed by `/api/tool-summary`.
+
 ## 0.3.1 (2026-04-08)
 
 ### Bug Fixes
diff --git a/README.md b/README.md
index 207f669..20b59f0 100644
--- a/README.md
+++ b/README.md
@@ -74,6 +74,7 @@ context-analyzer-tool serve          Start the collector server
 context-analyzer-tool dashboard      Launch the live TUI dashboard
 context-analyzer-tool status         View active sessions and recent tasks
 context-analyzer-tool anomalies      List recent anomalies with root causes
+context-analyzer-tool top-tools      Rank tool types by total token usage
 context-analyzer-tool context-cost   Show context cost breakdown
 context-analyzer-tool health         Collector health check
 context-analyzer-tool rtk-status     Show RTK integration status and savings
diff --git a/src/context_analyzer_tool/cli.py b/src/context_analyzer_tool/cli.py
index 2aa6d7b..5abafbb 100644
--- a/src/context_analyzer_tool/cli.py
+++ b/src/context_analyzer_tool/cli.py
@@ -826,6 +826,73 @@ def anomalies(
     )
 
 
+@app.command()
+def top_tools(
+    port: int | None = typer.Option(None, help="Collector port (default: from config)"),
+    limit: int = typer.Option(10, help="Number of tool types to show"),
+    session_id: str | None = typer.Option(None, "--session", help="Filter by session"),
+) -> None:
+    """Show which tool types consume the most tokens overall."""
+    import httpx
+
+    url = _collector_base_url(port)
+    params: dict[str, str | int] = {"limit": limit}
+    if session_id is not None:
+        params["session_id"] = session_id
+
+    try:
+        with httpx.Client(timeout=5.0) as client:
+            resp = client.get(f"{url}/api/tool-summary", params=params)
+            resp.raise_for_status()
+            data = resp.json()
+    except httpx.ConnectError:
+        console.print(
+            "[red]Cannot connect to collector.[/red] "
+            "Is it running? Start with: [bold]context-analyzer-tool serve[/bold]"
+        )
+        raise typer.Exit(1) from None
+    except httpx.HTTPError as exc:
+        console.print(f"[red]Error:[/red] {exc}")
+        raise typer.Exit(1) from None
+
+    tools: list[dict[str, Any]] = data.get("tools", [])
+    total_tokens: int = int(data.get("total_tokens", 0))
+
+    if not tools:
+        console.print(
+            Panel("[dim]No tool usage with token estimates yet[/dim]", title="Top Tools")
+        )
+        return
+
+    table = Table(show_header=True, header_style="bold cyan", expand=True)
+    table.add_column("Rank", justify="right")
+    table.add_column("Tool")
+    table.add_column("Calls", justify="right")
+    table.add_column("Total Tokens", justify="right")
+    table.add_column("Avg / Call", justify="right")
+    table.add_column("Share", justify="right")
+
+    for rank, tool in enumerate(tools, start=1):
+        tool_total = int(tool.get("total_tokens", 0))
+        # total_tokens is the grand total over every tool type, so shares stay
+        # correct even when --limit truncates the ranking.
+        share = (tool_total / total_tokens * 100) if total_tokens else 0.0
+        table.add_row(
+            str(rank),
+            str(tool.get("task_type", "")),
+            f"{int(tool.get('call_count', 0)):,}",
+            f"{tool_total:,}",
+            f"{float(tool.get('avg_tokens', 0)):,.0f}",
+            f"{share:.1f}%",
+        )
+
+    title = "Top Tools by Token Usage"
+    if session_id:
+        title += f" (session {session_id[:8]}...)"
+    subtitle = f"{total_tokens:,} tokens total, top {len(tools)} tool type(s) shown"
+    console.print(Panel(table, title=f"{title}\n[dim]{subtitle}[/dim]", border_style="blue"))
+
+
 @app.command()
 def dashboard(
     port: int | None = typer.Option(None, help="Collector port (default: from config)"),
diff --git a/src/context_analyzer_tool/collector/models.py b/src/context_analyzer_tool/collector/models.py
index a902f5a..11678a7 100644
--- a/src/context_analyzer_tool/collector/models.py
+++ b/src/context_analyzer_tool/collector/models.py
@@ -289,3 +289,16 @@ class AnomalyResponse(BaseModel):
 class AnomaliesListResponse(BaseModel):
     anomalies: list[AnomalyResponse]
     total_count: int
+
+
+class ToolSummaryRow(BaseModel):
+    task_type: str
+    call_count: int
+    total_tokens: int
+    avg_tokens: float
+
+
+class ToolSummaryResponse(BaseModel):
+    """Ranked per-tool rows plus the grand total across all matching tool types."""
+    tools: list[ToolSummaryRow]
+    total_tokens: int
diff --git a/src/context_analyzer_tool/collector/routes.py b/src/context_analyzer_tool/collector/routes.py
index de2fbe9..f2bb309 100644
--- a/src/context_analyzer_tool/collector/routes.py
+++ b/src/context_analyzer_tool/collector/routes.py
@@ -25,6 +25,8 @@
     StatuslineSnapshotRequest,
     StatusResponse,
     TaskResponse,
+    ToolSummaryResponse,
+    ToolSummaryRow,
 )
 from context_analyzer_tool.db import anomalies as db_anomalies
 from context_analyzer_tool.db import baselines as db_baselines
@@ -635,6 +637,36 @@ async def get_anomalies(
     return AnomaliesListResponse(anomalies=anomalies, total_count=total)
 
 
+@api_router.get("/tool-summary", response_model=ToolSummaryResponse)
+async def get_tool_summary(
+    limit: int = 20,
+    session_id: str | None = None,
+    db: aiosqlite.Connection = Depends(get_db),
+) -> ToolSummaryResponse:
+    """Return aggregated token usage ranked by tool type.
+
+    ``total_tokens`` covers every tool type matching the filter, not just the
+    ``limit`` rows returned, so per-tool shares stay meaningful when the
+    ranking is truncated. A non-positive ``limit`` returns all tool types.
+    """
+    # Fetch the full ranking (one row per tool type) so the grand total is
+    # not skewed by the row limit; truncate afterwards.
+    rows = await db_tasks.get_tool_type_summary(db, session_id=session_id)
+    total_tokens = sum(int(r["total_tokens"]) for r in rows)
+    if limit > 0:
+        rows = rows[:limit]
+    tools = [
+        ToolSummaryRow(
+            task_type=r["task_type"],
+            call_count=int(r["call_count"]),
+            total_tokens=int(r["total_tokens"]),
+            avg_tokens=float(r["avg_tokens"]),
+        )
+        for r in rows
+    ]
+    return ToolSummaryResponse(tools=tools, total_tokens=total_tokens)
+
+
 @api_router.get("/rtk-status")
 async def get_rtk_status() -> dict[str, Any]:
     """Return RTK integration status and savings."""
diff --git a/src/context_analyzer_tool/db/tasks.py b/src/context_analyzer_tool/db/tasks.py
index 8046a6f..107ba1a 100644
--- a/src/context_analyzer_tool/db/tasks.py
+++ b/src/context_analyzer_tool/db/tasks.py
@@ -260,3 +260,46 @@ async def get_null_delta_tasks(
     )
     rows = await cursor.fetchall()
     return [dict(row) for row in rows]
+
+
+async def get_tool_type_summary(
+    db: aiosqlite.Connection,
+    session_id: str | None = None,
+    limit: int | None = None,
+) -> list[dict[str, Any]]:
+    """Aggregate token usage by task_type (tool name).
+
+    Only rows with a non-null ``estimated_tokens`` value are included,
+    matching the metric used by the task cost timeline.
+
+    Rows are ordered by total tokens descending; ties are broken by
+    ``task_type`` so the ranking is deterministic.
+    """
+    clauses = ["estimated_tokens IS NOT NULL"]
+    params: list[str | int] = []
+
+    if session_id is not None:
+        clauses.append("session_id = ?")
+        params.append(session_id)
+
+    where = "WHERE " + " AND ".join(clauses)
+    limit_clause = ""
+    if limit is not None:
+        limit_clause = " LIMIT ?"
+        params.append(limit)
+
+    query = f"""
+        SELECT
+            task_type,
+            COUNT(*) AS call_count,
+            SUM(estimated_tokens) AS total_tokens,
+            AVG(estimated_tokens) AS avg_tokens
+        FROM tasks
+        {where}
+        GROUP BY task_type
+        ORDER BY total_tokens DESC, task_type ASC
+        {limit_clause}
+    """
+    cursor = await db.execute(query, params)
+    rows = await cursor.fetchall()
+    return [dict(row) for row in rows]
diff --git a/tests/test_tool_summary.py b/tests/test_tool_summary.py
new file mode 100644
index 0000000..926a804
--- /dev/null
+++ b/tests/test_tool_summary.py
@@ -0,0 +1,132 @@
+"""Tests for per-tool-type token aggregation (#7)."""
+
+from __future__ import annotations
+
+import time
+
+import aiosqlite
+import pytest
+from httpx import AsyncClient
+
+from context_analyzer_tool.db import events as db_events
+from context_analyzer_tool.db import tasks as db_tasks
+
+
+async def _insert_task_with_tokens(
+    db: aiosqlite.Connection,
+    *,
+    session_id: str,
+    task_type: str,
+    estimated_tokens: int | None,
+) -> None:
+    ts = int(time.time() * 1000)
+    event_id = await db_events.insert_event(
+        db,
+        session_id=session_id,
+        event_type="PostToolUse",
+        timestamp_ms=ts,
+        payload_json="{}",
+        tool_name=task_type,
+    )
+    await db_tasks.insert_task(
+        db,
+        session_id=session_id,
+        event_id=event_id,
+        task_type=task_type,
+        timestamp_ms=ts,
+        estimated_tokens=estimated_tokens,
+    )
+
+
+@pytest.mark.asyncio
+async def test_get_tool_type_summary_aggregates_and_ranks(
+    db_connection: aiosqlite.Connection,
+) -> None:
+    await _insert_task_with_tokens(
+        db_connection, session_id="s1", task_type="Bash", estimated_tokens=4500
+    )
+    await _insert_task_with_tokens(
+        db_connection, session_id="s1", task_type="Bash", estimated_tokens=1500
+    )
+    await _insert_task_with_tokens(
+        db_connection, session_id="s1", task_type="Read", estimated_tokens=3000
+    )
+    await _insert_task_with_tokens(
+        db_connection, session_id="s1", task_type="Edit", estimated_tokens=500
+    )
+    await _insert_task_with_tokens(
+        db_connection, session_id="s1", task_type="Edit", estimated_tokens=None
+    )
+
+    rows = await db_tasks.get_tool_type_summary(db_connection)
+
+    assert len(rows) == 3
+    assert rows[0]["task_type"] == "Bash"
+    assert rows[0]["call_count"] == 2
+    assert rows[0]["total_tokens"] == 6000
+    assert rows[1]["task_type"] == "Read"
+    assert rows[2]["task_type"] == "Edit"
+    assert rows[2]["call_count"] == 1
+
+
+@pytest.mark.asyncio
+async def test_get_tool_type_summary_session_filter(
+    db_connection: aiosqlite.Connection,
+) -> None:
+    await _insert_task_with_tokens(
+        db_connection, session_id="s1", task_type="Bash", estimated_tokens=1000
+    )
+    await _insert_task_with_tokens(
+        db_connection, session_id="s2", task_type="Read", estimated_tokens=9000
+    )
+
+    rows = await db_tasks.get_tool_type_summary(db_connection, session_id="s2")
+
+    assert len(rows) == 1
+    assert rows[0]["task_type"] == "Read"
+    assert rows[0]["total_tokens"] == 9000
+
+
+@pytest.mark.asyncio
+async def test_api_tool_summary(
+    app_client: AsyncClient,
+    db_connection: aiosqlite.Connection,
+) -> None:
+    """GET /api/tool-summary returns ranked tool aggregates."""
+    await _insert_task_with_tokens(
+        db_connection, session_id="s-api", task_type="Bash", estimated_tokens=4500
+    )
+    await _insert_task_with_tokens(
+        db_connection, session_id="s-api", task_type="Read", estimated_tokens=3000
+    )
+
+    resp = await app_client.get("/api/tool-summary")
+
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["total_tokens"] == 7500
+    assert len(body["tools"]) == 2
+    assert body["tools"][0]["task_type"] == "Bash"
+    assert body["tools"][0]["total_tokens"] == 4500
+
+
+@pytest.mark.asyncio
+async def test_api_tool_summary_total_covers_all_tool_types(
+    app_client: AsyncClient,
+    db_connection: aiosqlite.Connection,
+) -> None:
+    """``total_tokens`` spans every tool type even when ``limit`` truncates rows."""
+    await _insert_task_with_tokens(
+        db_connection, session_id="s-lim", task_type="Bash", estimated_tokens=4500
+    )
+    await _insert_task_with_tokens(
+        db_connection, session_id="s-lim", task_type="Read", estimated_tokens=3000
+    )
+
+    resp = await app_client.get("/api/tool-summary?limit=1&session_id=s-lim")
+
+    assert resp.status_code == 200
+    body = resp.json()
+    assert len(body["tools"]) == 1
+    assert body["tools"][0]["task_type"] == "Bash"
+    assert body["total_tokens"] == 7500