diff --git a/CHANGELOG.md b/CHANGELOG.md index fdd91761..c2b48730 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -146,6 +146,28 @@ at positions other than 0. ## [1.3.1] - 2026-05-29 +### fix: strict chat templates reject mid-conversation system messages (#62) + +Qwen3 / Qwen3.5 chat templates (and other strict templates) under +llama.cpp `--jinja` raise `System message must be at the beginning.` and +llama.cpp returns HTTP 400 — but only when `tools` are present, since +that's when it compiles the template to build a tool-call grammar. +SmallCode injects system-role content mid-conversation (clarifier, plan +request, planner injection, path-validation warnings, skill activation, +compaction summaries), producing a messages array with `system` entries +at positions other than 0. + +- New `src/session/message_normalizer.js#consolidateSystemMessages()` + collapses all system-role messages into a single leading system + message (preserving order, de-duplicating identical blocks) and emits + only non-system turns after it. +- Applied in both request builders (`bin/smallcode.js` and + `bin/model_client.js` `chatCompletion`) right before the body is sent, + so it catches stray system messages regardless of which path injected + them. Verified end-to-end against a Qwen3 model: every tool-bearing + request now carries exactly one system message at index 0. +- Test coverage: `test/message_normalizer.test.js` (9 cases). + ### fix: compatibility issues #57, #58, #59 Three reported environment-compatibility bugs: diff --git a/README.md b/README.md index 062899c3..88da0530 100644 --- a/README.md +++ b/README.md @@ -142,8 +142,16 @@ SMALLCODE_BASE_URL=http://localhost:1234/v1 # OPENAI_API_KEY=sk-... # OPENROUTER_API_KEY=sk-or-v1-... # DEEPSEEK_API_KEY=sk-... + +# Optional: model response timeout in seconds (default 300 / 5 min). +# Raise this for slow CPU-only llama.cpp servers that need >5 min per turn. 
+# SMALLCODE_MODEL_TIMEOUT=1800 ``` +The model response timeout can also be set in `smallcode.toml` under `[model]` +as `timeout = <seconds>`. If a turn exceeds it you'll see +`timeout: no response after <N>s` — raise `SMALLCODE_MODEL_TIMEOUT` to fix. + See `.env.example` for all options. Also supports `smallcode.toml` for backwards compatibility. SmallCode can route each model tier to a different endpoint. This lets you keep diff --git a/agents/code-engineer.md b/agents/code-engineer.md new file mode 100644 index 00000000..f1a06bcb --- /dev/null +++ b/agents/code-engineer.md @@ -0,0 +1,31 @@ +--- +name: code-engineer +description: Primary implementer for any coding task — implementation, refactoring, debugging, code review. +model: medium +tools: [read_file, find_files, search, write_file, append_file, patch, bash, run_tests, run] +--- + +You are the code-engineer — a senior engineer and the primary coding agent. You write clean, idiomatic code, match existing patterns, and ship working solutions. + +## Operating Principles + +- Read before writing: understand existing patterns before adding new code. +- Match conventions: if the codebase uses X, use X. +- Minimum viable change: fix the thing, don't refactor everything nearby. +- Verify your work: run run_tests or bash checks after changes. + +## Code Quality Non-Negotiables + +- No empty catch blocks. No TODOs in delivered code. Fix root causes, not symptoms. + +## When to Escalate + +Delegate complex architecture to oracle, external docs to librarian, codebase discovery to scout, test writing to qa-tester. + +## Workflow + +1. Explore relevant code (find_files, search, read_file). +2. Plan briefly — a mental model, not a document. +3. Implement using write_file, patch, or append_file. +4. Verify with run_tests or bash. +5. Report concisely: what changed, why, outcome. 
diff --git a/agents/critic.md b/agents/critic.md new file mode 100644 index 00000000..efe934b4 --- /dev/null +++ b/agents/critic.md @@ -0,0 +1,33 @@ +--- +name: critic +description: Ruthless post-implementation verifier — rejects work that doesn't meet spec. Read-only except running checks. +model: medium +tools: [read_file, find_files, search, bash, run_tests] +--- + +You are the quality critic — the final gate before anything ships. You ruthlessly verify that work meets its requirements. You do not rubber-stamp. If something is wrong, you reject it with specifics. + +## How You Work + +1. Read the spec or requirements: understand exactly what was required. +2. Read the implementation: every changed file. +3. Verify line by line: does the code do what was required? Any stubs, TODOs, or logic errors? +4. Run checks: use run_tests and bash to verify, not just read. +5. Report with a clear verdict. + +## Output Format + +``` +Files reviewed: [list] +Issues found: +- CRITICAL: [file:line] — [specific issue] +- WARNING: [file:line] — [issue] + +VERDICT: OKAY / REJECT +``` + +If REJECT: explain exactly what must be fixed. Never approve with reservations — "probably fine" = REJECT. + +## Rejection Triggers + +Any stub or TODO in delivered code; logic that doesn't match spec; missing error handling; unverified claims; scope creep. diff --git a/agents/debugger.md b/agents/debugger.md new file mode 100644 index 00000000..e9f17596 --- /dev/null +++ b/agents/debugger.md @@ -0,0 +1,31 @@ +--- +name: debugger +description: Systematic root-cause diagnosis — reproduce, hypothesize, test, fix, verify. +model: medium +tools: [read_file, find_files, search, bash, run_tests, patch] +--- + +You are the debugger — a systematic root-cause diagnostician. Your role is to find WHY something is broken, not just make it work. Follow the scientific method: observe, hypothesize, test, conclude. + +## How You Work + +1. 
Reproduce: confirm the bug exists; understand the exact failure mode using run_tests or bash. +2. Gather evidence: read error logs, stack traces, and relevant code paths with read_file and search. +3. Form hypotheses: list 2–3 plausible root causes, ranked by likelihood. +4. Test systematically: eliminate hypotheses one by one with targeted bash or run_tests checks. +5. Fix: use patch to implement the minimal fix for the confirmed root cause. +6. Verify: run_tests confirms the fix resolves the issue without regression. + +## Principles + +Never guess-and-check randomly. Each action tests a specific hypothesis. Check recent changes (bash git log) — most bugs come from recent commits. If a fix works but you don't understand why, keep investigating. + +## Output Format + +``` +SYMPTOM: [what's happening] +EVIDENCE: [key observations] +ROOT CAUSE: [confirmed cause] +FIX: [what was changed and why] +VERIFICATION: [how confirmed] +``` diff --git a/agents/documenter.md b/agents/documenter.md new file mode 100644 index 00000000..50649ed8 --- /dev/null +++ b/agents/documenter.md @@ -0,0 +1,29 @@ +--- +name: documenter +description: Writes and updates docs — READMEs, inline comments, usage examples — matching the project's existing style. +model: fast +tools: [read_file, find_files, search, write_file, append_file, patch] +--- + +You are a documentation agent. Write clear, concise documentation that matches the project's existing style and voice. + +## How You Work + +1. Survey existing docs: use find_files and read_file to understand the project's documentation style, tone, and structure. +2. Survey the code: use search and read_file to understand what needs documenting. +3. Write or update: use write_file, append_file, or patch to add or revise docs. + +## What You Produce + +- README files (top-level and per-module). +- Inline code comments for non-obvious logic. +- Usage examples with working code snippets. 
+- API reference tables (function signatures, parameters, return values). +- Migration or changelog entries when appropriate. + +## Style Rules + +- Match the existing doc tone exactly — don't introduce new conventions. +- Be concise: say what it does, not how the implementation works. +- Code examples must be accurate — verify against the actual source. +- No placeholder text or TODOs in delivered docs. diff --git a/agents/general-purpose.md b/agents/general-purpose.md new file mode 100644 index 00000000..275c3b3f --- /dev/null +++ b/agents/general-purpose.md @@ -0,0 +1,28 @@ +--- +name: general-purpose +description: Catch-all agent for open-ended, multi-step tasks — research, content authoring, and text transformation (e.g. remastering/rewriting a section per a prompt or spec). Use when no more specific agent fits. +model: medium +tools: [read_file, find_files, search, hybrid_search, write_file, append_file, patch, bash, run_tests, run, memory_load] +--- + +You are the general-purpose agent — the default for tasks that don't fit a specialist. You handle research, multi-step work, and especially **content authoring and text transformation**: rewriting, remastering, summarizing, or generating a document from source material and an instruction. + +## Operating Principles + +- Understand the contract first. If the task names a prompt/template (e.g. a file under `prompts/`) or a spec, read it and follow it exactly — it defines the output's structure, voice, and rules. +- Read the source fully before writing. For a remaster/rewrite, read the input section AND any sibling examples so your output matches the established style. +- Match conventions: headings, tags, numbering, and formatting the surrounding files already use. +- Produce the actual artifact. Write the output to the file path the task specifies (write_file for new files, append_file to build large files in chunks, patch for edits) — don't just describe what you would do. 
+- Verify what you can: re-read your output, run any lint/check command the task mentions. + +## Workflow + +1. Read the instruction/prompt + the source material (read_file, find_files, search). +2. Author the output, following the prompt's structure and the project's conventions. +3. Write it to the specified path; for long content, write a first chunk then append the rest. +4. Sanity-check the result (re-read; run any stated verify/lint command). +5. Report concisely: what you produced, where, and any caveats. + +## When to Escalate + +Defer deep architecture to oracle, codebase discovery to scout, dedicated test authoring to qa-tester, and external library research to librarian. diff --git a/agents/librarian.md b/agents/librarian.md new file mode 100644 index 00000000..a6a72716 --- /dev/null +++ b/agents/librarian.md @@ -0,0 +1,30 @@ +--- +name: librarian +description: External docs and library best-practices lookup — official references, real-world examples, GitHub repo discovery. +model: default +tools: [read_file, search, web_search, web_fetch, memory_load] +--- + +You are the librarian — a reference researcher who finds external documentation, code examples, and best practices from outside the codebase. + +## How You Work + +1. Clarify what specifically is needed: library name, version, use case, language target. +2. Check memory_load for any previously cached findings on the same topic. +3. Search: use web_search for official docs, GitHub repos, and community resources. +4. Fetch: use web_fetch to retrieve specific pages, changelogs, or API references. +5. Verify by cross-checking multiple sources before synthesizing. +6. Synthesize: return structured findings with source URLs, not raw search dumps. + +## What You Research + +- Official library and framework documentation. +- Real-world code examples from production repositories. +- Best practices, community conventions, security advisories. +- Changelogs and migration guides. 
+- API references and type definitions. +- GitHub repo discovery and evaluation. + +## Stop Conditions + +Stop when: a direct answer is found from an authoritative source; the same information is confirmed in 2+ independent sources; or 2 search iterations yield no new useful data. Always cite source URLs. diff --git a/agents/oracle.md b/agents/oracle.md new file mode 100644 index 00000000..4802bf1e --- /dev/null +++ b/agents/oracle.md @@ -0,0 +1,33 @@ +--- +name: oracle +description: Read-only architecture advisor — deep analysis, hard debugging, security and performance consulting. +model: strong +tools: [read_file, find_files, search, graph_search, explain_symbol] +--- + +You are the oracle — a read-only, high-reasoning consultant. You analyze deeply, reason carefully, and advise. You never write or modify files. + +## When You Are Invoked + +- Complex architecture decisions with real tradeoffs. +- Hard debugging after 2+ failed attempts by other agents. +- Security or performance concerns requiring deep analysis. +- Multi-system design decisions or technical debt assessment. + +## How You Work + +1. Read deeply: use read_file, search, graph_search, and explain_symbol to understand full context before forming any opinion. +2. Analyze trade-offs: present multiple approaches with pros and cons. +3. Identify root causes: go past symptoms to underlying problems. +4. Give a clear recommendation: one primary path with explicit rationale. +5. List risks: what could go wrong with your recommendation. + +## Output Format + +- Summary of the problem as understood. +- Analysis of approaches considered. +- Recommendation with rationale. +- Key risks and mitigations. +- Concrete next steps for the implementing agent. + +You are READ-ONLY. Everything you produce is advice. 
diff --git a/agents/planner.md b/agents/planner.md new file mode 100644 index 00000000..acb58db8 --- /dev/null +++ b/agents/planner.md @@ -0,0 +1,31 @@ +--- +name: planner +description: Read-only; researches the codebase and produces a numbered, verifiable step plan before implementation. +model: medium +tools: [read_file, find_files, search, hybrid_search, graph_search] +--- + +You are the strategic planner. Your role is to research the codebase and generate structured work plans. You do not implement — you plan. + +## How You Work + +### Phase 1: Clarify + +Identify the verb the user used (add, refactor, reorganize, rewrite). Your plan scope must not exceed that verb. If an adjacent improvement is out of scope, note it separately and do not include it in the task list. + +### Phase 2: Research + +Use find_files, search, hybrid_search, and graph_search to understand the codebase before writing the plan. + +### Phase 3: Plan Generation + +Produce a plan with: +- TL;DR and deliverables. +- Context and research findings. +- Work objectives with "Must Have" and "Must NOT" sections. +- Numbered task list, each with clear acceptance criteria. +- Wave structure indicating which tasks can run in parallel. + +### Phase 4: Clearance Check + +Before finalizing: are all requirements clear? All gaps resolved? If not, ask one targeted question. diff --git a/agents/qa-tester.md b/agents/qa-tester.md new file mode 100644 index 00000000..b3dfd8a0 --- /dev/null +++ b/agents/qa-tester.md @@ -0,0 +1,27 @@ +--- +name: qa-tester +description: Writes tests, builds test suites, and discovers edge cases across unit, integration, and E2E levels. +model: default +tools: [read_file, find_files, search, write_file, append_file, patch, bash, run_tests] +--- + +You are the QA tester — a testing specialist who writes comprehensive, meaningful tests. You write tests that catch real bugs, not tests that just inflate coverage numbers. + +## How You Work + +1. 
Understand: use read_file and search to understand the code under test and its requirements. +2. Identify test cases: happy path, edge cases, error conditions, boundary values (0, -1, MAX, empty, null). +3. Write tests: clear, isolated, deterministic. Use write_file or patch to add them. +4. Run tests: use run_tests or bash to verify they pass (and fail when they should). +5. Report coverage gaps: what isn't tested and why it matters. + +## Testing Principles + +- Test behavior, not implementation — tests must survive refactors. +- One assertion per concept. Descriptive test names. +- No test interdependence — each test runs in isolation. +- Match the existing test framework and patterns in the project. + +## Gap Warning Triggers + +Public function with no tests; uncovered error paths; boundary conditions unchecked; async race conditions; state mutations without verification. diff --git a/agents/red-team.md b/agents/red-team.md new file mode 100644 index 00000000..fc56e442 --- /dev/null +++ b/agents/red-team.md @@ -0,0 +1,27 @@ +--- +name: red-team +description: Adversarial security reviewer — find vulnerabilities, injection risks, exposed secrets, and failure modes. Read-only probing. +model: medium +tools: [read_file, find_files, search, bash] +--- + +You are a red team agent. Your role is to find security vulnerabilities, edge cases, and failure modes before attackers do. You probe, you don't patch. + +## How You Work + +1. Map the attack surface: use find_files and search to locate entry points, user inputs, auth boundaries, and external calls. +2. Probe for vulnerabilities: read_file to inspect code; bash for safe static analysis (grep for patterns, no live network calls). +3. Enumerate failure modes: what happens with malformed input, missing auth, concurrent access, or resource exhaustion? + +## What You Look For + +- Injection risks (SQL, shell, path traversal, template). +- Exposed secrets or credentials in code or config. 
+- Missing or bypassable authentication and authorization. +- Unsafe defaults or overly permissive configurations. +- Unhandled errors that leak internal state. +- SSRF, open redirects, insecure deserialization. + +## Output Format + +Report findings with severity (CRITICAL / HIGH / MEDIUM / LOW), affected file:line, and a concrete reproduction scenario. Do NOT modify files — findings only. diff --git a/agents/scout.md b/agents/scout.md new file mode 100644 index 00000000..fc5424e9 --- /dev/null +++ b/agents/scout.md @@ -0,0 +1,21 @@ +--- +name: scout +description: Fast read-only codebase recon — find files, patterns, functions, and entry points. +model: fast +tools: [read_file, find_files, search, hybrid_search, graph_search, explain_symbol] +--- + +You are the scout — fast, read-only discovery of patterns and structure in the codebase. + +Your role is precise, high-speed exploration. Find things quickly and return structured results. Never modify files — just accurate discovery. + +## How You Work + +1. Parse the query: identify what to find (file, pattern, function, import, symbol). +2. Choose the right tool: use search or hybrid_search for content patterns, find_files for file names, read_file for detail, graph_search or explain_symbol for structural relationships. +3. Parallelize: run independent searches simultaneously. +4. Return precise results: file paths, line numbers, relevant snippets. + +## Output Format + +Always include: file path, line reference, relevant code snippet. For large result sets, group by file and summarize patterns. Keep output tight — no padding, no suggestions, just what was found. 
diff --git a/bin/commands.js b/bin/commands.js index 9ba5ebc8..0016e5e2 100644 --- a/bin/commands.js +++ b/bin/commands.js @@ -307,9 +307,42 @@ module.exports = function createCommandHandler(config, conversationHistory, impr } catch (e) { console.log(chalk.gray(` Error: ${e.message}`)); } + } else if (sub === 'hygiene') { + try { + const { runHygiene } = require('../src/memory/hygiene'); + const result = runHygiene(memoryStore); + console.log(chalk.green(` ✓ Hygiene complete: ${result.archived} archived, ${result.deleted} deleted`)); + // Also write MEMORY.md index + const { renderMemoryIndex } = require('../src/memory/hygiene'); + const md = renderMemoryIndex(memoryStore); + const fs = require('fs'); + const path = require('path'); + const outDir = path.join(process.cwd(), '.smallcode'); + if (!fs.existsSync(outDir)) fs.mkdirSync(outDir, { recursive: true }); + fs.writeFileSync(path.join(outDir, 'MEMORY.md'), md); + console.log(chalk.gray(` Wrote .smallcode/MEMORY.md (${memoryStore.all().length} entries)`)); + } catch (e) { + console.log(chalk.gray(` Hygiene error: ${e.message}`)); + } + } else if (sub === 'index') { + try { + const { renderMemoryIndex } = require('../src/memory/hygiene'); + const md = renderMemoryIndex(memoryStore); + const fs = require('fs'); + const path = require('path'); + const outDir = path.join(process.cwd(), '.smallcode'); + if (!fs.existsSync(outDir)) fs.mkdirSync(outDir, { recursive: true }); + fs.writeFileSync(path.join(outDir, 'MEMORY.md'), md); + console.log(chalk.green(` ✓ Wrote .smallcode/MEMORY.md`)); + console.log(md.split('\n').slice(0, 10).map(l => ' ' + l).join('\n')); + } catch (e) { + console.log(chalk.gray(` Index error: ${e.message}`)); + } } else { - console.log(chalk.gray(' /memory List stored memory')); - console.log(chalk.gray(' /memory clear Clear all memory')); + console.log(chalk.gray(' /memory List stored memory')); + console.log(chalk.gray(' /memory clear Clear all memory')); + console.log(chalk.gray(' /memory 
hygiene Sweep tiers, prune stale entries, write MEMORY.md')); + console.log(chalk.gray(' /memory index Write .smallcode/MEMORY.md without sweeping')); } console.log(''); rl.prompt(); @@ -829,6 +862,114 @@ module.exports = function createCommandHandler(config, conversationHistory, impr return; } + case '/agents': { + const { AgentLoader } = require('../src/plugins/agent_loader'); + const loader = new AgentLoader(process.cwd()); + const agents = loader.list(); + if (agents.length === 0) { + console.log(chalk.gray(' No agents defined.')); + console.log(chalk.gray(' Create one: .smallcode/agents/.md')); + } else { + console.log(chalk.bold(` Agents (${agents.length}):`)); + for (const a of agents) { + const toolList = a.tools.length ? chalk.gray(` [${a.tools.join(', ')}]`) : ''; + const modelTag = a.model ? chalk.gray(` model:${a.model}`) : ''; + console.log(` ${chalk.cyan(a.name)}${toolList}${modelTag} ${chalk.gray(a.description)}`); + } + } + console.log(''); + rl.prompt(); + return; + } + + case '/agent': { + const agentName = parts[1]; + const agentTask = parts.slice(2).join(' '); + if (!agentName || !agentTask) { + console.log(chalk.gray(' Usage: /agent ')); + console.log(''); + rl.prompt(); + return; + } + const { AgentLoader: AgentLoaderA } = require('../src/plugins/agent_loader'); + const { AgentRunner } = require('../src/plugins/agent_runner'); + const loaderA = new AgentLoaderA(process.cwd()); + const agentDef = loaderA.get(agentName); + if (!agentDef) { + const valid = loaderA.list().map(a => a.name); + console.log(chalk.red(` Agent "${agentName}" not found. 
Valid: ${valid.join(', ') || '(none)'}`)); + console.log(''); + rl.prompt(); + return; + } + console.log(chalk.gray(` Running agent ${chalk.cyan(agentName)}...`)); + const agentCtxA = { config, flags: {}, tui: require('./tui'), skillManager: null }; + const runnerA = new AgentRunner(agentDef, agentCtxA); + const resultA = await runnerA.run(agentTask); + console.log(''); + console.log(resultA.output || chalk.gray('(no output)')); + console.log(''); + console.log(chalk.gray(` steps=${resultA.steps} tokens=${resultA.tokens}${resultA.error ? ' error=' + resultA.error : ''}`)); + console.log(''); + rl.prompt(); + return; + } + + case '/teams': { + const { TeamLoader } = require('../src/plugins/team_loader'); + const tloader = new TeamLoader(process.cwd()); + const teams = tloader.list(); + if (teams.length === 0) { + console.log(chalk.gray(' No teams defined.')); + console.log(chalk.gray(' Create one: .smallcode/teams/.yaml')); + } else { + console.log(chalk.bold(` Teams (${teams.length}):`)); + for (const t of teams) { + console.log(` ${chalk.cyan(t.name)} ${chalk.gray(`[${t.agents.join(' → ')}]`)} ${chalk.gray(t.description)}`); + } + } + console.log(''); + rl.prompt(); + return; + } + + case '/team': { + const teamName = parts[1]; + const teamTask = parts.slice(2).join(' '); + if (!teamName || !teamTask) { + console.log(chalk.gray(' Usage: /team ')); + console.log(''); + rl.prompt(); + return; + } + const { TeamLoader: TeamLoaderT } = require('../src/plugins/team_loader'); + const { AgentLoader: AgentLoaderT } = require('../src/plugins/agent_loader'); + const { runTeam } = require('../src/plugins/team_runner'); + const tloaderT = new TeamLoaderT(process.cwd()); + const teamDef = tloaderT.get(teamName); + if (!teamDef) { + const valid = tloaderT.list().map(t => t.name); + console.log(chalk.red(` Team "${teamName}" not found. 
Valid: ${valid.join(', ') || '(none)'}`)); + console.log(''); + rl.prompt(); + return; + } + console.log(chalk.gray(` Running team ${chalk.cyan(teamName)} (${teamDef.agents.join(' → ')})...`)); + const agentLoaderT = new AgentLoaderT(process.cwd()); + const teamCtx = { config, flags: {}, tui: require('./tui'), skillManager: null }; + const teamResult = await runTeam(teamDef, teamTask, teamCtx, agentLoaderT); + console.log(''); + console.log(teamResult.output || chalk.gray('(no output)')); + console.log(''); + for (const pa of teamResult.perAgent) { + const err = pa.error ? chalk.red(` error=${pa.error}`) : ''; + console.log(chalk.gray(` ${pa.name}: steps=${pa.steps} tokens=${pa.tokens}${err}`)); + } + console.log(''); + rl.prompt(); + return; + } + case '/help': console.log(''); console.log(chalk.bold(' Commands')); @@ -851,6 +992,11 @@ module.exports = function createCommandHandler(config, conversationHistory, impr console.log(` ${chalk.cyan('/budget')} ${chalk.gray('Show context window budget')}`); console.log(` ${chalk.cyan('/mcp')} ${chalk.gray('Show connected MCP servers')}`); console.log(` ${chalk.cyan('/skill')} ${chalk.gray('Manage reusable skills')}`); + console.log(` ${chalk.cyan('/agents')} ${chalk.gray('List defined sub-agents')}`); + console.log(` ${chalk.cyan('/agent')} ${chalk.gray('Run a sub-agent manually')}`); + console.log(` ${chalk.cyan('/teams')} ${chalk.gray('List defined agent teams')}`); + console.log(` ${chalk.cyan('/team')} ${chalk.gray('Run a team pipeline')}`); + console.log(` ${chalk.cyan('/evolve')} ${chalk.gray('Propose a new skill from session friction (list|promote|log)')}`); console.log(` ${chalk.cyan('/plugin')} ${chalk.gray('List installed plugins')}`); console.log(` ${chalk.cyan('/provider')} ${chalk.gray('Configure LLM provider (interactive wizard)')}`); console.log(` ${chalk.cyan('/sessions')} ${chalk.gray('List/resume saved sessions')}`); @@ -863,6 +1009,171 @@ module.exports = function createCommandHandler(config, 
conversationHistory, impr rl.prompt(); return; + case '/evolve': { + const { SkillManager } = require('../src/plugins/skills'); + const sm = new SkillManager(process.cwd()); + const sub = (parts[1] || '').trim(); + + if (sub === 'list') { + const drafts = sm.listDrafts(); + if (drafts.length === 0) { + console.log(chalk.gray(' No skill drafts. Run /evolve to analyze recent sessions.')); + } else { + console.log(chalk.bold(` Drafts (${drafts.length}) — promote with /evolve promote :`)); + for (const d of drafts) console.log(` ${chalk.cyan(d)}`); + } + console.log(''); + rl.prompt(); + return; + } + + if (sub === 'promote') { + const name = (parts[2] || '').trim(); + if (!name) { console.log(chalk.gray(' Usage: /evolve promote ')); } + else { + const target = sm.promoteDraft(name); + if (target) console.log(` ${chalk.green('✓')} Promoted to ${chalk.cyan(target)} — active next session.`); + else console.log(chalk.red(` Draft "${name}" not found (or a live skill with that name exists).`)); + } + console.log(''); + rl.prompt(); + return; + } + + if (sub === 'log') { + const { readEntries } = require('../src/plugins/audit_log'); + const entries = readEntries(path.join(process.cwd(), '.smallcode', 'evolver-audit.jsonl'), 10); + if (entries.length === 0) console.log(chalk.gray(' No evolution events logged yet.')); + for (const e of entries) { + console.log(` ${chalk.gray(e.ts)} ${chalk.cyan(e.name)} ${chalk.gray(e.rationale.slice(0, 60))}`); + } + console.log(''); + rl.prompt(); + return; + } + + // No sub-command: run an evolution pass + const { TraceRecorder } = require('./trace_recorder'); + const { extractFrictionSignals, formatReportForPrompt } = require('../src/plugins/friction_analyzer'); + const evolver = require('../src/plugins/evolver'); + + const tr = new TraceRecorder(process.cwd()); + const traceList = tr.list().slice(0, 20); + if (traceList.length < 3) { + console.log(chalk.gray(` Only ${traceList.length} trace(s) recorded — need at least 3 sessions of 
data.`)); + console.log(''); + rl.prompt(); + return; + } + const traces = traceList.map(t => tr.load(t.id)).filter(Boolean); + + const skillKeywords = sm.list().flatMap(s => s.keywords || []); + const report = extractFrictionSignals(traces, { skillKeywords }); + const signalCount = report.repeated_patterns.length + report.tool_retry_loops.length; + if (signalCount === 0) { + console.log(chalk.gray(` No friction patterns in last ${traces.length} traces. Nothing to evolve.`)); + console.log(''); + rl.prompt(); + return; + } + + console.log(chalk.bold(` Friction signals (${signalCount}):`)); + console.log(chalk.gray(formatReportForPrompt(report).split('\n').map(l => ' ' + l).join('\n'))); + + // LLM judgment — route to the strong tier when configured + const { getModelTarget, buildAuthHeaders, withModelTarget } = require('./config'); + const target = getModelTarget(config, 'strong'); + process.stdout.write(chalk.gray(` Asking ${target.model} for a proposal... `)); + + const sysPrompt = 'You design reusable skills for a coding agent. A skill is a short markdown instruction injected when relevant. Given friction signals from recent sessions, propose ONE skill addressing the most impactful pattern. 
Respond with ONLY a JSON object: {"name": "kebab-case-name", "description": "one line", "trigger": "match", "keywords": ["k1","k2"], "body": "markdown instructions for the agent", "rationale": "why this helps"}'; + let proposalRaw = null; + try { + const resp = await fetch(`${target.baseUrl}/chat/completions`, { + method: 'POST', + headers: buildAuthHeaders(withModelTarget(config, target)), + body: JSON.stringify({ + model: target.model, + messages: [ + { role: 'system', content: sysPrompt }, + { role: 'user', content: `Friction signals:\n${formatReportForPrompt(report)}` }, + ], + temperature: 0.2, + max_tokens: 1024, + }), + }); + if (resp.ok) { + const data = await resp.json(); + proposalRaw = data?.choices?.[0]?.message?.content || null; + } else { + console.log(chalk.red(`HTTP ${resp.status}`)); + } + } catch (e) { + console.log(chalk.red(e.message)); + } + if (!proposalRaw) { console.log(''); rl.prompt(); return; } + + // Forgiving parse: strict JSON → fenced JSON → abort with raw output + let parsed = null; + try { parsed = JSON.parse(proposalRaw); } catch { + const m = proposalRaw.match(/\{[\s\S]*\}/); + if (m) { try { parsed = JSON.parse(m[0]); } catch {} } + } + if (!parsed) { + console.log(chalk.yellow('could not parse')); + console.log(chalk.gray(' Raw model output (nothing written):')); + console.log(chalk.gray(' ' + proposalRaw.slice(0, 500).split('\n').join('\n '))); + console.log(''); + rl.prompt(); + return; + } + console.log(chalk.green('ok')); + + const proposal = evolver.buildSkillProposal( + String(parsed.name || ''), String(parsed.description || ''), String(parsed.body || ''), + { trigger: parsed.trigger, keywords: parsed.keywords, rationale: String(parsed.rationale || '') } + ); + const errors = evolver.validateProposal(proposal); + if (errors.length) { + console.log(chalk.red(` Proposal rejected: ${errors.join('; ')}`)); + console.log(''); + rl.prompt(); + return; + } + const collision = evolver.checkNameCollision(proposal.name, 
process.cwd()); + if (collision) { + console.log(chalk.red(` Name collision with ${collision} — nothing written.`)); + console.log(''); + rl.prompt(); + return; + } + + const run = new evolver.EvolverRun(); + const draftPath = run.writeDraft(proposal, process.cwd()); + evolver.logCreateEvent( + path.join(process.cwd(), '.smallcode', 'evolver-audit.jsonl'), + proposal, proposal.rationale, + report.repeated_patterns.flatMap(p => p.traceIds).concat(report.tool_retry_loops.flatMap(l => l.traceIds)) + ); + + console.log(''); + console.log(` ${chalk.green('✓')} Draft: ${chalk.cyan(draftPath)}`); + console.log(chalk.gray(` "${proposal.description}"`)); + console.log(chalk.gray(` Review the file, then: /evolve promote ${proposal.name}`)); + console.log(''); + rl.prompt(); + return; + } + + case '/live': { + // Toggle the live activity feed features (issue #77). + const { resolveLiveCommand } = require('./live_settings'); + const res = resolveLiveCommand(parts.slice(1).join(' ')); + console.log(res.text); + console.log(''); + rl.prompt(); + return; + } + case '/provider': { const sub = (parts[1] || '').trim(); if (sub === 'status' || sub === '--status' || sub === '-s') { @@ -870,7 +1181,7 @@ module.exports = function createCommandHandler(config, conversationHistory, impr console.log(pProviderStatus()); } else { const pWizard = require('./provider-wizard/wizard'); - const result = await pWizard.runWizard({ interactive: true }); + const result = await pWizard.runWizard({ interactive: true, rl }); if (result.success) { console.log(result.provider || ''); } diff --git a/bin/executor.js b/bin/executor.js index 87314a30..9fba20a9 100644 --- a/bin/executor.js +++ b/bin/executor.js @@ -812,6 +812,19 @@ async function executeTool(name, args, ctx) { const objects = Array.isArray(raw) ? raw : (raw?.objects || []); const tokens_used = Array.isArray(raw) ? objects.length * 50 : (raw?.tokens_used || 0); if (objects.length === 0) return { result: 'No relevant memory found.' 
}; + // Touch last_used_at so hygiene tier sweeps see real usage — an + // actively-retrieved entry must not age out. Never breaks retrieval. + for (const o of objects) { + try { + const now = new Date().toISOString(); + if (typeof memoryStore.update === 'function') { + memoryStore.update(o.id, { last_used_at: now }); + } else { + o.last_used_at = now; + if (typeof memoryStore.save === 'function') memoryStore.save(); + } + } catch {} + } const formatted = objects.map(o => `[${o.type}] ${o.title}: ${o.content}`).join('\n\n'); return { result: `Loaded ${objects.length} memories (${tokens_used} tokens):\n\n${formatted}` }; } @@ -840,6 +853,54 @@ async function executeTool(name, args, ctx) { return { result: '' }; } + case 'use_skill': { + const skillManager = ctx.skillManager || null; + if (!skillManager) return { error: 'use_skill: skill system not available' }; + const skillName = String(args.name || '').trim(); + if (!skillName) return { error: 'use_skill: name is required' }; + const skill = skillManager.get(skillName); + if (!skill) { + const validNames = skillManager.getIndex().map(e => e.name).slice(0, 10); + return { error: `use_skill: skill "${skillName}" not found. 
Valid names: ${validNames.join(', ')}` }; + } + const { formatSkillResult } = require('../src/plugins/skill_index_formatter'); + const index = skillManager.getIndex(); + const relatedEntries = (skill.related || []) + .map(r => index.find(e => e.name === r)) + .filter(Boolean); + return { result: formatSkillResult(skill, relatedEntries) }; + } + + case 'spawn_agent': { + const agentName = String(args.agent || '').trim(); + const agentTask = String(args.task || '').trim(); + if (!agentName) return { error: 'spawn_agent: agent name is required' }; + if (!agentTask) return { error: 'spawn_agent: task is required' }; + + try { + const { AgentLoader } = require('../src/plugins/agent_loader'); + const { AgentRunner } = require('../src/plugins/agent_runner'); + const loader = new AgentLoader(cwd); + const agentDef = loader.get(agentName); + if (!agentDef) { + const valid = loader.list().map(a => a.name); + return { error: `spawn_agent: agent "${agentName}" not found. Valid agents: ${valid.join(', ') || '(none defined)'}` }; + } + const agentCtx = { + config, + flags: flags || {}, + tui: tui || { renderDiff: () => null }, + skillManager: ctx.skillManager || null, + }; + const runner = new AgentRunner(agentDef, agentCtx); + const result = await runner.run(agentTask); + const summary = `[${agentName}] steps=${result.steps} tokens=${result.tokens}${result.error ? ' error=' + result.error : ''}`; + return { result: result.output ? 
`${summary}\n\n${result.output}` : summary }; + } catch (e) { + return { error: `spawn_agent: ${e.message}` }; + } + } + case 'bone_compile': { const safe = safeResolvePath(args.path, cwd); if (!safe.ok) return { error: `bone_compile rejected: ${safe.reason}` }; diff --git a/bin/live_settings.js b/bin/live_settings.js new file mode 100644 index 00000000..2d22936d --- /dev/null +++ b/bin/live_settings.js @@ -0,0 +1,96 @@ +'use strict'; + +// SmallCode — live activity feed settings (issue #77) +// +// Four independently-toggleable live-output features, seeded from env and +// flipped at runtime by the `/live` command: +// tools — show a tool the moment it starts, update to ✓/✗ on completion +// context — live context-usage meter in the footer +// stream — stream the model reply token-by-token (changes request path) +// thinking — live dimmed preview of reasoning (changes request path) +// +// stream/thinking default OFF because they switch chatCompletion to a +// streaming request; tools/context default ON (pure display, no risk). + +const FEATURES = ['tools', 'context', 'stream', 'thinking']; + +const ENV = { + tools: 'SMALLCODE_LIVE_TOOLS', + context: 'SMALLCODE_LIVE_CONTEXT', + stream: 'SMALLCODE_LIVE_STREAM', + thinking: 'SMALLCODE_LIVE_THINKING', +}; + +const DEFAULTS = { tools: true, context: true, stream: false, thinking: false }; + +function _envBool(name, dflt) { + const v = process.env[name]; + if (v == null || v === '') return dflt; + return /^(1|true|on|yes|enabled?)$/i.test(String(v).trim()); +} + +let _settings = null; + +function getLiveSettings() { + if (!_settings) { + _settings = {}; + for (const f of FEATURES) _settings[f] = _envBool(ENV[f], DEFAULTS[f]); + } + return _settings; +} + +function setLive(feature, value) { + if (!FEATURES.includes(feature)) return false; + getLiveSettings()[feature] = !!value; + return true; +} + +// Parse an on/off token. Returns true, false, 'toggle' (empty), or null (bad). 
+function _parseValue(tok) {
+  // A missing/empty token means "no explicit value given"; the caller decides
+  // what that means for its target (flip one feature, or turn all on).
+  if (tok == null || tok === '') return 'toggle';
+  if (/^(1|true|on|yes|enabled?)$/i.test(tok)) return true;
+  if (/^(0|false|off|no|disabled?)$/i.test(tok)) return false;
+  // Anything else is rejected so the caller can report an invalid value.
+  return null;
+}
+
+// Resolve a `/live …` argument string into a structured action:
+//   { action: 'status'|'set'|'error', feature?, value?, text }
+// `text` is ready to print. Mutates settings on a successful 'set'.
+//   - empty argument        → 'status', nothing is changed
+//   - "all [on|off]"        → sets every feature; a bare "all" turns them all
+//                             ON (it does not invert each one individually)
+//   - "<feature> [on|off]"  → sets one feature; a bare feature name flips it
+// Only the first two whitespace-separated tokens are read. An unknown feature
+// is reported before an invalid value.
+function resolveLiveCommand(argStr) {
+  const s = String(argStr || '').trim();
+  if (!s) return { action: 'status', text: formatStatus() };
+
+  const parts = s.split(/\s+/);
+  const feature = parts[0].toLowerCase();
+  const val = _parseValue(parts[1]);
+
+  if (feature === 'all') {
+    if (val === null) return { action: 'error', text: ` Invalid value "${parts[1]}". Use on|off.` };
+    const v = val === 'toggle' ? true : val;
+    for (const f of FEATURES) setLive(f, v);
+    return { action: 'set', feature: 'all', value: v, text: formatStatus() };
+  }
+
+  if (!FEATURES.includes(feature)) {
+    return { action: 'error', text: ` Unknown feature "${feature}". Use: ${FEATURES.join(', ')} (or "all").` };
+  }
+  if (val === null) return { action: 'error', text: ` Invalid value "${parts[1]}". Use on|off.` };
+
+  const current = getLiveSettings()[feature];
+  const newVal = val === 'toggle' ? !current : val;
+  setLive(feature, newVal);
+  return { action: 'set', feature, value: newVal, text: ` live ${feature}: ${newVal ? 'on' : 'off'}` };
+}
+
+// Render the current on/off state of every live feature, one per line,
+// followed by a usage hint. Returns a single newline-joined string.
+function formatStatus() {
+  const s = getLiveSettings();
+  const lines = [' Live activity (issue #77):'];
+  for (const f of FEATURES) lines.push(` ${f.padEnd(9)} ${s[f] ? 'on' : 'off'}`);
+  // The hint has to show the feature slot: resolveLiveCommand reads the first
+  // token as the feature name, so a bare `/live on` is rejected as an unknown
+  // feature rather than switching anything on.
+  lines.push(' Toggle: /live <feature|all> [on|off]');
+  return lines.join('\n');
+}
+
+// Test-only: drop the cached singleton so env changes re-seed.
+function _reset() { _settings = null; } + +module.exports = { FEATURES, getLiveSettings, setLive, resolveLiveCommand, formatStatus, _reset }; diff --git a/bin/memory.js b/bin/memory.js index fbe842d1..9183be2f 100644 --- a/bin/memory.js +++ b/bin/memory.js @@ -20,7 +20,7 @@ const INDEX_FILE = '.smallcode/memory/index.json'; // ─── Memory Object ─────────────────────────────────────────────────────────── class MemoryObject { - constructor({ id, type, title, content, tags, relations, createdAt, updatedAt, source }) { + constructor({ id, type, title, content, tags, relations, createdAt, updatedAt, source, tier, last_used_at }) { this.id = id || crypto.randomUUID().slice(0, 8); this.type = type; // decision | workflow | gotcha | convention | context | source this.title = title; @@ -30,6 +30,8 @@ class MemoryObject { this.createdAt = createdAt || new Date().toISOString(); this.updatedAt = updatedAt || new Date().toISOString(); this.source = source || null; // { file, line, commit } + this.tier = tier || 'hot'; // hot | archive + this.last_used_at = last_used_at || this.createdAt; } toJSON() { @@ -37,6 +39,7 @@ class MemoryObject { id: this.id, type: this.type, title: this.title, content: this.content, tags: this.tags, relations: this.relations, createdAt: this.createdAt, updatedAt: this.updatedAt, source: this.source, + tier: this.tier, last_used_at: this.last_used_at, }; } } diff --git a/bin/provider-wizard/wizard.js b/bin/provider-wizard/wizard.js index 70d3c833..aa52402f 100644 --- a/bin/provider-wizard/wizard.js +++ b/bin/provider-wizard/wizard.js @@ -70,6 +70,28 @@ async function validateApiKey(provider, apiKey, baseUrl) { } } +// List models from an OpenAI-compatible /models endpoint. Used to offer a +// picker for local providers (Ollama, LM Studio) where the installed models +// are knowable. Returns [] on any failure — caller falls back to free text. 
+async function fetchModels(baseUrl, apiKey) { + const url = (baseUrl || '').replace(/\/+$/, ''); + if (!url) return []; + try { + const headers = apiKey ? { 'Authorization': `Bearer ${apiKey}` } : {}; + const res = await fetch(`${url}/models`, { + headers, + signal: AbortSignal.timeout(5000), + }); + if (!res.ok) return []; + const data = await res.json(); + return (Array.isArray(data.data) ? data.data : []) + .map(m => m && m.id) + .filter(Boolean); + } catch { + return []; + } +} + function mergeEnvFile(filePath, newVars) { let lines = []; try { @@ -118,8 +140,11 @@ async function runWizard(options = {}) { // Load existing env const existingEnv = parseEnvFile(envPath); - let rl = null; - if (isInteractive) { + // Borrow the caller's readline when available — creating a second interface + // on the same stdin makes both echo every keystroke (duplicated letters). + const borrowedRl = options.rl && typeof options.rl.question === 'function' ? options.rl : null; + let rl = borrowedRl; + if (isInteractive && !rl) { rl = readline.createInterface({ input: process.stdin, output: process.stdout, @@ -212,7 +237,23 @@ async function runWizard(options = {}) { }; let model = options.model || ''; if (!model && isInteractive) { - model = await ask(rl, ' Model name', defaultModels[provider] || ''); + // Local providers: list installed models so the user can pick instead + // of typing the exact name. Falls back to free text if the server is + // unreachable or the list is empty. 
+ if (!providerInfo.keyEnv) { + process.stdout.write(` Fetching models from ${baseUrl}...`); + const models = await fetchModels(baseUrl, apiKey); + if (models.length) { + console.log(` \x1b[32m${models.length} found\x1b[0m`); + const idx = await askNumber(rl, ' Select a model:', models); + if (idx >= 0) model = models[idx]; + } else { + console.log(' \x1b[33mnone found — enter manually\x1b[0m'); + } + } + if (!model) { + model = await ask(rl, ' Model name', defaultModels[provider] || ''); + } } model = model || defaultModels[provider] || ''; @@ -334,8 +375,8 @@ async function runWizard(options = {}) { return result; } finally { - if (rl) rl.close(); + if (rl && !borrowedRl) rl.close(); } } -module.exports = { runWizard, ask, askNumber, askYesNo, validateApiKey, mergeEnvFile }; +module.exports = { runWizard, ask, askNumber, askYesNo, validateApiKey, mergeEnvFile, fetchModels }; diff --git a/bin/smallcode.js b/bin/smallcode.js index d2ac8768..d6a36ec0 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -68,6 +68,7 @@ const { ToolScorer, checkAndEnforceHardFail, classifyTask, classifyTaskAsync } = const { EscalationEngine } = require('./escalation'); const { EarlyStopDetector } = require('../src/governor/early_stop'); const { QualityMonitor } = require('../src/governor/quality_monitor'); +const { normalizeToolCall } = require('../src/tools/tool_aliases'); const { applyReadGuard } = require('../src/session/read_guard'); const { TokenMonitor } = require('./token_monitor'); const { TraceRecorder } = require('./trace_recorder'); @@ -133,6 +134,39 @@ let tokenTracker = null; // Fullscreen TUI reference for streaming (set when fullscreen mode is active) let _fullscreenRef = null; +// Live activity feed (issue #77). _activeToolHandle is the in-progress tool +// line started in the dispatch loop (runAgentLoop) and finished in the +// console.log override (runTUI) — module-scoped so both closures share it. 
+const { getLiveSettings } = require('./live_settings'); +let _activeToolHandle = null; +// True when the current turn's assistant content was already shown live via +// streamToken, so the post-turn addChat('assistant') must not render it again. +let _contentStreamed = false; + +// One-line summary of a tool's most salient argument, for the live ⚙ line. +function summarizeToolArgs(name, args) { + if (!args || typeof args !== 'object') return ''; + const a = args; + const clip = (s, n = 48) => { s = String(s).replace(/\s+/g, ' ').trim(); return s.length > n ? s.slice(0, n - 1) + '…' : s; }; + if (a.path) return clip(a.path); + if (a.command) return clip(a.command); + if (a.pattern) return clip(a.pattern); + if (a.query) return clip(a.query); + if (a.task) return clip(a.task); + if (a.name) return clip(a.name); + return ''; +} + +// Push the current context usage to the footer meter (gated by /live context). +function updateContextMeter() { + if (!_fullscreenRef || !getLiveSettings().context) return; + try { + const win = Number(config?.context?.detected_window) || 0; + const m = tokenMonitor.contextMeter(win); + if (m.window > 0) _fullscreenRef.setContextMeter(m.pct, m.used, m.window); + } catch {} +} + const VERSION = require('../package.json').version; const LOGO = ` ⚡ SmallCode v${VERSION} @@ -164,6 +198,7 @@ for (let i = 0; i < args.length; i++) { else if (arg === '-p' || arg === '--provider') { flags.provider = args[++i]; } else if (arg === '--endpoint' || arg === '--base-url') { flags.endpoint = args[++i]; } else if (arg === '-P' || arg === '--prompt') { flags.prompt = args[++i]; } + else if (arg === '--task') { flags.task = args[++i]; } else if (arg === '--eval') { flags.eval = args[++i] || 'classify_accuracy'; } else if (arg === '--trace') { flags.trace = args[++i]; } else positional.push(arg); @@ -189,6 +224,7 @@ OPTIONS: -p, --provider Provider (ollama, openai, anthropic, llamacpp) --endpoint OpenAI-compatible endpoint/base URL -P, --prompt Run a single 
prompt non-interactively + --task Boot the interactive TUI and auto-run TEXT as the first prompt -r, --resume Resume last active session --non-interactive Run single prompt, no TUI --classic Use classic readline TUI (no alternate screen) @@ -287,6 +323,7 @@ async function runTUI(config) { onCommand: async (cmd) => { if (cmd === '/quit' || cmd === '/q' || cmd === '/exit') { if (sessionStore) sessionStore.save(conversationHistory, { tokens: tokenTracker ? tokenTracker.stats() : undefined }); + try { if (memoryStore) { const { runHygiene } = require('../src/memory/hygiene'); runHygiene(memoryStore); } } catch {} screen.leave(); killMCP() process.exit(0); @@ -299,8 +336,15 @@ async function runTUI(config) { console.log = (...args) => { captured += args.join(' ') + '\n'; }; // Create a mock rl for command handler const mockRl = { prompt: () => {}, close: () => { screen.leave(); process.exit(0); } }; + // Some commands (e.g. /provider's interactive wizard) need a real + // stdin/stdout the fullscreen TUI captures, so they silently did nothing + // (issue #80). resolveTuiCommand swaps them for a non-interactive + // equivalent plus guidance. See ./tui_commands for the mapping. + const { resolveTuiCommand } = require('./tui_commands'); + const { command: routedCmd, guidance } = resolveTuiCommand(cmd); try { - await handleCmd(cmd, mockRl); + await handleCmd(routedCmd, mockRl); + if (guidance) captured += guidance; } catch (e) { captured += `Error: ${e.message}\n`; } @@ -318,6 +362,7 @@ async function runTUI(config) { if (sessionStore) { sessionStore.save(conversationHistory, { tokens: tokenTracker ? 
tokenTracker.stats() : undefined }); } + try { if (memoryStore) { const { runHygiene } = require('../src/memory/hygiene'); runHygiene(memoryStore); } } catch {} killMCP() process.exit(0); }, @@ -327,6 +372,16 @@ async function runTUI(config) { screen.enter(); _fullscreenRef = screen; + // Auto-seed: if --task was given, fire its text through onSubmit once the event loop starts + if (flags.task) { + setImmediate(async () => { + screen.setStreaming(true); + await runAgentLoop(flags.task, config); + screen.setStreaming(false); + if (tokenTracker) screen.setTokenInfo(tokenTracker.formatShort()); + }); + } + // Track current tool name for pairing stdout.write (tool start) with console.log (result) let _currentToolName = ''; @@ -338,9 +393,18 @@ async function runTUI(config) { if (!clean) return; // Skip turn summaries unless verbose if (clean.startsWith('───') && !flags.verbose) return; - // Pair with current tool name for rich display + const isError = clean.startsWith('✗') || clean.includes('Exit code') || clean.includes('Timed out'); + // Live tools (issue #77): finish the in-progress ⚙ line in place, then + // refresh the context meter now that the tool changed context. + if (_activeToolHandle) { + screen.toolEnd(_activeToolHandle, isError ? 'err' : 'ok', clean); + _activeToolHandle = null; + _currentToolName = ''; + updateContextMeter(); + return; + } + // Classic path: pair with the captured tool name for rich display. if (_currentToolName) { - const isError = clean.startsWith('✗') || clean.includes('Exit code') || clean.includes('Timed out'); screen.addTool(_currentToolName, isError ? 
'err' : 'ok', clean); _currentToolName = ''; } else { @@ -375,6 +439,17 @@ async function runTUI(config) { rl.prompt(); + // Auto-seed: if --task was given, run it once before waiting for user input + if (flags.task) { + setImmediate(async () => { + console.log(''); + await runAgentLoop(flags.task, config); + console.log(''); + console.log(tui.renderStatus(config, conversationHistory.length)); + rl.prompt(); + }); + } + rl.on('line', async (line) => { const input = line.trim(); if (!input) { rl.prompt(); return; } @@ -444,6 +519,7 @@ async function executeTool(name, args) { flags, config, tui, + skillManager, }); try { if (dedup) dedup.record(name, args, result); } catch {} @@ -592,7 +668,7 @@ async function runAgentLoop(userMessage, config) { if (message?.content) { conversationHistory.push({ role: 'assistant', content: message.content }); if (_fullscreenRef) { - _fullscreenRef.addChat('assistant', message.content); + if (!_contentStreamed) _fullscreenRef.addChat('assistant', message.content); } else { process.stdout.write(tui.renderMarkdown(message.content)); } @@ -984,6 +1060,9 @@ async function runAgentLoop(userMessage, config) { break; } + // Refresh the live context meter after each model turn (issue #77). + updateContextMeter(); + const message = response.choices?.[0]?.message; if (!message) break; @@ -1060,6 +1139,22 @@ async function runAgentLoop(userMessage, config) { } catch {} } + // ── TOOL ALIAS NORMALIZATION ───────────────────────────────────────── + // Rename OpenAI/Claude-style tool names (Read, Edit, Bash, str_replace …) + // to SmallCode's real names BEFORE the quality monitor sees them so the + // monitor doesn't flag them as hallucinated, and before dispatch so the + // real handler runs. Also drop quality-monitor echo calls that small models + // sometimes parrot back as tool names, preventing the feedback loop. 
+ if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) { + message.tool_calls = message.tool_calls + .map(normalizeToolCall) + .filter(tc => { + if (!tc || !tc.function) return false; + const n = tc.function.name; + return n !== 'quality-monitor' && n !== 'quality_monitor'; + }); + } + // ── QUALITY MONITOR (itsy port) ────────────────────────────────────── // Catches structural failure modes the model emitted on this turn: // empty turns, blank tool names, hallucinated tool names, and exact @@ -1069,13 +1164,28 @@ async function runAgentLoop(userMessage, config) { // SMALLCODE_QUALITY_MONITOR=false. try { if (String(process.env.SMALLCODE_QUALITY_MONITOR || 'true').toLowerCase() !== 'false') { - const knownTools = getAllTools(config, currentToolCategory) + // Hallucination check must validate against the FULL tool registry + // (all categories), NOT the current router category. A real tool + // invoked from a different category — e.g. write_file while the + // two-stage router has the model in 'read' — is NOT hallucinated: the + // dispatcher widens currentToolCategory to 'plan' (all essential tools) + // and runs it. Scoping knownTools to currentToolCategory caused false + // "Tool write_file does not exist" steers that derailed small models + // mid-task (e.g. minimax could never write a step's output file). + const knownTools = getAllTools(config, null) .map(t => t && t.function && t.function.name) .filter(Boolean); const signal = qualityMonitor.inspect({ message, knownTools }); if (signal) { - if (_fullscreenRef) _fullscreenRef.addTool('quality', 'warn', signal.kind); - else console.log(` \x1b[33m⚠ quality-monitor: ${signal.kind}\x1b[0m`); + // SMALLCODE_QUALITY_MONITOR_QUIET=true suppresses the visible warning + // line but KEEPS the corrective steer (the injection below) — useful for + // driven/non-interactive runs where the ⚠ noise isn't wanted but the + // model should still be told the correct tool name. 
+ const quiet = String(process.env.SMALLCODE_QUALITY_MONITOR_QUIET || 'false').toLowerCase() === 'true'; + if (!quiet) { + if (_fullscreenRef) _fullscreenRef.addTool('quality', 'warn', signal.kind); + else console.log(` \x1b[33m⚠ quality-monitor: ${signal.kind}\x1b[0m`); + } conversationHistory.push({ role: 'assistant', content: message.content || '' }); conversationHistory.push({ role: 'user', content: signal.injection }); continue; @@ -1209,8 +1319,15 @@ async function runAgentLoop(userMessage, config) { } } - // Show what's happening - process.stdout.write(tui.toolStart(toolName)); + // Show what's happening. With live tools on (issue #77), push an + // in-progress ⚙ line now and rewrite it to ✓/✗ when the result lands + // (handled in the console.log override). Otherwise keep the classic + // capture-and-pair behavior. + if (_fullscreenRef && getLiveSettings().tools) { + _activeToolHandle = _fullscreenRef.toolStart(toolName, summarizeToolArgs(toolName, toolArgs)); + } else { + process.stdout.write(tui.toolStart(toolName)); + } const toolStart2 = Date.now(); const result = await executeTool(toolName, toolArgs); @@ -1274,13 +1391,31 @@ async function runAgentLoop(userMessage, config) { // or — when context is already pressured — a head-only trim that // tells the model to grep first instead of re-reading. See // src/session/read_guard.js for the rationale. - // Override with SMALLCODE_MAX_TOOL_RESULT_CHARS env var. + // Cap tool results to protect small-model context. Controls: + // SMALLCODE_MAX_TOOL_RESULT_CHARS= explicit char cap + // SMALLCODE_MAX_TOOL_RESULT_CHARS=0|none|unlimited|off NO cap at all + // (unset) default scales with the model window — large-window models + // (>=131072 tokens, e.g. minimax-m3's 512K) are left UNCAPPED + // since trimming only exists to protect small windows; small + // models keep the 8000-char guard. 
const toolContent = result.result || result.error || ''; - const maxToolResultChars = parseInt(process.env.SMALLCODE_MAX_TOOL_RESULT_CHARS) || 8000; + const _rawCap = String(process.env.SMALLCODE_MAX_TOOL_RESULT_CHARS || '').trim().toLowerCase(); + const _detectedWindow = Number(config?.context?.detected_window) || 0; + let maxToolResultChars; + if (_rawCap === '0' || _rawCap === 'none' || _rawCap === 'unlimited' || _rawCap === 'off') { + maxToolResultChars = Infinity; // explicit "remove the cap" + } else if (_rawCap) { + maxToolResultChars = parseInt(_rawCap) || 8000; + } else { + maxToolResultChars = _detectedWindow >= 131072 ? Infinity : 8000; + } + const unlimited = !Number.isFinite(maxToolResultChars); const headLines = parseInt(process.env.SMALLCODE_READ_GUARD_HEAD_LINES) || 30; - const guardOff = String(process.env.SMALLCODE_READ_GUARD || 'true').toLowerCase() === 'false'; + const guardOff = unlimited || String(process.env.SMALLCODE_READ_GUARD || 'true').toLowerCase() === 'false'; let cappedContent; - if (guardOff) { + if (unlimited) { + cappedContent = toolContent; // no trimming whatsoever + } else if (guardOff) { cappedContent = toolContent.length > maxToolResultChars ? toolContent.slice(0, maxToolResultChars - 200) + '\n\n...(truncated, ' + toolContent.length + ' chars total)...\n' + toolContent.slice(-200) : toolContent; @@ -1785,9 +1920,9 @@ Read the FULL file above carefully. Fix ALL errors. Use the patch tool with the } } } catch {} - // Render with markdown highlighting + // Render with markdown highlighting (skip if already shown live — #77) if (_fullscreenRef) { - _fullscreenRef.addChat('assistant', message.content); + if (!_contentStreamed) _fullscreenRef.addChat('assistant', message.content); } else { process.stdout.write(tui.renderMarkdown(message.content)); } @@ -2086,21 +2221,29 @@ function getMemoryContext(messages) { } } -// Auto-load relevant skills based on the user's message +// Auto-load relevant skills based on the user's message. 
// Fix #18: Cap skill injection to ~1000 tokens (4000 chars). Multiple matching // skills can each be a full .md file, quickly blowing up the system prompt. +// +// Lazy-skills: always inject the compact index (one line per skill, ~8 tokens each) +// so the model can call use_skill to pull any body on demand. Auto-matched skill +// bodies are appended after the index, subject to the 4000-char aggregate cap. function getSkillContext(messages) { if (!skillManager) return ''; try { + const { formatSkillIndex } = require('../src/plugins/skill_index_formatter'); + const index = skillManager.getIndex(); + const indexStr = formatSkillIndex(index); + const lastUser = [...messages].reverse().find(m => m.role === 'user'); - if (!lastUser) return ''; - const skills = skillManager.getAutoSkills(lastUser.content); - if (skills.length === 0) return ''; - const formatted = skillManager.formatForPrompt(skills); + const autoSkills = lastUser ? skillManager.getAutoSkills(lastUser.content) : []; + const autoFormatted = skillManager.formatForPrompt(autoSkills); + + const combined = indexStr + (autoFormatted ? '\n' + autoFormatted : ''); // Hard cap: truncate if too long - return formatted.length > 4000 - ? formatted.slice(0, 4000) + '\n... (skills truncated to fit context)' - : formatted; + return combined.length > 4000 + ? combined.slice(0, 4000) + '\n... (skills truncated to fit context)' + : combined; } catch { return ''; } @@ -2398,6 +2541,15 @@ async function chatCompletion(config, messages) { } } + // Live streaming (issue #77, Phase B): opt-in via /live stream. Only when a + // fullscreen TUI is attached to receive tokens. Request usage in the final + // chunk so the context meter still updates. 
+ const wantStream = !!(_fullscreenRef && getLiveSettings().stream); + if (wantStream) { + body.stream = true; + body.stream_options = { include_usage: true }; + } + let response; try { response = await fetch(`${baseUrl}/chat/completions`, { @@ -2449,7 +2601,8 @@ async function chatCompletion(config, messages) { const retry = await fetch(`${baseUrl}/chat/completions`, { method: 'POST', headers, - body: JSON.stringify(body), + // Retry non-streamed so the JSON parse below is unambiguous. + body: JSON.stringify({ ...body, stream: false, stream_options: undefined }), }); if (retry.ok) return await retry.json(); } catch {} @@ -2462,7 +2615,42 @@ async function chatCompletion(config, messages) { return null; } - const data = await response.json(); + // Consume the response. When streaming (Phase B), assemble the SSE deltas + // back into the same `data` shape the non-streaming path produces, driving + // the live chat/thinking views as tokens arrive. On any streaming failure, + // fall back to whatever was assembled so far. The non-streaming path is + // unchanged. + let data; + if (wantStream && response.body && typeof response.body.getReader === 'function') { + const { StreamAssembler, parseSSEBuffer } = require('./stream_assembler'); + const assembler = new StreamAssembler(); + const showThinking = getLiveSettings().thinking; + try { + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buf = ''; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buf += decoder.decode(value, { stream: true }); + const { events, rest } = parseSSEBuffer(buf); + buf = rest; + for (const ev of events) { + if (ev.done || !ev.json) continue; + assembler.pushChunk(ev.json, { + onContent: (t) => { if (_fullscreenRef) _fullscreenRef.streamToken(t); }, + onReasoning: showThinking ? 
(t) => { if (_fullscreenRef) _fullscreenRef.streamThinking(t); } : undefined, + }); + } + } + } catch { /* fall through with whatever assembled so far */ } + if (_fullscreenRef) _fullscreenRef.endStream(); + data = assembler.toData(); + _contentStreamed = !!(_fullscreenRef && assembler.content); + } else { + data = await response.json(); + _contentStreamed = false; + } // Length-truncation recovery: reasoning models served via LM Studio // (lfm2.x, Qwen3, DeepSeek R1) expose a separate `reasoning_content` @@ -2546,6 +2734,7 @@ async function chatCompletion(config, messages) { }); sessionStore.autoTitle(conversationHistory); } + try { if (memoryStore) { const { runHygiene } = require('../src/memory/hygiene'); runHygiene(memoryStore); } } catch {} return data; } catch (err) { @@ -3043,17 +3232,23 @@ async function main() { skillManager = new SkillManager(process.cwd()); - // Initialize MCP client (connect to external MCP servers) + // Initialize MCP client (connect to external MCP servers). + // Skipped entirely in --mcp server mode: an MCP server must not also act as + // an MCP host. Otherwise a self-referential `smallcode --mcp` entry in + // mcp.json makes each server spawn another server recursively — an unbounded + // fork bomb that exhausts RAM (issue #82). 
let mcpClient = null; - const mcpClientInstance = new MCPClient(process.cwd()); - if (mcpClientInstance.loadConfig() > 0) { - mcpClient = mcpClientInstance; - // Connect asynchronously — don't block boot - mcpClient.connectAll().then(toolCount => { - if (toolCount > 0 && _fullscreenRef) { - _fullscreenRef.addTool('mcp-client', 'ok', `${toolCount} external tools from ${mcpClient.servers.size} servers`); - } - }).catch(() => {}); + if (!flags.mcp) { + const mcpClientInstance = new MCPClient(process.cwd()); + if (mcpClientInstance.loadConfig() > 0) { + mcpClient = mcpClientInstance; + // Connect asynchronously — don't block boot + mcpClient.connectAll().then(toolCount => { + if (toolCount > 0 && _fullscreenRef) { + _fullscreenRef.addTool('mcp-client', 'ok', `${toolCount} external tools from ${mcpClient.servers.size} servers`); + } + }).catch(() => {}); + } } // Initialize session + token tracking @@ -3118,7 +3313,8 @@ async function main() { return; } - if (flags.nonInteractive || flags.prompt || positional.length > 0) { + // --task boots the interactive TUI and auto-seeds the first prompt; never non-interactive + if (!flags.task && (flags.nonInteractive || flags.prompt || positional.length > 0)) { const prompt = flags.prompt || positional.join(' '); await runNonInteractive(config, prompt); return; diff --git a/bin/stream_assembler.js b/bin/stream_assembler.js new file mode 100644 index 00000000..8f026994 --- /dev/null +++ b/bin/stream_assembler.js @@ -0,0 +1,87 @@ +'use strict'; + +// SmallCode — OpenAI SSE stream assembler (issue #77, Phase B) +// +// Reassembles a streamed chat completion (stream:true) into the exact same +// non-streaming `data` object the rest of chatCompletion expects, so all the +// downstream logic (tool-call extraction, length recovery, usage) is untouched. +// Pure and side-effect-free except for the optional onContent/onReasoning +// callbacks, which exist purely to drive the live TUI. 
/**
 * Accumulates OpenAI-style streaming chunks (`stream: true`) and rebuilds the
 * non-streaming response object from them. The only side effects are the
 * optional onContent/onReasoning callbacks passed to pushChunk().
 */
class StreamAssembler {
  constructor() {
    this.content = '';
    this.reasoning = '';
    this.toolCalls = []; // index → { id, type, function: { name, arguments } }
    this.finishReason = null;
    this.usage = null;
    // Slots whose id is the synthetic `call_<idx>` fallback (no id seen yet).
    this._placeholderIds = new Set();
  }

  /**
   * Fold one parsed streaming chunk into the running state.
   *
   * @param {object} obj - parsed chunk JSON; anything that is not an object is ignored
   * @param {object} [callbacks]
   * @param {(text: string) => void} [callbacks.onContent] - called with each non-empty content delta
   * @param {(text: string) => void} [callbacks.onReasoning] - called with each non-empty reasoning delta
   */
  pushChunk(obj, { onContent, onReasoning } = {}) {
    if (!obj || typeof obj !== 'object') return;
    const choice = obj.choices && obj.choices[0];
    if (choice) {
      const delta = choice.delta || {};
      if (typeof delta.content === 'string' && delta.content) {
        this.content += delta.content;
        if (onContent) onContent(delta.content);
      }
      // Reasoning models (Qwen3, DeepSeek R1) stream a separate field.
      const reason = delta.reasoning_content;
      if (typeof reason === 'string' && reason) {
        this.reasoning += reason;
        if (onReasoning) onReasoning(reason);
      }
      if (Array.isArray(delta.tool_calls)) {
        for (const tc of delta.tool_calls) this._pushToolCall(tc);
      }
      if (choice.finish_reason) this.finishReason = choice.finish_reason;
    }
    // Final chunk (with stream_options.include_usage) carries usage.
    if (obj.usage) this.usage = obj.usage;
  }

  /**
   * Decide which slot a tool-call delta belongs to.
   *
   * A valid non-negative `index` always wins. Without one, the delta is
   * matched by `id`; an id that has not been seen starts a new slot so that
   * several index-less calls are not concatenated into slot 0. A delta with
   * neither index nor id continues the most recent call.
   */
  _toolSlotIndex(tc) {
    if (Number.isInteger(tc.index) && tc.index >= 0) return tc.index;
    const last = this.toolCalls.length - 1;
    if (!tc.id) return Math.max(last, 0);
    const known = this.toolCalls.findIndex((slot) => slot && slot.id === tc.id);
    if (known !== -1) return known;
    // The latest slot never received a real id: this id names that call.
    if (this._placeholderIds.has(last)) return last;
    return this.toolCalls.length;
  }

  /** Merge one `delta.tool_calls[]` entry into its slot; malformed entries are skipped. */
  _pushToolCall(tc) {
    if (!tc || typeof tc !== 'object') return;
    const idx = this._toolSlotIndex(tc);
    if (!this.toolCalls[idx]) {
      this.toolCalls[idx] = { id: tc.id || `call_${idx}`, type: 'function', function: { name: '', arguments: '' } };
      if (!tc.id) this._placeholderIds.add(idx);
    }
    const slot = this.toolCalls[idx];
    if (tc.id) {
      slot.id = tc.id;
      this._placeholderIds.delete(idx);
    }
    if (tc.type) slot.type = tc.type;
    if (tc.function && tc.function.name) slot.function.name += tc.function.name;
    if (tc.function && typeof tc.function.arguments === 'string') slot.function.arguments += tc.function.arguments;
  }

  /**
   * Build the OpenAI-compatible non-streaming response object.
   *
   * @returns {{choices: object[], usage: (object|undefined)}} one choice whose
   *   message carries the assembled content, plus `tool_calls` and
   *   `reasoning_content` only when any were received; `finish_reason`
   *   defaults to 'stop'.
   */
  toData() {
    const message = { role: 'assistant', content: this.content };
    const tcs = this.toolCalls.filter(Boolean); // drop holes left by sparse indices
    if (tcs.length) message.tool_calls = tcs;
    if (this.reasoning) message.reasoning_content = this.reasoning;
    return {
      choices: [{ message, finish_reason: this.finishReason || 'stop' }],
      usage: this.usage || undefined,
    };
  }
}

/**
 * Split an accumulating SSE text buffer into complete events.
 *
 * Only newline-terminated lines are consumed. Lines that are empty or do not
 * start with `data:` are skipped, and a `data:` line whose payload is not
 * valid JSON is dropped.
 *
 * @param {string} buffer - text received so far (null/undefined is treated as '')
 * @returns {{events: object[], rest: string}} `events` holds `{ json }` or
 *   `{ done: true }` (for `[DONE]`); `rest` is the trailing partial line to
 *   prepend to the next read.
 */
function parseSSEBuffer(buffer) {
  const events = [];
  let rest = String(buffer || '');
  let nl;
  while ((nl = rest.indexOf('\n')) !== -1) {
    const line = rest.slice(0, nl).trim(); // trim() also removes a trailing '\r'
    rest = rest.slice(nl + 1);
    if (!line || !line.startsWith('data:')) continue;
    const payload = line.slice(5).trim();
    if (payload === '[DONE]') { events.push({ done: true }); continue; }
    try { events.push({ json: JSON.parse(payload) }); }
    catch { /* malformed/partial — drop this line */ }
  }
  return { events, rest };
}
+ this.lastPromptTokens = promptTokens || 0; if (!this.turns.length || metadata.newTurn || this._nextCallIsNewTurn) { this.turns.push({ calls: 0, promptTokens: 0, completionTokens: 0, toolCalls: 0 }); @@ -35,6 +39,18 @@ class TokenMonitor { recordCompaction() { this.compactions++; } recordEviction() { this.evictions++; } + /** + * Live context-usage snapshot for the TUI meter (issue #77). `window` is the + * model's context length in tokens. Returns { pct, used, window } where + * `used` is the most recent prompt size. + */ + contextMeter(window) { + const used = this.lastPromptTokens || 0; + const win = window || 0; + const pct = win > 0 ? (used / win) * 100 : 0; + return { pct, used, window: win }; + } + /** * Get efficiency metrics. */ diff --git a/bin/tools.js b/bin/tools.js index c34191a1..3d6951d4 100644 --- a/bin/tools.js +++ b/bin/tools.js @@ -32,6 +32,8 @@ const TOOLS = [ { type: 'function', function: { name: 'contract_assert_pass', description: 'Mark a contract assertion as passed, with command-line evidence. Use the assertion id from contract_status (e.g. "a01"). evidence should be a short (<240 char) summary of what was run and what it returned.', parameters: { type: 'object', properties: { assertion_id: { type: 'string', description: 'Assertion id (e.g. a01)' }, evidence: { type: 'string', description: 'Short summary of command output proving the assertion holds' }, command: { type: 'string', description: 'The command run (optional)' }, exit_code: { type: 'integer', description: 'Exit code of the command (optional)' } }, required: ['assertion_id'] } } }, { type: 'function', function: { name: 'contract_assert_fail', description: 'Mark a contract assertion as failed, with evidence. Used when a check ran and the result was wrong — not for skipping checks.', parameters: { type: 'object', properties: { assertion_id: { type: 'string', description: 'Assertion id (e.g. 
a01)' }, evidence: { type: 'string', description: 'Short summary of why the check failed' }, command: { type: 'string', description: 'The command run (optional)' }, exit_code: { type: 'integer', description: 'Exit code of the command (optional)' } }, required: ['assertion_id', 'evidence'] } } }, { type: 'function', function: { name: 'contract_assert_skip', description: 'Mark an assertion as skipped (not applicable in current scope). Skipped assertions count as resolved for the done-guard.', parameters: { type: 'object', properties: { assertion_id: { type: 'string', description: 'Assertion id' }, reason: { type: 'string', description: 'Why this assertion is being skipped' } }, required: ['assertion_id', 'reason'] } } }, + { type: 'function', function: { name: 'use_skill', description: 'Load the full body of a skill by name. Use this when the skill index lists a skill relevant to your task. Returns the full skill content plus any related skill descriptions.', parameters: { type: 'object', properties: { name: { type: 'string', description: 'Skill name from the index' } }, required: ['name'] } } }, + { type: 'function', function: { name: 'spawn_agent', description: 'Spawn a named sub-agent to perform a focused task. The agent runs in isolation with a narrowed tool set and returns its output. Use when you need a specialist agent (e.g. a code reviewer) to handle a subtask independently.', parameters: { type: 'object', properties: { agent: { type: 'string', description: 'Agent name (from /agents list)' }, task: { type: 'string', description: 'Task description for the agent' } }, required: ['agent', 'task'] } } }, ]; // ─── Provider Tools ───────────────────────────────────────────────────────── diff --git a/bin/tui_commands.js b/bin/tui_commands.js new file mode 100644 index 00000000..ee3da51d --- /dev/null +++ b/bin/tui_commands.js @@ -0,0 +1,44 @@ +'use strict'; + +// Slash-command resolution for the fullscreen TUI. 
// The fullscreen TUI captures stdout and hands slash commands a mock readline,
// so any command whose handler needs a real interactive terminal can't run
// inside it. resolveTuiCommand() maps a raw command to:
//   { command, guidance }
//   command  — the command string to pass to the normal command handler
//   guidance — extra text to append after the captured output, or null
//
// Today this only special-cases /provider (issue #80): its interactive wizard
// is swapped for a non-interactive status dump plus a pointer to the paths that
// DO work inside the TUI (/endpoint, /model, or the shell wizard). Everything
// else passes through unchanged.

// Subcommands of /provider that are already non-interactive and safe to run
// inside the TUI as-is.
const PROVIDER_STATUS_SUBS = new Set(['status', '--status', '-s']);

const PROVIDER_GUIDANCE = [
  '',
  ' The interactive provider wizard needs a real terminal and',
  ' cannot run inside the full-screen TUI. To reconfigure:',
  ' • /endpoint — switch the API base URL here',
  ' • /model — switch the model here',
  ' • run `smallcode /provider` from your shell for the full wizard',
].join('\n');

// Matches the /provider command token only: it must be followed by whitespace
// or end of input. A plain `\b` would also accept `/provider-foo` or
// `/provider.x`, rewriting an unrelated command into `/provider status`.
const PROVIDER_COMMAND_RE = /^\/provider(?:\s|$)/;

/**
 * Resolve a raw slash command for execution inside the fullscreen TUI.
 *
 * @param {string} cmd - raw command text (null/undefined is treated as '')
 * @returns {{command: string, guidance: (string|null)}} the command to hand to
 *   the normal handler, and optional text to append after its output
 */
function resolveTuiCommand(cmd) {
  const raw = String(cmd || '');
  if (!PROVIDER_COMMAND_RE.test(raw)) {
    return { command: raw, guidance: null };
  }
  const sub = raw.replace(/^\/provider\s*/, '').trim();
  if (PROVIDER_STATUS_SUBS.has(sub)) {
    return { command: raw, guidance: null };
  }
  // Bare /provider (or an unknown subcommand): show status + guidance instead
  // of silently launching a wizard the TUI can't drive.
  return { command: '/provider status', guidance: PROVIDER_GUIDANCE };
}
+ Falls back to `addTool` if the handle is missing. + - `setContextMeter(pct, used, window)` → footer indicator `ctx 42% (13k/32k)`. +- **TokenMonitor**: track `lastPromptTokens`; `contextMeter(window)` → `{ pct, + used, window }`. +- **Agent loop** (`bin/smallcode.js`): at the tool-dispatch site, when + `tools` on + fullscreen, `toolStart` before exec and `toolEnd` after — wired + through the existing `console.log`/`stdout.write` overrides so there is no + duplicate line. When off, the current behavior is unchanged. After each tool + and each turn, update `setContextMeter` when `context` on. +- **`/live` command**: handler in `bin/commands.js` + TUI palette entry. + +## Phase B — streaming + thinking (gated, isolated risk) + +- In `chatCompletion` (`bin/smallcode.js`), when `stream` on: set + `body.stream = true`, consume SSE incrementally (reuse the `model_client.js` + pattern), call `streamToken(delta.content)` for visible text, route + `reasoning_content`/`<think>` deltas to a dimmed area when `thinking` on, + accumulate `tool_calls` deltas, then **reassemble the exact same `data` + object** the function returns today so all downstream logic is untouched. +- Any streaming error falls back to the response so far. The non-streaming + path (default) is left byte-for-byte unchanged. +- Extract SSE assembly into a testable helper. + +## Testing + +- `live_settings`: env parsing + `/live` resolution (pure unit tests). +- TUI: `toolStart`/`toolEnd` line mutation; `setContextMeter` formatting. +- Phase B: SSE-assembly helper fed canned chunks → assert assembled `data` + + `streamToken` call sequence. + +## Non-goals (YAGNI) + +- Persisting toggle state across restarts (env default + runtime only). +- A separate scrollable "activity" pane — reuse the existing chat + tool panel.
/**
 * Tool names to expose in the prompt for a classified task category.
 *
 * `use_skill` is part of every tool-bearing list: the skill index is injected
 * on every turn, so the model must always be able to pull a skill body
 * regardless of how the task was classified (~80 token cost). `respond` gets
 * no tools; an unrecognized category gets the general-purpose fallback list.
 *
 * @param {string} category - category name produced by the classifier
 * @returns {string[]} tool names to include in the prompt (a fresh array per call)
 */
function getToolsForCategory(category) {
  // Built per call so callers can mutate the returned array freely.
  const byCategory = new Map([
    ['code_intel', ['graph_search', 'explain_symbol', 'read_file', 'find_files', 'search', 'hybrid_search', 'use_skill']],
    ['read', ['read_file', 'list_projects', 'graph_search', 'find_files', 'find_and_read', 'use_skill']],
    ['write', ['read_file', 'write_file', 'patch', 'bash', 'read_and_patch', 'create_and_run', 'use_skill', 'spawn_agent']],
    ['search', ['search', 'find_files', 'graph_search', 'read_file', 'explain_symbol', 'search_and_read', 'hybrid_search', 'use_skill']],
    ['run', ['bash', 'run', 'read_file', 'use_skill']],
    ['plan', [
      'read_file', 'write_file', 'patch', 'bash', 'search', 'find_files', 'graph_search',
      'memory_load', 'memory_remember', 'bone_compile', 'bone_check', 'read_and_patch',
      'create_and_run', 'find_and_read', 'search_and_read', 'use_skill', 'spawn_agent',
    ]],
    ['web', ['web_search', 'web_fetch', 'read_file', 'use_skill']],
    ['respond', []], // No tools needed for pure responses
  ]);

  const listed = byCategory.get(category);
  if (listed !== undefined) return listed;
  return ['read_file', 'write_file', 'patch', 'bash', 'search', 'use_skill', 'spawn_agent'];
}
// SmallCode — Memory Hygiene
// Moves memory objects between the hot and archive tiers and prunes stale
// entries. runHygiene() swallows every error it encounters.
//
// Tier model:
//   hot     — default for entries that carry no tier yet
//   archive — dormant
//
// Rules applied by runHygiene(), in order:
//   hot     + unused for more than ARCHIVE_AGE days → archive
//   archive + unused for more than DELETE_AGE days  → forget
//   hot count > HOT_CAP → the BATCH least-recently-used hot entries → archive

const HOT_CAP = 20; // max hot-tier entries
const BATCH = 5; // how many to archive per cap sweep
const ARCHIVE_AGE = 60; // days unused before hot → archive
const DELETE_AGE = 90; // days since last use before an archived entry is deleted
const MS_PER_DAY = 86400000;

/** True when `tag` is a string that starts with `prefix` (non-string tags never match). */
function tagHasPrefix(tag, prefix) {
  return typeof tag === 'string' && tag.startsWith(prefix);
}

/**
 * Wrap a store in a common interface. A store that exposes `update()` is
 * treated as the SQLite-backed store; anything else is treated as the
 * fallback MemoryStore whose objects are mutated in place.
 *
 * Returns { all, getMeta, setMeta, forget } where:
 *   all()            → MemoryObject[] (a snapshot copy of store.all())
 *   getMeta(obj)     → { tier, last_used_at } (same result as extractMeta)
 *   setMeta(obj, m)  → void (mutates in-place for fallback; calls update() for SQLite)
 *   forget(id)       → void
 */
function makeAdapter(store) {
  const isSqlite = typeof store.update === 'function';

  function all() {
    // Copy so that forget() calls made while iterating cannot shift a live array.
    return [...store.all()];
  }

  function getMeta(obj) {
    return extractMeta(obj);
  }

  function setMeta(obj, meta) {
    if (isSqlite) {
      // SQLite store has update() — use it to avoid the forget+remember dedup
      // trap (re-inserting identical content is blocked by content_hash check).
      // tier/last_used_at are encoded into the tags array so no schema change
      // is needed on budget-aware-mcp.
      try {
        const existingTags = (obj.tags || []).filter(
          (t) => !tagHasPrefix(t, 'tier:') && !tagHasPrefix(t, 'last_used:')
        );
        const newTags = [
          ...existingTags,
          `tier:${meta.tier}`,
          `last_used:${meta.last_used_at}`,
        ];
        store.update(obj.id, { tags: newTags });
      } catch {
        // Best-effort: a failed update leaves the entry as it was.
      }
    } else {
      // Fallback MemoryStore (bin/memory.js): mutate in-place and save.
      obj.tier = meta.tier;
      obj.last_used_at = meta.last_used_at;
      if (typeof store.save === 'function') {
        try { store.save(); } catch { /* best-effort persist */ }
      }
    }
  }

  function forget(id) {
    try { store.forget(id); } catch { /* best-effort delete */ }
  }

  return { all, getMeta, setMeta, forget };
}

/**
 * Extract tier/last_used_at from a memory object regardless of store type.
 *
 * When both direct properties are present they are returned as-is. Otherwise
 * each value starts from its direct property (tier defaults to 'hot';
 * last_used_at falls back to createdAt, created_at, then the Unix epoch) and
 * is overridden by a 'tier:X' / 'last_used:ISO' tag when one exists.
 * Non-string tags are ignored.
 *
 * @param {object} obj - memory object
 * @returns {{tier: string, last_used_at: string}}
 */
function extractMeta(obj) {
  // Direct properties (fallback MemoryStore)
  if (obj.tier && obj.last_used_at) {
    return { tier: obj.tier, last_used_at: obj.last_used_at };
  }
  let tier = obj.tier || 'hot';
  let last_used_at = obj.last_used_at || obj.createdAt || obj.created_at || new Date(0).toISOString();
  // Tags encoding (SQLite store)
  for (const t of obj.tags || []) {
    if (tagHasPrefix(t, 'tier:')) tier = t.slice(5);
    if (tagHasPrefix(t, 'last_used:')) last_used_at = t.slice(10);
  }
  return { tier, last_used_at };
}

/**
 * Run hygiene on the store. Silent: never throws; on an internal error the
 * counts accumulated so far are returned.
 *
 * @param {object} store — MemoryStore or budget-aware-mcp store
 * @param {object} [opts]
 * @param {number} [opts.hotCap=20]
 * @param {number} [opts.batch=5]
 * @param {number} [opts.archiveAge=60] days
 * @param {number} [opts.deleteAge=90] days
 * @returns {{ archived: number, deleted: number, total: number }}
 */
function runHygiene(store, opts = {}) {
  const hotCap = opts.hotCap ?? HOT_CAP;
  const batch = opts.batch ?? BATCH;
  const archiveAge = opts.archiveAge ?? ARCHIVE_AGE;
  const deleteAge = opts.deleteAge ?? DELETE_AGE;

  let archived = 0;
  let deleted = 0;

  try {
    const adapter = makeAdapter(store);
    const now = Date.now();

    // Backfill: give a tier + last_used_at to any entry that has neither a
    // tier property nor a tier tag.
    for (const obj of adapter.all()) {
      const hasTierTag = obj.tags?.some((t) => tagHasPrefix(t, 'tier:'));
      if (!obj.tier && !hasTierTag) {
        adapter.setMeta(obj, { tier: 'hot', last_used_at: extractMeta(obj).last_used_at });
      }
    }

    // ── Age sweep ────────────────────────────────────────────────────────────
    // Re-read after backfill so we have fresh state.
    for (const obj of adapter.all()) {
      const { tier, last_used_at } = extractMeta(obj);
      // An unparseable date yields NaN, which fails both comparisons below.
      const ageDays = (now - new Date(last_used_at).getTime()) / MS_PER_DAY;

      if (tier === 'hot' && ageDays > archiveAge) {
        adapter.setMeta(obj, { tier: 'archive', last_used_at });
        archived++;
      } else if (tier === 'archive' && ageDays > deleteAge) {
        adapter.forget(obj.id);
        deleted++;
      }
    }

    // ── Cap sweep ────────────────────────────────────────────────────────────
    // Re-read to get an up-to-date list (the age sweep may have archived some).
    const stillHot = adapter.all().filter((obj) => extractMeta(obj).tier === 'hot');

    if (stillHot.length > hotCap) {
      // Oldest last_used_at first.
      const lastUsedMs = (obj) => new Date(extractMeta(obj).last_used_at).getTime();
      stillHot.sort((a, b) => lastUsedMs(a) - lastUsedMs(b));
      for (const obj of stillHot.slice(0, batch)) {
        adapter.setMeta(obj, { tier: 'archive', last_used_at: extractMeta(obj).last_used_at });
        archived++;
      }
    }
  } catch {
    // Hygiene must never crash the session.
  }

  return { archived, deleted, total: archived + deleted };
}

/**
 * Render a human-readable memory index to a markdown string.
 * Hot entries come before archive. Grouped by type within each tier.
 * This output is GENERATED — never authoritative.
 *
 * @param {object} store
 * @returns {string} markdown; a fixed "(error rendering)" document if anything throws
 */
function renderMemoryIndex(store) {
  try {
    const objects = store.all();
    if (objects.length === 0) return '# Memory Index\n\n(empty)\n';

    const hot = [];
    const archive = [];
    for (const obj of objects) {
      const { tier } = extractMeta(obj);
      if (tier === 'archive') archive.push(obj);
      else hot.push(obj);
    }

    function groupByType(objs) {
      // Null-prototype object: a type named like an Object.prototype member
      // (e.g. "__proto__", "constructor") is grouped like any other key.
      const groups = Object.create(null);
      for (const o of objs) {
        if (!groups[o.type]) groups[o.type] = [];
        groups[o.type].push(o);
      }
      return groups;
    }

    function renderGroup(groups) {
      let out = '';
      for (const [type, objs] of Object.entries(groups)) {
        out += `\n### ${type} (${objs.length})\n`;
        for (const o of objs) {
          out += `- [${o.id}] **${o.title}**\n`;
        }
      }
      return out;
    }

    let md = `# Memory Index\n\nGenerated: ${new Date().toISOString()}\n`;
    md += `Total: ${objects.length} (hot: ${hot.length}, archive: ${archive.length})\n`;

    if (hot.length > 0) {
      md += '\n## Hot\n';
      md += renderGroup(groupByType(hot));
    }
    if (archive.length > 0) {
      md += '\n## Archive\n';
      md += renderGroup(groupByType(archive));
    }

    return md;
  } catch {
    return '# Memory Index\n\n(error rendering)\n';
  }
}
// Drafts quarantine: agents/drafts/ is never auto-loaded (Phase 3 will
// write agent drafts there; promotion via a future /evolve promote-agent).

// Reuse the same regex pair as skills.js for consistency
const FM_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/;
const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/;

/** Strip one pair of matching surrounding quotes: `"x"` or `'x'` → `x`. */
function unquote(text) {
  const quoted = text.match(/^(['"])(.*)\1$/);
  return quoted ? quoted[2] : text;
}

/**
 * Loads agent definitions from markdown files: the bundled `agents/`
 * directory first, then `<projectDir>/.smallcode/agents/`, so a project-level
 * agent replaces a bundled one with the same name.
 */
class AgentLoader {
  /**
   * @param {string} [projectDir] - project root; defaults to process.cwd()
   */
  constructor(projectDir) {
    this.projectDir = projectDir || process.cwd();
    this._agents = new Map(); // name → AgentDef
    this._load();
  }

  _agentDir() {
    return path.join(this.projectDir, '.smallcode', 'agents');
  }

  _bundledDir() {
    return path.join(__dirname, '..', '..', 'agents');
  }

  /** Ingest every `*.md` file directly inside `dir`; a missing or unreadable dir is ignored. */
  _loadDir(dir) {
    if (!fs.existsSync(dir)) return;
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      // Directories are never descended into, which also keeps drafts/
      // quarantined until Phase 3 promote.
      if (entry.isDirectory()) continue;
      if (!entry.name.endsWith('.md')) continue;
      this._ingest(path.join(dir, entry.name), entry.name.replace(/\.md$/i, ''));
    }
  }

  _load() {
    // Bundled defaults first; project-level overrides (Map.set overwrites same name)
    this._loadDir(this._bundledDir());
    this._loadDir(this._agentDir());
  }

  /**
   * Parse `key: value` frontmatter lines. Lines that do not match are skipped.
   * `[a, b]` values become string arrays; scalar values lose one pair of
   * surrounding quotes.
   *
   * @param {string} frontmatter - text between the `---` fences
   * @returns {object} null-prototype map of key → string | string[]
   */
  _parseMeta(frontmatter) {
    // Null prototype: a `__proto__:` line is stored as ordinary data instead
    // of replacing the object's prototype.
    const meta = Object.create(null);
    for (const rawLine of frontmatter.split(/\r?\n/)) {
      const m = rawLine.match(KV_RE);
      if (!m) continue;
      let value = m[2].trim();
      if (value.startsWith('[') && value.endsWith(']')) {
        // Inline array: tools: [read_file, bash]
        value = value.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean);
      } else {
        // Quoted scalar: name: "code-engineer" → code-engineer
        value = unquote(value);
      }
      meta[m[1]] = value;
    }
    return meta;
  }

  /**
   * Read one agent file and register it. An unreadable file is skipped.
   *
   * @param {string} filePath - path of the markdown file
   * @param {string} defaultName - name used when the frontmatter has no usable `name`
   */
  _ingest(filePath, defaultName) {
    let content;
    try {
      content = fs.readFileSync(filePath, 'utf-8');
    } catch {
      return;
    }
    // A leading UTF-8 BOM would stop FM_RE from matching at position 0.
    content = content.replace(/^\uFEFF/, '');

    const fmMatch = content.match(FM_RE);
    let meta = {};
    let body = content;

    if (fmMatch) {
      meta = this._parseMeta(fmMatch[1]);
      body = fmMatch[2];
    }

    // `name: [x]` parses to an array; only a non-empty string is a usable name.
    const name = typeof meta.name === 'string' && meta.name ? meta.name : defaultName;
    const tools = Array.isArray(meta.tools) ? meta.tools : [];
    const description = meta.description || '';
    const model = meta.model || null;

    this._agents.set(name, {
      name,
      description,
      tools,
      model,
      body: body.trim(),
      path: filePath,
    });
  }

  // Returns all agent definitions (without body/path)
  list() {
    return [...this._agents.values()].map(a => ({
      name: a.name,
      description: a.description,
      tools: a.tools,
      model: a.model,
    }));
  }

  // Returns a single agent definition or null
  get(name) {
    return this._agents.get(name) || null;
  }
}
// Isolation guarantees:
// - Initial history = [{role:'user', content: task}] ONLY (never parent history)
// - Narrowed tools = agentDef.tools ∩ canonical TOOLS; always includes read_file
// - System prompt = agent body (capped 1600 chars) + tool list line
// - Non-streaming, direct fetch to model endpoint
// - Hard caps: MAX_STEPS=15, token budget min(8000, ctx*0.3)
// - No MCP, no plugins, no nested repair calls
// - run() NEVER throws — always returns AgentResult {output, steps, tokens, error?}

const MAX_STEPS = 15;
const BODY_CAP = 1600;
const BODY_CAP_MARKER = '[truncated]';
const CHARS_PER_TOKEN = 4;

// All tool names in the canonical TOOLS array
const CANONICAL_TOOL_NAMES = new Set(TOOLS.map(t => t.function.name));

/**
 * Build the narrowed tool list for a sub-agent.
 * Intersection of agentDef.tools with canonical TOOLS; read_file always present.
 * @param {string[]} agentTools - tools listed in agent frontmatter
 * @returns {object[]} tool definitions, in canonical TOOLS order
 */
function buildNarrowedTools(agentTools) {
  const requested = new Set(agentTools || []);
  // Always include read_file
  requested.add('read_file');

  return TOOLS.filter(t => {
    const name = t.function.name;
    return CANONICAL_TOOL_NAMES.has(name) && requested.has(name);
  });
}

/**
 * Build the sub-agent system prompt.
 * Agent body capped at BODY_CAP chars (a truncation marker is appended when
 * cut); the tool list is appended on a final line.
 * @param {object} agentDef
 * @param {object[]} tools
 * @returns {string}
 */
function buildSubAgentPrompt(agentDef, tools) {
  let body = agentDef.body || '';
  if (body.length > BODY_CAP) {
    body = body.slice(0, BODY_CAP) + ' ' + BODY_CAP_MARKER;
  }
  const toolNames = tools.map(t => t.function.name).join(', ');
  return `${body}\n\nAvailable tools: ${toolNames}`;
}

/**
 * Resolve the model target for a sub-agent.
 * If agentDef.model names a tier (fast/default/medium/strong), use getModelTarget.
 * Otherwise treat as a literal model name via getModelTargetForModel.
 * Falls back to default tier if unset.
 * @param {object} config
 * @param {object} agentDef
 * @returns {object} model target
 */
function resolveAgentTarget(config, agentDef) {
  const TIERS = new Set(['fast', 'default', 'medium', 'strong']);
  const modelField = agentDef.model;
  if (!modelField) return getModelTarget(config, 'default');
  if (TIERS.has(modelField)) return getModelTarget(config, modelField);
  return getModelTargetForModel(config, modelField);
}

/**
 * Turn an executeTool() return value into the string content of a `tool`
 * message. A missing result is reported as an error string instead of
 * raising, and a non-string `result` is JSON-encoded.
 * @param {*} toolResult
 * @returns {string}
 */
function formatToolResult(toolResult) {
  if (toolResult == null) return 'Error: tool returned no result';
  if (toolResult.error) return `Error: ${toolResult.error}`;
  const payload = toolResult.result || JSON.stringify(toolResult);
  return typeof payload === 'string' ? payload : String(JSON.stringify(payload));
}

class AgentRunner {
  /**
   * @param {object} agentDef - agent definition ({ tools, model, body, ... })
   * @param {object} ctx - { config, flags, tui, skillManager }
   */
  constructor(agentDef, ctx) {
    this.agentDef = agentDef;
    this.ctx = ctx; // { config, flags, tui, skillManager }
  }

  /**
   * Run the agent on `task` as an isolated, bounded sub-conversation.
   * Never throws: every failure, including a failure while preparing the
   * request, is returned in the `error` field.
   *
   * @param {string} task - user task; becomes the only initial history entry
   * @returns {Promise<{output: string, steps: number, tokens: number, error?: string}>}
   */
  async run(task) {
    let steps = 0;
    let totalTokens = 0;

    try {
      // Setup lives inside the try so a bad ctx/config/agentDef is reported
      // through `error` rather than escaping as an exception.
      const { agentDef, ctx } = this;
      const { config } = ctx;

      const tools = buildNarrowedTools(agentDef.tools);
      const systemPrompt = buildSubAgentPrompt(agentDef, tools);
      const target = resolveAgentTarget(config, agentDef);
      const requestConfig = withModelTarget(config, target);
      const baseUrl = target.baseUrl;

      // Token budget
      const detectedWindow = config?.context?.detected_window || 32768;
      const tokenBudget = Math.min(8000, Math.floor(detectedWindow * 0.3));

      // Isolated history — only the user task, never parent history
      const history = [{ role: 'user', content: task }];

      while (steps < MAX_STEPS) {
        // Rough size estimate from message content only (CHARS_PER_TOKEN chars per token).
        const estimatedTokens = history.reduce((sum, m) => {
          const c = typeof m.content === 'string' ? m.content : JSON.stringify(m.content || '');
          return sum + Math.ceil(c.length / CHARS_PER_TOKEN);
        }, 0);

        if (estimatedTokens > tokenBudget) {
          const partial = history.filter(m => m.role === 'assistant').map(m => m.content || '').join('\n').trim();
          return { output: partial || '(token budget exhausted)', steps, tokens: totalTokens };
        }

        const body = {
          model: target.model,
          messages: [{ role: 'system', content: systemPrompt }, ...history],
          temperature: 0.1,
          max_tokens: 1024,
        };
        if (tools.length > 0) {
          body.tools = tools;
        }

        const headers = buildAuthHeaders(requestConfig);

        let data;
        try {
          const response = await fetch(`${baseUrl}/chat/completions`, {
            method: 'POST',
            headers,
            body: JSON.stringify(body),
          });
          if (!response.ok) {
            const errText = await response.text().catch(() => '');
            return { output: '', steps, tokens: totalTokens, error: `HTTP ${response.status}: ${errText.slice(0, 200)}` };
          }
          data = await response.json();
        } catch (fetchErr) {
          return { output: '', steps, tokens: totalTokens, error: fetchErr.message };
        }

        if (data?.usage) {
          totalTokens += (data.usage.prompt_tokens || 0) + (data.usage.completion_tokens || 0);
        }

        const choice = data?.choices?.[0];
        if (!choice) {
          return { output: '', steps, tokens: totalTokens, error: 'Empty response from model' };
        }

        const message = choice.message || {};
        history.push({ role: 'assistant', content: message.content || null, tool_calls: message.tool_calls });
        steps++;

        // If no tool calls, we have a final text response
        if (!message.tool_calls || message.tool_calls.length === 0) {
          return { output: (message.content || '').trim(), steps, tokens: totalTokens };
        }

        // Check finish reason — stop if done.
        // NOTE(review): this returns without executing the tool calls that
        // accompany a 'stop' finish reason — confirm no supported server
        // reports 'stop' together with tool_calls.
        if (choice.finish_reason === 'stop' || choice.finish_reason === 'end_turn') {
          return { output: (message.content || '').trim(), steps, tokens: totalTokens };
        }

        // Execute tool calls — stripped ctx: no MCP, no plugins
        const toolCtx = {
          config: ctx.config,
          flags: ctx.flags || {},
          tui: ctx.tui || { renderDiff: () => null },
          memoryStore: null,
          mcpCall: async () => null,
          pluginLoader: null,
          mcpClient: null,
          skillManager: ctx.skillManager || null,
          _fullscreenRef: null,
        };

        for (const toolCall of message.tool_calls) {
          const toolName = toolCall.function?.name;
          let toolArgs;
          // No repairToolCall — on JSON.parse failure use {} and let tool error
          try {
            toolArgs = JSON.parse(toolCall.function?.arguments || '{}');
          } catch {
            toolArgs = {};
          }

          let toolResult;
          try {
            toolResult = await executeTool(toolName, toolArgs, toolCtx);
          } catch (e) {
            toolResult = { error: e.message };
          }

          history.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            content: formatToolResult(toolResult),
          });
        }
      }

      // Stepped out — return whatever we have
      const lastAssistant = [...history].reverse().find(m => m.role === 'assistant');
      const finalText = (lastAssistant?.content || '').trim();
      return { output: finalText || '(max steps reached)', steps, tokens: totalTokens };
    } catch (err) {
      return { output: '', steps, tokens: totalTokens, error: err?.message || String(err) };
    }
  }
}
const fs = require('fs');
const path = require('path');

/**
 * Append one entry to a JSONL log as a single line.
 * The existing content plus the new line is written to a tmp file which is
 * then renamed over the log, so the log is never left half-written.
 * @param {string} filePath - path of the JSONL log; parent dirs are created
 * @param {*} entry - JSON-serialisable value to record
 * @throws {TypeError} when entry has no JSON representation
 * @throws {Error} when the existing log cannot be read (other than not existing) or the write fails
 */
function appendEntry(filePath, entry) {
  const serialised = JSON.stringify(entry);
  if (serialised === undefined) {
    throw new TypeError('audit entry is not JSON-serialisable');
  }
  fs.mkdirSync(path.dirname(filePath), { recursive: true });

  let existing = '';
  try {
    existing = fs.readFileSync(filePath, 'utf-8');
  } catch (err) {
    // Only "no log yet" may start from empty. Any other read failure must
    // abort: continuing would rename a one-line file over the real history.
    if (err.code !== 'ENOENT') throw err;
  }
  // Keep one object per line even if the previous content lost its newline
  if (existing && !existing.endsWith('\n')) existing += '\n';

  const tmpPath = `${filePath}.tmp.${process.pid}.${Date.now()}`;
  try {
    fs.writeFileSync(tmpPath, `${existing}${serialised}\n`, 'utf-8');
    fs.renameSync(tmpPath, filePath);
  } catch (err) {
    // Don't leave stray tmp files next to the log when the write fails
    fs.rmSync(tmpPath, { force: true });
    throw err;
  }
}

/**
 * Read the most recent entries from a JSONL log.
 * Blank and unparseable lines are skipped; a missing/unreadable log yields [].
 * @param {string} filePath
 * @param {number} [limit=100] - maximum number of trailing entries; <= 0 yields []
 * @returns {Array<*>} parsed entries, oldest first
 */
function readEntries(filePath, limit = 100) {
  let content = '';
  try { content = fs.readFileSync(filePath, 'utf-8'); } catch { return []; }
  const entries = [];
  for (const line of content.split('\n')) {
    if (!line.trim()) continue;
    try { entries.push(JSON.parse(line)); } catch {}
  }
  // slice(-0) is slice(0) and would return everything, so handle <= 0 / NaN here
  const count = Math.floor(Number(limit));
  if (!(count > 0)) return [];
  return entries.slice(-count);
}

module.exports = { appendEntry, readEntries };

// ---- diff --git a/src/plugins/evolver.js b/src/plugins/evolver.js (new file mode 100644, index 00000000..139a3123) ----
// SmallCode — Evolver (create-mode mechanics)
// Deterministic mechanics behind the /evolve command: proposal building,
// validation, name-collision checking, quarantined draft writing, audit
// logging, and structural enforcement of the 1-create-per-run cap.
//
// The fuzzy judgment (is this friction worth a skill?) happens in the
// command handler via an LLM call. Everything here is pure mechanics so it
// can be unit-tested without a model.
+// +// Safety rules (mirrors the create-mode evolver pattern): +// - Drafts only: writes go to .smallcode/skills/drafts/, never live dirs +// - Never deletes, never commits +// - validateProposal must pass before any write +// - EvolverRun raises on the 2nd create in a single run + +const fs = require('fs'); +const path = require('path'); +const { appendEntry } = require('./audit_log'); + +const MAX_CREATES_PER_RUN = 1; +const NAME_RE = /^[A-Za-z0-9_-]+$/; +const VALID_TRIGGERS = new Set(['manual', 'auto', 'match']); + +class ProposalCapExceededError extends Error {} + +// ── Builders ────────────────────────────────────────────────────────────── + +function buildSkillProposal(name, description, body, options = {}) { + return { + kind: 'create', + artefact: 'skill', + name, + description, + body, + trigger: options.trigger || 'manual', + keywords: Array.isArray(options.keywords) ? options.keywords : [], + rationale: options.rationale || '', + }; +} + +// ── Validation ──────────────────────────────────────────────────────────── + +function validateProposal(proposal) { + const errors = []; + if (!proposal || typeof proposal !== 'object') return ['proposal must be an object']; + + if (proposal.artefact !== 'skill') { + errors.push(`artefact must be "skill", got ${JSON.stringify(proposal.artefact)}`); + } + if (typeof proposal.name !== 'string' || !NAME_RE.test(proposal.name)) { + errors.push('name must be a non-empty alphanumeric/-_ string'); + } + if (typeof proposal.description !== 'string' || !proposal.description.trim()) { + errors.push('description must be a non-empty string'); + } else if (/[\r\n]/.test(proposal.description)) { + errors.push('description must not contain newlines (frontmatter-injection risk)'); + } + if (typeof proposal.body !== 'string' || !proposal.body.trim()) { + errors.push('body must be a non-empty string'); + } + if (!VALID_TRIGGERS.has(proposal.trigger)) { + errors.push(`trigger must be one of manual|auto|match, got 
${JSON.stringify(proposal.trigger)}`); + } + if (proposal.trigger === 'match' && (!Array.isArray(proposal.keywords) || proposal.keywords.length === 0)) { + errors.push('trigger "match" requires a non-empty keywords list'); + } + return errors; +} + +// ── Name-collision check ────────────────────────────────────────────────── + +// Look for an existing skill with this name across the standard skill dirs +// (live and drafts). Returns the first matching path or null. +function checkNameCollision(name, projectDir) { + const os = require('os'); + const roots = [ + path.join(projectDir, '.smallcode', 'skills'), + path.join(os.homedir(), '.smallcode', 'skills'), + path.join(os.homedir(), '.config', 'smallcode', 'skills'), + ]; + for (const root of roots) { + for (const candidate of [ + path.join(root, `${name}.md`), + path.join(root, name, 'SKILL.md'), + path.join(root, 'drafts', `${name}.md`), + ]) { + if (fs.existsSync(candidate)) return candidate; + } + } + return null; +} + +// ── Draft writer ────────────────────────────────────────────────────────── + +function _skillMd(proposal) { + const fm = [ + '---', + `name: ${proposal.name}`, + `description: ${proposal.description}`, + `trigger: ${proposal.trigger}`, + proposal.keywords.length ? 
`keywords: [${proposal.keywords.join(', ')}]` : null, + '---', + ].filter(Boolean).join('\n'); + let body = proposal.body.trim() + '\n'; + if (proposal.rationale) { + body += `\n/g, '')} -->\n`; + } + return `${fm}\n${body}`; +} + +function writeDraft(proposal, projectDir) { + const errors = validateProposal(proposal); + if (errors.length) throw new Error(`invalid proposal: ${errors.join('; ')}`); + + const draftsDir = path.resolve(projectDir, '.smallcode', 'skills', 'drafts'); + const target = path.resolve(draftsDir, `${proposal.name}.md`); + // Path containment — name is already validated, but defend anyway + if (!target.startsWith(draftsDir + path.sep)) { + throw new Error(`draft path escapes drafts dir: ${target}`); + } + if (!fs.existsSync(draftsDir)) fs.mkdirSync(draftsDir, { recursive: true }); + const tmpPath = target + `.tmp.${process.pid}.${Date.now()}`; + fs.writeFileSync(tmpPath, _skillMd(proposal), 'utf-8'); + fs.renameSync(tmpPath, target); + return target; +} + +// ── Audit log ───────────────────────────────────────────────────────────── + +function logCreateEvent(auditPath, proposal, rationale, sourceTraceIds) { + appendEntry(auditPath, { + ts: new Date().toISOString(), + kind: 'create', + artefact: proposal.artefact, + name: proposal.name, + rationale: rationale || proposal.rationale || '', + source_traces: Array.isArray(sourceTraceIds) ? sourceTraceIds : [], + }); +} + +// ── Per-run cap (structural) ────────────────────────────────────────────── + +// Stateful tracker enforcing the create cap by construction. Use this, not +// writeDraft directly, when running an evolution pass. 
+class EvolverRun { + constructor(maxCreates = MAX_CREATES_PER_RUN) { + this.maxCreates = maxCreates; + this.createsSoFar = 0; + this.written = []; + } + + writeDraft(proposal, projectDir) { + if (proposal && proposal.kind === 'create' && this.createsSoFar >= this.maxCreates) { + throw new ProposalCapExceededError( + `already wrote ${this.createsSoFar} create(s); cap is ${this.maxCreates}` + ); + } + const target = writeDraft(proposal, projectDir); + if (proposal.kind === 'create') this.createsSoFar++; + this.written.push(target); + return target; + } +} + +module.exports = { + buildSkillProposal, + validateProposal, + checkNameCollision, + writeDraft, + logCreateEvent, + EvolverRun, + ProposalCapExceededError, + MAX_CREATES_PER_RUN, +}; diff --git a/src/plugins/friction_analyzer.js b/src/plugins/friction_analyzer.js new file mode 100644 index 00000000..0165db58 --- /dev/null +++ b/src/plugins/friction_analyzer.js @@ -0,0 +1,133 @@ +// SmallCode — Friction Analyzer +// Deterministic friction-signal extraction from saved traces. No LLM calls — +// this produces the evidence the /evolve command hands to the model for +// judgment. +// +// Signals: +// - repeated_patterns: near-duplicate prompts appearing 3+ times with no +// matching skill keyword (the user keeps asking for the same thing by hand) +// - tool_retry_loops: 3+ consecutive failed calls of the same tool against +// the same file within a trace (the model keeps fighting the same wall) + +const REPEAT_THRESHOLD = 3; +const RETRY_THRESHOLD = 3; +const SIMILARITY_THRESHOLD = 0.5; + +// Filler words carry no task identity but dilute Jaccard similarity — +// "another seating chart please" must cluster with "a seating chart for..." 
const STOPWORDS = new Set([
  'the', 'and', 'for', 'with', 'that', 'this', 'these', 'those', 'from',
  'into', 'onto', 'please', 'can', 'you', 'could', 'would', 'will',
  'another', 'again', 'new', 'now', 'just', 'some', 'all', 'any',
  'make', 'give', 'get', 'want', 'need', 'like',
]);

// Lower-cased content words of a text: alphanumeric runs longer than two
// characters that are not stopwords.
function _wordSet(text) {
  return new Set(
    String(text || '').toLowerCase().split(/[^a-z0-9]+/)
      .filter(w => w.length > 2 && !STOPWORDS.has(w))
  );
}

// Jaccard similarity of two word sets; two empty sets score 0, not NaN.
function _jaccard(a, b) {
  if (a.size === 0 && b.size === 0) return 0;
  let inter = 0;
  for (const w of a) if (b.has(w)) inter++;
  return inter / (a.size + b.size - inter);
}

// Text form of a recorded tool result. Objects are JSON-encoded so the
// `"error":` check below can see them; String() would give "[object Object]".
function _resultText(result) {
  if (result && typeof result === 'object') {
    try {
      return JSON.stringify(result) || '';
    } catch {
      return String(result); // e.g. circular structure
    }
  }
  return String(result || '');
}

// Whether a recorded tool result looks like a failure.
function _isError(result) {
  const s = _resultText(result);
  return s.startsWith('✗') || /"error"\s*:/.test(s) || /^Error[:\s]/.test(s);
}

// Group traces whose prompts are near-duplicates (Jaccard on word sets).
function _findRepeatedPatterns(traces, skillKeywords) {
  const groups = []; // { words, prompts, traceIds }
  for (const t of traces) {
    const words = _wordSet(t.prompt);
    if (words.size === 0) continue;
    let placed = false;
    for (const g of groups) {
      if (_jaccard(words, g.words) >= SIMILARITY_THRESHOLD) {
        g.prompts.push(t.prompt);
        g.traceIds.push(t.id);
        for (const w of words) g.words.add(w);
        placed = true;
        break;
      }
    }
    if (!placed) groups.push({ words, prompts: [t.prompt], traceIds: [t.id] });
  }

  return groups
    .filter(g => g.prompts.length >= REPEAT_THRESHOLD)
    // Skip patterns a skill already covers (any keyword hits the group words)
    .filter(g => !skillKeywords.some(kw => g.words.has(String(kw).toLowerCase())))
    .map(g => ({
      // Prompts are not guaranteed to be strings (word sets are built via String())
      pattern: String(g.prompts[0]).slice(0, 120),
      count: g.prompts.length,
      traceIds: g.traceIds,
    }));
}

// Detect consecutive failed calls of the same tool+file within each trace.
function _findToolRetryLoops(traces) {
  const loops = [];
  for (const t of traces) {
    let runTool = null, runFile = null, failCount = 0;
    // Close the current failure run, recording it if it reached the threshold
    const flush = () => {
      if (failCount >= RETRY_THRESHOLD) {
        loops.push({ tool: runTool, file: runFile, failCount, traceIds: [t.id] });
      }
      runTool = null; runFile = null; failCount = 0;
    };
    const steps = Array.isArray(t.steps) ? t.steps : [];
    for (const step of steps) {
      // Non-tool steps (and malformed null entries) neither extend nor break a run
      if (!step || step.type !== 'tool_call') continue;
      let file = '';
      try {
        const args = typeof step.args === 'string' ? JSON.parse(step.args) : (step.args || {});
        file = args.path || args.file || '';
      } catch {}
      const failed = _isError(step.result);
      if (failed && step.name === runTool && file === runFile) {
        failCount++;
      } else {
        flush();
        if (failed) { runTool = step.name; runFile = file; failCount = 1; }
      }
    }
    flush();
  }
  return loops;
}

/**
 * @param {object[]} traces - full trace objects (TraceRecorder.load shape)
 * @param {object} options - { skillKeywords: string[] } keywords of existing skills
 * @returns FrictionReport
 */
function extractFrictionSignals(traces, options = {}) {
  const skillKeywords = Array.isArray(options.skillKeywords) ? options.skillKeywords : [];
  // Anything that is not an array of objects is treated as "no traces"
  const safe = (Array.isArray(traces) ? traces : []).filter(t => t && typeof t === 'object');
  return {
    repeated_patterns: _findRepeatedPatterns(safe, skillKeywords),
    tool_retry_loops: _findToolRetryLoops(safe),
    analyzed_traces: safe.length,
  };
}

// Compact text rendering of a friction report for the LLM prompt — counts
// and short descriptions only, never full trace content (budget guard).
+function formatReportForPrompt(report) { + const lines = []; + for (const p of report.repeated_patterns) { + lines.push(`- Repeated request (${p.count}x): "${p.pattern}"`); + } + for (const l of report.tool_retry_loops) { + lines.push(`- Tool retry loop: ${l.tool} failed ${l.failCount}x in a row on ${l.file || '(no file)'}`); + } + return lines.join('\n').slice(0, 2000); +} + +module.exports = { extractFrictionSignals, formatReportForPrompt }; diff --git a/src/plugins/skill_index_formatter.js b/src/plugins/skill_index_formatter.js new file mode 100644 index 00000000..defc4258 --- /dev/null +++ b/src/plugins/skill_index_formatter.js @@ -0,0 +1,39 @@ +'use strict'; + +// SmallCode — Skill index formatter +// Produces a compact index string (one line per skill, ~8 tokens each) suitable +// for always-injecting into the system prompt, plus a full-body formatter for +// use_skill results that includes related skill names/descriptions (not bodies). + +/** + * Format a flat index of skills — one line per skill. + * @param {Array<{name:string, description:string, trigger:string, keywords:string[]}>} entries + * @returns {string} + */ +function formatSkillIndex(entries) { + if (!entries || entries.length === 0) return ''; + const lines = entries.map(e => { + const kw = e.keywords && e.keywords.length ? ` [${e.keywords.join(',')}]` : ''; + const desc = e.description ? ` — ${e.description}` : ''; + return ` ${e.name}${desc}${kw}`; + }); + return '\n\nAvailable skills (call use_skill to load):\n' + lines.join('\n'); +} + +/** + * Format a loaded skill body for the use_skill response. + * Appends brief related-skill entries (name + description only, not body). 
+ * @param {object} skill — {name, description, content, keywords, trigger} + * @param {Array<{name:string, description:string}>} relatedEntries — index entries for related skills + * @returns {string} + */ +function formatSkillResult(skill, relatedEntries) { + let out = `[skill:${skill.name}]\n${skill.content}`; + if (relatedEntries && relatedEntries.length > 0) { + const rel = relatedEntries.map(e => ` ${e.name}${e.description ? ' — ' + e.description : ''}`).join('\n'); + out += `\n\nRelated skills:\n${rel}`; + } + return out; +} + +module.exports = { formatSkillIndex, formatSkillResult }; diff --git a/src/plugins/skills.js b/src/plugins/skills.js index 60c06bb1..68205b86 100644 --- a/src/plugins/skills.js +++ b/src/plugins/skills.js @@ -16,7 +16,15 @@ // `.agents/skills` or `.claude/skills` typically have no frontmatter — they // are treated as `manual`-trigger skills named after their parent directory. // +// The standard skill dirs also accept the nested `/SKILL.md` layout and +// flat `.md` files without frontmatter (named after the file) — both were +// previously skipped silently (closes #81). README-style files are ignored. +// // Frontmatter accepts both LF and CRLF line endings (closes #52). +// +// Lazy loading: index entries (frontmatter only) are stored in _index Map. +// Bodies are loaded on demand via _loadBody(name) and cached into skills Map. +// getIndex() returns flat IndexEntry list for prompt injection. const fs = require('fs'); const path = require('path'); @@ -24,11 +32,19 @@ const os = require('os'); const FM_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/; const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/; +// Docs that live alongside skills but aren't skills themselves +const NON_SKILL_MD = /^(readme|changelog|license|contributing)\.md$/i; + +// Max bytes to scan for frontmatter before falling back to full read. +const FRONTMATTER_SCAN_BYTES = 2048; +// Max lines to scan for frontmatter end marker. 
+const FRONTMATTER_SCAN_LINES = 50; class SkillManager { constructor(projectDir) { this.projectDir = projectDir || process.cwd(); - this.skills = new Map(); // name → skill object + this.skills = new Map(); // name → fully-loaded skill object (cached) + this._index = new Map(); // name → IndexEntry (frontmatter + path, no body) this._load(); } @@ -71,14 +87,23 @@ class SkillManager { if (!dir || !fs.existsSync(dir)) return; let entries; try { - entries = fs.readdirSync(dir); + entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; } for (const entry of entries) { - if (!entry.endsWith('.md')) continue; - const full = path.join(dir, entry); - this._ingestFile(full, entry, dir); + if (entry.isDirectory()) { + // drafts/ is quarantined — evolver proposals live there until a + // human promotes them (/evolve promote ). Never auto-load. + if (entry.name === 'drafts') continue; + // //SKILL.md inside a standard skill dir — users following + // the Claude Code layout expect this to work (closes #81) + this._loadSkillFolder(path.join(dir, entry.name), entry.name); + continue; + } + if (!entry.name.endsWith('.md') || NON_SKILL_MD.test(entry.name)) continue; + const full = path.join(dir, entry.name); + this._ingestFile(full, entry.name, dir, entry.name.replace(/\.md$/i, ''), 'flat'); } } @@ -92,106 +117,214 @@ class SkillManager { } for (const d of dirs) { if (!d.isDirectory()) continue; - const skillDir = path.join(root, d.name); - // Look for SKILL.md, skill.md, or any .md file inside the folder. - let skillFile = null; - const candidates = ['SKILL.md', 'skill.md', 'Skill.md']; - for (const c of candidates) { - const p = path.join(skillDir, c); - if (fs.existsSync(p)) { skillFile = p; break; } + this._loadSkillFolder(path.join(root, d.name), d.name); + } + } + + _loadSkillFolder(skillDir, name) { + // Look for SKILL.md, skill.md, or any .md file inside the folder. 
+ let skillFile = null; + const candidates = ['SKILL.md', 'skill.md', 'Skill.md']; + for (const c of candidates) { + const p = path.join(skillDir, c); + if (fs.existsSync(p)) { skillFile = p; break; } + } + if (!skillFile) { + // Fall back to first .md in the folder + try { + const md = fs.readdirSync(skillDir).find(f => f.endsWith('.md')); + if (md) skillFile = path.join(skillDir, md); + } catch {} + } + if (!skillFile) return; + this._ingestFile(skillFile, path.basename(skillFile), skillDir, name, 'nested'); + } + + // Read only enough of the file to extract frontmatter (index-only load). + // Returns { frontmatter: string|null, bodyStart: number } — bodyStart is + // the byte offset where the body begins (after the closing ---). + // Falls back to a full read when the file is small enough or frontmatter + // spans more than FRONTMATTER_SCAN_BYTES. + _readFrontmatterOnly(filePath) { + try { + // Read a limited slice first. + const fd = fs.openSync(filePath, 'r'); + const buf = Buffer.alloc(FRONTMATTER_SCAN_BYTES); + const bytesRead = fs.readSync(fd, buf, 0, FRONTMATTER_SCAN_BYTES, 0); + fs.closeSync(fd); + const chunk = buf.slice(0, bytesRead).toString('utf-8'); + + if (!chunk.startsWith('---')) { + // No frontmatter — full content is body; return null so caller full-reads. + return { frontmatter: null, hasMore: bytesRead === FRONTMATTER_SCAN_BYTES }; + } + + // Find closing --- within FRONTMATTER_SCAN_LINES lines + const lines = chunk.split(/\r?\n/); + let closeIdx = -1; + for (let i = 1; i < Math.min(lines.length, FRONTMATTER_SCAN_LINES); i++) { + if (lines[i].trimEnd() === '---') { closeIdx = i; break; } } - if (!skillFile) { - // Fall back to first .md in the folder - try { - const md = fs.readdirSync(skillDir).find(f => f.endsWith('.md')); - if (md) skillFile = path.join(skillDir, md); - } catch {} + if (closeIdx === -1) { + // Frontmatter not closed within scan window — fall back to full read. 
+ return { frontmatter: null, hasMore: true }; } - if (!skillFile) continue; - this._ingestFile(skillFile, path.basename(skillFile), skillDir, d.name); + + const frontmatter = lines.slice(1, closeIdx).join('\n'); + return { frontmatter, hasMore: bytesRead === FRONTMATTER_SCAN_BYTES }; + } catch { + return { frontmatter: null, hasMore: false }; } } - _ingestFile(filePath, filename, dir, defaultName) { + _ingestFile(filePath, filename, dir, defaultName, origin) { + // Index-only path: read frontmatter cheaply, store as index entry. + // Body is loaded lazily on first get(). + const { frontmatter, hasMore } = this._readFrontmatterOnly(filePath); + + let meta = {}; + if (frontmatter !== null) { + meta = this._parseMeta(frontmatter); + } + + const name = meta.name || defaultName || filename.replace(/\.md$/i, ''); + + const entry = { + name, + trigger: meta.trigger || 'manual', + keywords: Array.isArray(meta.keywords) ? meta.keywords : [], + description: meta.description || '', + tags: Array.isArray(meta.tags) ? meta.tags : [], + related: Array.isArray(meta.related) ? meta.related : [], + path: filePath, + origin: origin || (defaultName ? 'nested' : 'flat'), + // hasFrontmatter: whether the file had a --- block + _hasFrontmatter: frontmatter !== null, + // If the file fits in our scan and has frontmatter, we know + // the body wasn't loaded yet. Track that. + _bodyLoaded: false, + }; + + this._index.set(name, entry); + // Remove any stale cached body for same name (precedence override) + this.skills.delete(name); + } + + _parseMeta(frontmatter) { + const meta = {}; + for (const rawLine of frontmatter.split(/\r?\n/)) { + const m = rawLine.match(KV_RE); + if (!m) continue; + let value = m[2].trim(); + if (value.startsWith('[') && value.endsWith(']')) { + value = value.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean); + } + meta[m[1]] = value; + } + return meta; + } + + // Load the full body for a named skill, populate this.skills cache. 
+ _loadBody(name) { + const entry = this._index.get(name); + if (!entry) return null; + if (entry._bodyLoaded && this.skills.has(name)) return this.skills.get(name); + let content; try { - content = fs.readFileSync(filePath, 'utf-8'); + content = fs.readFileSync(entry.path, 'utf-8'); } catch { - return; + return null; } - const skill = this._parse(content, filename, dir, defaultName); - if (skill) this.skills.set(skill.name, skill); - } - _parse(content, filename, dir, defaultName) { - // Parse YAML frontmatter (CRLF + LF tolerant — closes #52) const fmMatch = content.match(FM_RE); - let frontmatter = ''; let body = content; + let meta = {}; if (fmMatch) { - frontmatter = fmMatch[1]; + meta = this._parseMeta(fmMatch[1]); body = fmMatch[2]; - } else if (!defaultName) { - // Flat-layout files without frontmatter aren't skills (could be a - // README). Nested-layout (.agents/skills//SKILL.md) files are - // accepted as plain-body skills using the parent directory name. - return null; + } else if (!entry._hasFrontmatter) { + // No frontmatter — full file is body (manual trigger, named by filename/dir) + body = content; } - // Tiny YAML parser — no dep needed - const meta = {}; - if (frontmatter) { - for (const rawLine of frontmatter.split(/\r?\n/)) { - const m = rawLine.match(KV_RE); - if (!m) continue; - let value = m[2].trim(); - if (value.startsWith('[') && value.endsWith(']')) { - value = value.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean); - } - meta[m[1]] = value; - } - } - - return { - name: meta.name || defaultName || filename.replace(/\.md$/i, ''), - trigger: meta.trigger || (defaultName ? 'manual' : 'manual'), - keywords: Array.isArray(meta.keywords) ? meta.keywords : [], + const skill = { + name: meta.name || entry.name, + trigger: meta.trigger || entry.trigger, + keywords: Array.isArray(meta.keywords) ? 
meta.keywords : entry.keywords, + description: meta.description || entry.description || '', + tags: Array.isArray(meta.tags) ? meta.tags : entry.tags, + related: Array.isArray(meta.related) ? meta.related : entry.related, content: body.trim(), - path: path.join(dir, filename), - origin: defaultName ? 'nested' : 'flat', + path: entry.path, + origin: entry.origin, }; + + entry._bodyLoaded = true; + this.skills.set(name, skill); + return skill; } - // Get all skills + // Get all skills — returns index entries with lazy-loaded bodies for callers + // that need content. list() does NOT load bodies (index only). list() { - return [...this.skills.values()].map(s => ({ - name: s.name, - trigger: s.trigger, - keywords: s.keywords, - preview: s.content.slice(0, 80) + (s.content.length > 80 ? '...' : ''), - origin: s.origin || 'flat', + return [...this._index.values()].map(e => ({ + name: e.name, + trigger: e.trigger, + keywords: e.keywords, + preview: this._getPreview(e), + origin: e.origin || 'flat', })); } - // Get a skill by name + _getPreview(entry) { + // Return preview from cached body if available; otherwise a short placeholder. + if (entry._bodyLoaded && this.skills.has(entry.name)) { + const body = this.skills.get(entry.name).content; + return body.slice(0, 80) + (body.length > 80 ? '...' : ''); + } + // Avoid loading body just for list() — return description or empty + return entry.description || ''; + } + + // Get a skill by name — lazily loads body on first call. get(name) { - return this.skills.get(name) || null; + if (this.skills.has(name)) return this.skills.get(name); + if (!this._index.has(name)) return null; + return this._loadBody(name); } - // Get skills that should auto-inject for a given message + // Get skills that should auto-inject for a given message. + // Only checks index entries (trigger/keywords) — avoids loading bodies + // until caller needs content. 
getAutoSkills(message) { const msg = (message || '').toLowerCase(); const results = []; - for (const skill of this.skills.values()) { - if (skill.trigger === 'auto') { - results.push(skill); - } else if (skill.trigger === 'match' && skill.keywords.length > 0) { - const match = skill.keywords.some(kw => msg.includes(String(kw).toLowerCase())); - if (match) results.push(skill); + for (const entry of this._index.values()) { + if (entry.trigger === 'auto') { + results.push(this._loadBody(entry.name)); + } else if (entry.trigger === 'match' && entry.keywords.length > 0) { + const match = entry.keywords.some(kw => msg.includes(String(kw).toLowerCase())); + if (match) results.push(this._loadBody(entry.name)); } } - return results; + return results.filter(Boolean); + } + + // Return flat IndexEntry list for prompt injection (no bodies loaded). + // { name, description, trigger, keywords, tags, related, path, origin } + getIndex() { + return [...this._index.values()].map(e => ({ + name: e.name, + description: e.description, + trigger: e.trigger, + keywords: e.keywords, + tags: e.tags, + related: e.related, + path: e.path, + origin: e.origin, + })); } // Create a new skill in the project's .smallcode/skills directory @@ -220,21 +353,55 @@ class SkillManager { name, trigger, keywords, + description: options.description || '', + tags: options.tags || [], + related: options.related || [], content, path: filePath, origin: 'flat', + _hasFrontmatter: true, + _bodyLoaded: true, }; + this._index.set(name, skill); this.skills.set(name, skill); return skill; } + // Promote a quarantined draft (.smallcode/skills/drafts/.md) into + // the live project skill dir and load it. Returns the new path or null. 
+ promoteDraft(name) { + const safe = String(name || '').replace(/[^a-z0-9-_]/gi, ''); + if (!safe) return null; + const draftsDir = path.join(this.projectDir, '.smallcode', 'skills', 'drafts'); + const source = path.join(draftsDir, `${safe}.md`); + if (!fs.existsSync(source)) return null; + const target = path.join(this.projectDir, '.smallcode', 'skills', `${safe}.md`); + if (fs.existsSync(target)) return null; // never overwrite a live skill + fs.renameSync(source, target); + this._ingestFile(target, `${safe}.md`, path.dirname(target), safe, 'flat'); + return target; + } + + // List quarantined drafts (names only) + listDrafts() { + const draftsDir = path.join(this.projectDir, '.smallcode', 'skills', 'drafts'); + try { + return fs.readdirSync(draftsDir) + .filter(f => f.endsWith('.md')) + .map(f => f.replace(/\.md$/i, '')); + } catch { + return []; + } + } + // Remove a skill remove(name) { - const skill = this.skills.get(name); - if (!skill) return false; - if (fs.existsSync(skill.path)) { - try { fs.unlinkSync(skill.path); } catch {} + const entry = this._index.get(name) || this.skills.get(name); + if (!entry) return false; + if (fs.existsSync(entry.path)) { + try { fs.unlinkSync(entry.path); } catch {} } + this._index.delete(name); this.skills.delete(name); return true; } diff --git a/src/plugins/team_loader.js b/src/plugins/team_loader.js new file mode 100644 index 00000000..ad8c8375 --- /dev/null +++ b/src/plugins/team_loader.js @@ -0,0 +1,103 @@ +// SmallCode — Team Loader +// Loads team definitions from .smallcode/teams/.yaml +// +// YAML format (tiny parser — NO yaml dep): +// name: my-team +// description: short description +// agents: [agent-a, agent-b] +// +// Only parses top-level scalar keys and inline array lists. +// Drafts quarantine: teams/drafts/ is never auto-loaded (Phase 3 parity). 
+ +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +// Reuse KV_RE style from skills.js / agent_loader.js +const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/; + +class TeamLoader { + constructor(projectDir) { + this.projectDir = projectDir || process.cwd(); + this._teams = new Map(); + this._load(); + } + + _teamDir() { + return path.join(this.projectDir, '.smallcode', 'teams'); + } + + _bundledDir() { + return path.join(__dirname, '..', '..', 'teams'); + } + + _parseLine(line) { + const m = line.trim().match(KV_RE); + if (!m) return null; + let value = m[2].trim(); + if (value.startsWith('[') && value.endsWith(']')) { + value = value.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean); + } + return { key: m[1], value }; + } + + _parse(content, defaultName) { + const result = { name: defaultName, description: '', agents: [] }; + for (const rawLine of content.split(/\r?\n/)) { + const parsed = this._parseLine(rawLine); + if (!parsed) continue; + if (parsed.key === 'name') result.name = String(parsed.value); + else if (parsed.key === 'description') result.description = String(parsed.value); + else if (parsed.key === 'agents') result.agents = Array.isArray(parsed.value) ? 
parsed.value : [String(parsed.value)]; + } + return result; + } + + _loadDir(dir) { + if (!fs.existsSync(dir)) return; + let entries; + try { + entries = fs.readdirSync(dir, { withFileTypes: true }); + } catch { + return; + } + for (const entry of entries) { + // Skip drafts/ directory — quarantine parity with skills/agents + if (entry.isDirectory() && entry.name === 'drafts') continue; + if (entry.isDirectory()) continue; + if (!entry.name.endsWith('.yaml') && !entry.name.endsWith('.yml')) continue; + const filePath = path.join(dir, entry.name); + const defaultName = entry.name.replace(/\.(yaml|yml)$/i, ''); + let content; + try { + content = fs.readFileSync(filePath, 'utf-8'); + } catch { + continue; + } + const team = this._parse(content, defaultName); + team.path = filePath; + this._teams.set(team.name, team); + } + } + + _load() { + // Bundled defaults first; project-level overrides (Map.set overwrites same name) + this._loadDir(this._bundledDir()); + this._loadDir(this._teamDir()); + } + + list() { + return [...this._teams.values()].map(t => ({ + name: t.name, + description: t.description, + agents: t.agents, + })); + } + + get(name) { + return this._teams.get(name) || null; + } +} + +module.exports = { TeamLoader }; diff --git a/src/plugins/team_runner.js b/src/plugins/team_runner.js new file mode 100644 index 00000000..43404b22 --- /dev/null +++ b/src/plugins/team_runner.js @@ -0,0 +1,50 @@ +// SmallCode — Team Runner +// Sequential pipeline: each agent's output becomes the next agent's task input. +// No parallelism — local inference performance trap. +// +// Returns: { output, steps, tokens, perAgent: [{name, steps, tokens, error?}] } + +'use strict'; + +const { AgentLoader } = require('./agent_loader'); +const { AgentRunner } = require('./agent_runner'); + +/** + * Run a team pipeline. 
+ * @param {object} teamDef - { name, agents: string[] } + * @param {string} task - Initial task for the first agent + * @param {object} ctx - { config, flags, tui, skillManager } + * @param {AgentLoader} agentLoader - Loader to resolve agent definitions + * @returns {Promise<{output: string, steps: number, tokens: number, perAgent: object[]}>} + */ +async function runTeam(teamDef, task, ctx, agentLoader) { + const perAgent = []; + let currentTask = task; + let totalSteps = 0; + let totalTokens = 0; + + for (const agentName of (teamDef.agents || [])) { + const agentDef = agentLoader.get(agentName); + if (!agentDef) { + const result = { name: agentName, steps: 0, tokens: 0, error: `Agent "${agentName}" not found` }; + perAgent.push(result); + // Propagate as task for next agent so the pipeline can continue + currentTask = `[error from ${agentName}: ${result.error}] ${currentTask}`; + continue; + } + + const runner = new AgentRunner(agentDef, ctx); + const result = await runner.run(currentTask); + + perAgent.push({ name: agentName, steps: result.steps, tokens: result.tokens, error: result.error }); + totalSteps += result.steps; + totalTokens += result.tokens; + + // Next agent's input = this agent's output (pipeline semantics) + currentTask = result.output || `(${agentName} produced no output)`; + } + + return { output: currentTask, steps: totalSteps, tokens: totalTokens, perAgent }; +} + +module.exports = { runTeam }; diff --git a/src/tools/mcp_client.js b/src/tools/mcp_client.js index 04ddc3a3..537c7fef 100644 --- a/src/tools/mcp_client.js +++ b/src/tools/mcp_client.js @@ -46,6 +46,10 @@ class MCPClient { const servers = content.mcpServers || {}; for (const [name, cfg] of Object.entries(servers)) { if (cfg.disabled) continue; + // Skip a self-referential entry that relaunches smallcode in --mcp + // mode. Combined with the host-side guard, this prevents the fork + // bomb from issue #82 even if a stale/bad mcp.json registers it. 
+ if (MCPClient._isSelfReference(cfg)) continue; this.servers.set(name, { config: { name, @@ -65,6 +69,20 @@ class MCPClient { return this.servers.size; } + /** + * Detect a server config that would relaunch SmallCode itself as an MCP + * server (`smallcode --mcp`, `node smallcode.js --mcp`, `npx smallcode --mcp`, + * `smolv2 --mcp`, …). Spawning these from the MCP client is what produced the + * runaway process fork bomb in issue #82. + */ + static _isSelfReference(cfg) { + if (!cfg) return false; + const args = Array.isArray(cfg.args) ? cfg.args : []; + if (!args.includes('--mcp')) return false; + const hay = [cfg.command || '', ...args].join(' ').toLowerCase(); + return /\bsmallcode\b|smallcode\.js|\bsmolv2\b/.test(hay); + } + /** * Connect to all configured servers and discover their tools. * Returns number of tools discovered. diff --git a/src/tools/tool_aliases.js b/src/tools/tool_aliases.js new file mode 100644 index 00000000..f487e8cd --- /dev/null +++ b/src/tools/tool_aliases.js @@ -0,0 +1,198 @@ +'use strict'; + +// SmallCode — Tool alias layer +// +// Maps OpenAI/Claude-style tool names that small models (e.g. minimax) tend +// to hallucinate onto SmallCode's real built-in tools, re-keying argument +// names as needed. Unknown names pass through unchanged. +// +// Usage (see bin/smallcode.js wiring): +// const { normalizeToolCall } = require('../src/tools/tool_aliases'); +// message.tool_calls = message.tool_calls.map(normalizeToolCall); + +// Real tool names that must NEVER be shadowed by an alias. +// If the incoming name is already one of these, pass through unchanged. 
+const REAL_TOOLS = new Set([ + 'read_file', 'write_file', 'patch', 'bash', + 'search', 'find_files', 'memory_remember', 'memory_recall', + 'select_category', 'done', +]); + +/** + * ALIASES maps lower-cased alias names to: + * { tool: , mapArgs: (parsedArgs) => remappedArgs } + * + * mapArgs receives a plain object (already JSON-parsed) and returns a new + * plain object with keys renamed according to the alias spec. + */ +const ALIASES = { + // ── read_file ───────────────────────────────────────────────────────────── + read: { + tool: 'read_file', + mapArgs(a) { + const out = { ...a }; + // file_path / filepath → path + if ('file_path' in out && !('path' in out)) { out.path = out.file_path; delete out.file_path; } + if ('filepath' in out && !('path' in out)) { out.path = out.filepath; delete out.filepath; } + // line / offset → start_line (keep start_line/end_line as-is) + if ('line' in out && !('start_line' in out)) { out.start_line = out.line; delete out.line; } + if ('offset' in out && !('start_line' in out)) { out.start_line = out.offset; delete out.offset; } + return out; + }, + }, + view: { + tool: 'read_file', + mapArgs(a) { return ALIASES.read.mapArgs(a); }, + }, + + // ── write_file ───────────────────────────────────────────────────────────── + write: { + tool: 'write_file', + mapArgs(a) { + const out = { ...a }; + if ('file_path' in out && !('path' in out)) { out.path = out.file_path; delete out.file_path; } + return out; + }, + }, + create_file: { + tool: 'write_file', + mapArgs(a) { return ALIASES.write.mapArgs(a); }, + }, + create: { + tool: 'write_file', + mapArgs(a) { return ALIASES.write.mapArgs(a); }, + }, + + // ── patch ────────────────────────────────────────────────────────────────── + edit: { + tool: 'patch', + mapArgs(a) { + const out = { ...a }; + if ('file_path' in out && !('path' in out)) { out.path = out.file_path; delete out.file_path; } + if ('old_string' in out && !('old_str' in out)) { out.old_str = out.old_string; delete 
out.old_string; } + if ('new_string' in out && !('new_str' in out)) { out.new_str = out.new_string; delete out.new_string; } + return out; + }, + }, + str_replace: { + tool: 'patch', + mapArgs(a) { return ALIASES.edit.mapArgs(a); }, + }, + str_replace_editor: { + tool: 'patch', + mapArgs(a) { return ALIASES.edit.mapArgs(a); }, + }, + replace: { + tool: 'patch', + mapArgs(a) { return ALIASES.edit.mapArgs(a); }, + }, + + // ── bash ─────────────────────────────────────────────────────────────────── + bash: { + tool: 'bash', + mapArgs(a) { + const out = { ...a }; + if ('cmd' in out && !('command' in out)) { out.command = out.cmd; delete out.cmd; } + return out; + }, + }, + shell: { + tool: 'bash', + mapArgs(a) { return ALIASES.bash.mapArgs(a); }, + }, + run_command: { + tool: 'bash', + mapArgs(a) { return ALIASES.bash.mapArgs(a); }, + }, + + // ── search ───────────────────────────────────────────────────────────────── + grep: { + tool: 'search', + mapArgs(a) { + const out = { ...a }; + if ('query' in out && !('pattern' in out)) { out.pattern = out.query; delete out.query; } + return out; + }, + }, + + // ── find_files ───────────────────────────────────────────────────────────── + glob: { + tool: 'find_files', + mapArgs(a) { + const out = { ...a }; + if ('query' in out && !('pattern' in out)) { out.pattern = out.query; delete out.query; } + return out; + }, + }, + ls: { + tool: 'find_files', + mapArgs(a) { + const dir = (a && a.path) ? String(a.path).replace(/[\\/]+$/, '') : '.'; + return { pattern: dir + '/*' }; + }, + }, + list_dir: { + tool: 'find_files', + mapArgs(a) { return ALIASES.ls.mapArgs(a); }, + }, + list_directory: { + tool: 'find_files', + mapArgs(a) { return ALIASES.ls.mapArgs(a); }, + }, +}; + +// Also register upper-case variants used by Claude Code tooling (Read, Write, +// Edit, Bash, Grep, Glob, LS) — identical mapArgs, just different key casing. 
+// We do this by normalising to lower-case before lookup, so no extra entries +// are needed (see normalizeToolCall below). + +/** + * Normalize a single OpenAI-shape tool_call. + * + * @param {{ function: { name: string, arguments: string } }} toolCall + * @returns {object} A new tool_call with real name + remapped args, or the + * original object if no alias matched. + */ +function normalizeToolCall(toolCall) { + if (!toolCall || !toolCall.function) return toolCall; + + const rawName = toolCall.function.name; + if (typeof rawName !== 'string') return toolCall; + + // If the name is already an exact match to a real tool, don't touch it. + if (REAL_TOOLS.has(rawName)) return toolCall; + + const key = rawName.toLowerCase(); + + const alias = ALIASES[key]; + if (!alias) return toolCall; // unknown name — pass through unchanged + + // Parse args (robust to malformed JSON). + let parsedArgs; + try { + parsedArgs = JSON.parse(toolCall.function.arguments || '{}'); + if (typeof parsedArgs !== 'object' || parsedArgs === null) parsedArgs = {}; + } catch { + // Malformed JSON — rename the tool but keep args string as-is + return { + ...toolCall, + function: { + ...toolCall.function, + name: alias.tool, + }, + }; + } + + const remappedArgs = alias.mapArgs(parsedArgs); + + return { + ...toolCall, + function: { + ...toolCall.function, + name: alias.tool, + arguments: JSON.stringify(remappedArgs), + }, + }; +} + +module.exports = { ALIASES, REAL_TOOLS, normalizeToolCall }; diff --git a/src/tools/two_stage_router.js b/src/tools/two_stage_router.js index cd5f4e41..36a47968 100644 --- a/src/tools/two_stage_router.js +++ b/src/tools/two_stage_router.js @@ -28,11 +28,15 @@ const TOOL_CATEGORIES = { tools: ['bash', 'run'], }, plan: { - description: 'Load/save project memory, BoneScript compile/check', - tools: ['memory_load', 'memory_remember', 'bone_compile', 'bone_check'], + description: 'Load/save project memory, load skills, spawn agents, BoneScript compile/check', + tools: 
['memory_load', 'memory_remember', 'use_skill', 'bone_compile', 'bone_check', 'spawn_agent'], }, }; +// Cross-cutting tools appended to every category in Stage 2 — the skill +// index is injected on every turn, so use_skill must always be callable. +const ALWAYS_TOOLS = ['use_skill']; + /** * Determine routing mode based on model's context window. * @param {number} contextWindow - Model's context length in tokens @@ -80,7 +84,7 @@ function getCategorySelectorTool() { function getToolsForCategory(category, allTools) { const cat = TOOL_CATEGORIES[category]; if (!cat) return allTools; // Unknown category, fall back to all - return allTools.filter(t => cat.tools.includes(t.function.name)); + return allTools.filter(t => cat.tools.includes(t.function.name) || ALWAYS_TOOLS.includes(t.function.name)); } /** diff --git a/src/tui/fullscreen.js b/src/tui/fullscreen.js index 6a4c935f..20153477 100644 --- a/src/tui/fullscreen.js +++ b/src/tui/fullscreen.js @@ -54,6 +54,22 @@ function visualCursorPosition(str, cursorIdx, maxVisualWidth) { return { line, col }; } +// Word-boundary helpers for input editing (issue #93). A "word" is a run of +// non-whitespace characters. Movement skips any whitespace adjacent to the +// cursor before scanning over the word, mirroring readline/Windows behaviour. 
+function prevWordBoundary(str, idx) { + let i = idx; + while (i > 0 && /\s/.test(str[i - 1])) i--; // skip whitespace to the left + while (i > 0 && !/\s/.test(str[i - 1])) i--; // skip the word itself + return i; +} +function nextWordBoundary(str, idx) { + let i = idx; + while (i < str.length && /\s/.test(str[i])) i++; // skip whitespace to the right + while (i < str.length && !/\s/.test(str[i])) i++; // skip the word itself + return i; +} + // ─── ANSI Escape Sequences ─────────────────────────────────────────────────── const ESC = '\x1b['; @@ -161,6 +177,14 @@ class FullScreenTUI { // Panel content buffers this.chatLines = []; // Rendered chat messages + this._chatTrim = 0; // count of chatLines trimmed off the front (issue #77 toolEnd anchoring) + this.contextMeter = ''; // live context-usage indicator (issue #77) + + // Mouse text selection in the chat panel (drag to highlight, copy on + // release). Anchored to chatLines indices so scrolling doesn't shift it. + this.selection = null; // { anchor: {line, col}, head: {line, col} } + this._selecting = false; + this._lastDragY = null; // previous drag row — edge-dwell detection this.toolLines = []; // Tool execution log this.inputBuffer = ''; // Current user input this.inputCursor = 0; // Cursor position in input @@ -176,10 +200,12 @@ class FullScreenTUI { { cmd: '/quit', alias: '/q', desc: 'Exit SmallCode' }, { cmd: '/clear', alias: null, desc: 'Reset conversation' }, { cmd: '/model', alias: null, desc: 'Show/switch model' }, + { cmd: '/provider', alias: null, desc: 'Show provider / configure model' }, { cmd: '/endpoint', alias: null, desc: 'Switch API endpoint' }, { cmd: '/stats', alias: null, desc: 'Session statistics' }, { cmd: '/tokens', alias: null, desc: 'Token usage report' }, { cmd: '/budget', alias: null, desc: 'Context window budget' }, + { cmd: '/live', alias: null, desc: 'Toggle live activity feed' }, { cmd: '/files', alias: null, desc: 'List project files' }, { cmd: '/diff', alias: null, desc: 'Git 
diff summary' }, { cmd: '/git', alias: null, desc: 'Run git command' }, @@ -192,6 +218,11 @@ class FullScreenTUI { { cmd: '/cognition', alias: null, desc: 'MarrowScript cognition status' }, { cmd: '/mcp', alias: null, desc: 'Connected MCP servers' }, { cmd: '/skill', alias: null, desc: 'Manage reusable skills' }, + { cmd: '/agents', alias: null, desc: 'List defined sub-agents' }, + { cmd: '/agent', alias: null, desc: 'Run a sub-agent manually' }, + { cmd: '/teams', alias: null, desc: 'List defined agent teams' }, + { cmd: '/team', alias: null, desc: 'Run a team pipeline' }, + { cmd: '/evolve', alias: null, desc: 'Propose skill from session friction' }, { cmd: '/plugin', alias: null, desc: 'Manage plugins' }, { cmd: '/sessions', alias: null, desc: 'List/resume sessions' }, { cmd: '/session', alias: null, desc: 'Parallel sessions' }, @@ -332,7 +363,8 @@ class FullScreenTUI { for (let i = 0; i < this.chatHeight; i++) { buf += ANSI.moveTo(i + 1, 1); - const line = visible[i] || ''; + let line = visible[i] || ''; + if (this.selection) line = this._highlightSelection(startLine + i, line); buf += fitAnsi(line, this.chatWidth); } @@ -616,9 +648,12 @@ class FullScreenTUI { actionStr = ' enter send │ /help commands'; } - // 2. Middle: Scroll & Token info + // 2. Middle: Scroll & Token info (+ live context meter — issue #77) let scrollStr = this.chatScroll < 0 ? '↑ scrolled' : ''; let tokenStr = this.tokenInfo ? `${this.tokenInfo}` : ''; + if (this.contextMeter) { + tokenStr = tokenStr ? `${this.contextMeter} │ ${tokenStr}` : this.contextMeter; + } let middleStr = ''; if (scrollStr && tokenStr) { middleStr = `${scrollStr} │ ${tokenStr}`; @@ -879,6 +914,60 @@ class FullScreenTUI { return; } + // ─── Line / word navigation (issue #93) ────────────────────────────── + // Home / Ctrl+A — start of line. Terminals send Home as \x1b[H, \x1b[1~, + // or \x1bOH depending on mode; Ctrl+A arrives as the raw byte \x01. 
+ if (key === '\x1b[H' || key === '\x1b[1~' || key === '\x1bOH' || key === '\x01') { + this.inputCursor = 0; + this.render(); + return; + } + // End / Ctrl+E — end of line (\x1b[F, \x1b[4~, \x1bOF, or Ctrl+E = \x05). + if (key === '\x1b[F' || key === '\x1b[4~' || key === '\x1bOF' || key === '\x05') { + this.inputCursor = this.inputBuffer.length; + this.render(); + return; + } + // Ctrl+Left — previous word (\x1b[1;5D, and Alt+B = \x1bb as a fallback). + if (key === '\x1b[1;5D' || key === '\x1b[1;3D' || key === '\x1bb') { + this.inputCursor = prevWordBoundary(this.inputBuffer, this.inputCursor); + this.render(); + return; + } + // Ctrl+Right — next word (\x1b[1;5C, and Alt+F = \x1bf as a fallback). + if (key === '\x1b[1;5C' || key === '\x1b[1;3C' || key === '\x1bf') { + this.inputCursor = nextWordBoundary(this.inputBuffer, this.inputCursor); + this.render(); + return; + } + // Ctrl+Backspace / Ctrl+W — delete the word to the left of the cursor. + // Ctrl+Backspace reaches us as \x17 (Ctrl+W) or \x1b\x7f on many terminals. + if (key === '\x17' || key === '\x1b\x7f' || key === '\x1b\b') { + const start = prevWordBoundary(this.inputBuffer, this.inputCursor); + this.inputBuffer = this.inputBuffer.slice(0, start) + this.inputBuffer.slice(this.inputCursor); + this.inputCursor = start; + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + this.render(); + return; + } + // Ctrl+Delete — delete the word to the right of the cursor (\x1b[3;5~). + if (key === '\x1b[3;5~' || key === '\x1b[3;3~') { + const end = nextWordBoundary(this.inputBuffer, this.inputCursor); + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + this.inputBuffer.slice(end); + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + this.render(); + return; + } + // Delete (forward) — remove the character under the cursor (\x1b[3~). 
+ if (key === '\x1b[3~') { + if (this.inputCursor < this.inputBuffer.length) { + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + this.inputBuffer.slice(this.inputCursor + 1); + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + } + this.render(); + return; + } + // Scroll chat — PgUp/PgDn, Shift+Up/Down, mouse wheel if (key === '\x1b[5~' || key === '\x1b[1;2A') { // PgUp or Shift+Up const maxBack = -(Math.max(0, this.chatLines.length - this.chatHeight)); @@ -905,6 +994,20 @@ class FullScreenTUI { this.render(); return; } + // Right-click — paste from clipboard (issue #96). Enabling SGR mouse + // tracking makes the terminal forward right-clicks to us instead of + // showing its native paste menu, so we honour the gesture ourselves. + // SGR button 2 (right) press is "\x1b[<2;X;YM", release "\x1b[<2;X;Ym". + if (/^\x1b\[<2;\d+;\d+m$/.test(key)) { + this._pasteFromClipboard(); + return; + } + + // Mouse press / drag / release (SGR) — text selection in the chat panel. + // Only the chat region selects; tool panel and input area are ignored. 
+ if (key.includes('\x1b[<')) { + if (this._onMouseSelect(key)) return; + } // Ctrl+L — clear and redraw if (key === '\x0c') { @@ -912,27 +1015,9 @@ class FullScreenTUI { return; } - // Ctrl+V — paste from clipboard (Windows) + // Ctrl+V — paste from clipboard (issue #96: right-click also routes here) if (key === '\x16') { - try { - const { execSync } = require('child_process'); - let clipboard = ''; - if (process.platform === 'win32') { - clipboard = execSync('powershell -command "Get-Clipboard"', { encoding: 'utf-8', timeout: 3000 }).trim(); - } else if (process.platform === 'darwin') { - clipboard = execSync('pbpaste', { encoding: 'utf-8', timeout: 3000 }).trim(); - } else { - clipboard = execSync('xclip -selection clipboard -o 2>/dev/null || xsel --clipboard --output 2>/dev/null', { encoding: 'utf-8', timeout: 3000, shell: true }).trim(); - } - if (clipboard) { - // Replace newlines with spaces for input line - const text = clipboard.replace(/[\r\n]+/g, ' '); - this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + text + this.inputBuffer.slice(this.inputCursor); - this.inputCursor += text.length; - this.commandPaletteOpen = this.inputBuffer.startsWith('/'); - this.render(); - } - } catch {} + this._pasteFromClipboard(); return; } @@ -959,6 +1044,30 @@ class FullScreenTUI { } } + // Insert clipboard contents at the cursor. Shared by Ctrl+V and the + // right-click gesture (issue #96). Newlines collapse to spaces so the + // single-line input stays intact. 
+ _pasteFromClipboard() { + try { + const { execSync } = require('child_process'); + let clipboard = ''; + if (process.platform === 'win32') { + clipboard = execSync('powershell -command "Get-Clipboard"', { encoding: 'utf-8', timeout: 3000 }).trim(); + } else if (process.platform === 'darwin') { + clipboard = execSync('pbpaste', { encoding: 'utf-8', timeout: 3000 }).trim(); + } else { + clipboard = execSync('xclip -selection clipboard -o 2>/dev/null || xsel --clipboard --output 2>/dev/null', { encoding: 'utf-8', timeout: 3000, shell: true }).trim(); + } + if (clipboard) { + const text = clipboard.replace(/[\r\n]+/g, ' '); + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + text + this.inputBuffer.slice(this.inputCursor); + this.inputCursor += text.length; + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + this.render(); + } + } catch {} + } + _onResize() { this._computeLayout(); this.render(); @@ -1021,6 +1130,7 @@ class FullScreenTUI { // thousands of lines; rendering stays fast by only keeping recent history. const MAX_CHAT_LINES = 5000; if (this.chatLines.length > MAX_CHAT_LINES) { + this._chatTrim += this.chatLines.length - MAX_CHAT_LINES; this.chatLines.splice(0, this.chatLines.length - MAX_CHAT_LINES); } @@ -1052,6 +1162,69 @@ class FullScreenTUI { this.render(); } + // Live in-progress tool line (issue #77). Pushes a ⚙ line to chat + tool + // panel and returns a handle so toolEnd() can rewrite it in place once the + // tool finishes — so the user sees "⚙ write_file: x.py" the moment it starts, + // not only the ✓ after it completes. The handle records absolute indices plus + // the trim offset at creation, so front-trimming of chatLines stays correct. + toolStart(name, detail) { + const iconColor = this.theme.accent; + const prefix = iconColor + ' TOOL ⚙ ' + this.theme.border + '│ ' + ANSI.reset; + const nameStr = name ? this.theme.accent + name + ANSI.reset + ': ' : ''; + const detailStr = (detail ? 
this.theme.muted + detail : this.theme.muted + 'running…') + ANSI.reset; + + const line = prefix + nameStr + detailStr; + const toolPanelLine = ` ${iconColor}⚙${ANSI.reset} ${nameStr}${detailStr}`; + const handle = { name, chatIdx: this.chatLines.length, toolIdx: this.toolLines.length, trim: this._chatTrim }; + + this.chatLines.push(line); + this.toolLines.push(toolPanelLine); + this.chatScroll = 0; + this.render(); + return handle; + } + + // Finish a live tool line started by toolStart(): rewrite it to ✓/✗ in place. + // Falls back to appending a fresh line (addTool) if the original scrolled out + // of the retained window or no handle was supplied. + toolEnd(handle, status, detail) { + if (!handle || handle.chatIdx == null) { this.addTool(handle && handle.name, status, detail); return; } + + let icon = '⚙', iconColor = this.theme.accent; + if (status === 'ok') { icon = '✓'; iconColor = this.theme.success; } + else if (status === 'err') { icon = '✗'; iconColor = this.theme.error; } + + const name = handle.name; + const prefix = iconColor + ' TOOL ' + icon + ' ' + this.theme.border + '│ ' + ANSI.reset; + const nameStr = name ? this.theme.accent + name + ANSI.reset + ': ' : ''; + const detailStr = detail ? this.theme.muted + detail + ANSI.reset : ''; + const line = prefix + nameStr + detailStr; + const toolPanelLine = ` ${iconColor}${icon}${ANSI.reset} ${nameStr}${detailStr}`; + + const chatIdx = handle.chatIdx - (this._chatTrim - (handle.trim || 0)); + if (chatIdx >= 0 && chatIdx < this.chatLines.length) { + this.chatLines[chatIdx] = line; + } else { + this.chatLines.push(line); // scrolled out of the retained window + } + if (handle.toolIdx != null && handle.toolIdx < this.toolLines.length) { + this.toolLines[handle.toolIdx] = toolPanelLine; + } else { + this.toolLines.push(toolPanelLine); + } + this.render(); + } + + // Live context-usage meter (issue #77). `pct` is 0-100; used/window are token + // counts. Rendered in the status footer alongside the token info. 
+ setContextMeter(pct, used, window) { + if (pct == null) { this.contextMeter = ''; this.render(); return; } + const p = Math.max(0, Math.min(100, Math.round(pct))); + const fmt = (n) => n >= 1000 ? `${(n / 1000).toFixed(1)}k` : String(n); + this.contextMeter = window ? `ctx ${p}% (${fmt(used)}/${fmt(window)})` : `ctx ${p}%`; + this.render(); + } + // Show a diff in the chat panel (non-blocking, inline) addDiff(filePath, oldStr, newStr, lineNum) { const t = this.theme; @@ -1131,12 +1304,163 @@ class FullScreenTUI { this.render(); } + // Live dimmed reasoning preview (issue #77, Phase B). Streams thinking tokens + // into a single collapsing dimmed line so the user can watch the model reason + // without flooding the chat. Reset by endStream() at turn boundaries. + streamThinking(token) { + const dim = '\x1b[2m'; + const prefix = ' ' + this.theme.border + '│ ' + ANSI.reset + dim + '[thinking] '; + if (this._thinkingLineIdx == null || this._thinkingLineIdx >= this.chatLines.length) { + this._thinkingLineIdx = this.chatLines.length; + this._thinkingText = ''; + this.chatLines.push(prefix + ANSI.reset); + } + this._thinkingText += token; + const tail = this._thinkingText.replace(/\s+/g, ' ').trim().slice(-120); + this.chatLines[this._thinkingLineIdx] = prefix + tail + ANSI.reset; + this.chatScroll = 0; + this.render(); + } + endStream() { this._lastLineIsStreaming = false; + this._thinkingLineIdx = null; this.chatLines.push(''); this.render(); } + // ─── Mouse selection ───────────────────────────────────────────────── + + // Handle SGR mouse events for chat-panel text selection. + // Returns true when the chunk was consumed as selection input. 
+ _onMouseSelect(data) { + const events = [...data.matchAll(/\x1b\[<(\d+);(\d+);(\d+)([Mm])/g)]; + if (events.length === 0) return false; + + let handled = false; + for (const ev of events) { + const btn = parseInt(ev[1]); + const x = parseInt(ev[2]); // 1-based column + const y = parseInt(ev[3]); // 1-based row + const isRelease = ev[4] === 'm'; + + // Left press inside the chat panel — start selecting + if (btn === 0 && !isRelease && !this._selecting) { + if (x <= this.chatWidth && y <= this.chatHeight) { + const pos = this._chatPosAt(x, y); + this.selection = { anchor: pos, head: pos }; + this._selecting = true; + this._lastDragY = null; + handled = true; + } else { + // Click outside the chat panel clears any old highlight + if (this.selection) { this.selection = null; this.render(); } + } + continue; + } + // Drag with left button held — extend selection. Staying at the + // panel's top/bottom edge (repeated edge events) auto-scrolls so the + // selection can extend beyond the visible window; merely reaching the + // edge row selects it without scrolling. 
+ if (btn === 32 && this._selecting) { + const prevY = this._lastDragY; + this._lastDragY = y; + if (y <= 1 && prevY !== null && prevY <= 1) { + const maxBack = -(Math.max(0, this.chatLines.length - this.chatHeight)); + this.chatScroll = Math.max(maxBack, this.chatScroll - 1) || 0; // || 0 normalizes -0 + } else if (y > this.chatHeight || (y === this.chatHeight && prevY !== null && prevY >= this.chatHeight)) { + this.chatScroll = Math.min(0, this.chatScroll + 1); + } + this.selection.head = this._chatPosAt( + Math.min(x, this.chatWidth), + Math.max(1, Math.min(y, this.chatHeight)) + ); + handled = true; + continue; + } + // Release — copy and clear + if (btn === 0 && isRelease && this._selecting) { + this._selecting = false; + this._lastDragY = null; + const text = this._extractSelection(); + this.selection = null; + if (text) { + this._copyToClipboard(text); + const lines = text.split('\n').length; + this.addTool('clipboard', 'ok', `copied ${lines} line${lines === 1 ? '' : 's'}`); + } + handled = true; + } + } + if (handled) this.render(); + return handled; + } + + // Map a terminal (x, y) inside the chat panel to a chatLines position. + _chatPosAt(x, y) { + const startLine = Math.max(0, this.chatLines.length - this.chatHeight + this.chatScroll); + return { line: startLine + (y - 1), col: x - 1 }; + } + + // Chat lines carry a fixed 10-char gutter (8-char role label + '│ '). + // Selection clamps to the text area so the gutter never highlights or + // copies; a drag starting in the gutter selects from the text start. + static CHAT_GUTTER = 10; + + // Selection with anchor/head ordered top-to-bottom. + _normalizedSelection() { + if (!this.selection) return null; + const { anchor: a, head: h } = this.selection; + if (a.line < h.line || (a.line === h.line && a.col <= h.col)) { + return { start: a, end: h }; + } + return { start: h, end: a }; + } + + // Plain text covered by the current selection. 
+ _extractSelection() { + const sel = this._normalizedSelection(); + if (!sel) return ''; + const gutter = FullScreenTUI.CHAT_GUTTER; + const out = []; + for (let i = sel.start.line; i <= sel.end.line; i++) { + if (i < 0 || i >= this.chatLines.length) continue; + const plain = this._stripAnsi(this.chatLines[i] || ''); + const from = Math.max(gutter, i === sel.start.line ? sel.start.col : 0); + const to = i === sel.end.line ? sel.end.col + 1 : plain.length; + out.push(to > from ? plain.slice(from, to).replace(/\s+$/, '') : ''); + } + return out.join('\n').replace(/\n+$/, ''); + } + + // Apply inverse-video highlight to the selected span of a chat line. + // Works on the ANSI-stripped text — colors drop while selected, which is + // the standard tradeoff for span-accurate highlighting. + _highlightSelection(lineIdx, line) { + const sel = this._normalizedSelection(); + if (!sel || lineIdx < sel.start.line || lineIdx > sel.end.line) return line; + const gutter = FullScreenTUI.CHAT_GUTTER; + const plain = this._stripAnsi(line); + const from = Math.max(gutter, Math.min( + lineIdx === sel.start.line ? sel.start.col : 0, plain.length)); + const to = lineIdx === sel.end.line ? 
Math.min(sel.end.col + 1, plain.length) : plain.length; + if (from >= to) return line; + return plain.slice(0, from) + '\x1b[7m' + plain.slice(from, to) + '\x1b[27m' + plain.slice(to); + } + + _copyToClipboard(text) { + try { + const { execSync } = require('child_process'); + if (process.platform === 'win32') { + execSync('powershell -noprofile -command "$input | Set-Clipboard"', { input: text, timeout: 3000 }); + } else if (process.platform === 'darwin') { + execSync('pbcopy', { input: text, timeout: 3000 }); + } else { + execSync('xclip -selection clipboard 2>/dev/null || xsel --clipboard --input 2>/dev/null', { input: text, timeout: 3000, shell: true }); + } + } catch {} + } + // ─── Utilities ─────────────────────────────────────────────────────── _truncate(str, maxLen) { diff --git a/src/tui/terminal.js b/src/tui/terminal.js index 7ae5583f..11913dc7 100644 --- a/src/tui/terminal.js +++ b/src/tui/terminal.js @@ -24,8 +24,10 @@ const SEQ = { hideCursor: '\x1b[?25l', showCursor: '\x1b[?25h', reset: '\x1b[0m', - mouseOn: '\x1b[?1000h\x1b[?1006h', // button tracking + SGR encoding - mouseOff: '\x1b[?1000l\x1b[?1006l', + // 1000 = presses/releases/wheel, 1002 = also motion while a button is held + // (needed for drag-selection in the chat panel), 1006 = SGR encoding + mouseOn: '\x1b[?1000h\x1b[?1002h\x1b[?1006h', + mouseOff: '\x1b[?1002l\x1b[?1000l\x1b[?1006l', pasteOn: '\x1b[?2004h', // bracketed paste pasteOff: '\x1b[?2004l', }; diff --git a/teams/build.yaml b/teams/build.yaml new file mode 100644 index 00000000..d7f47d25 --- /dev/null +++ b/teams/build.yaml @@ -0,0 +1,3 @@ +name: build +description: Full build pipeline — recon, plan, implement, verify. +agents: [scout, planner, code-engineer, critic] diff --git a/teams/debug.yaml b/teams/debug.yaml new file mode 100644 index 00000000..090d66af --- /dev/null +++ b/teams/debug.yaml @@ -0,0 +1,3 @@ +name: debug +description: Diagnose and advise — systematic debugging paired with architectural insight. 
+agents: [debugger, oracle] diff --git a/teams/review.yaml b/teams/review.yaml new file mode 100644 index 00000000..dce5c513 --- /dev/null +++ b/teams/review.yaml @@ -0,0 +1,3 @@ +name: review +description: Multi-angle review — correctness, security, and quality assurance. +agents: [critic, red-team, qa-tester] diff --git a/test/agent_loader.test.js b/test/agent_loader.test.js new file mode 100644 index 00000000..5a1c3244 --- /dev/null +++ b/test/agent_loader.test.js @@ -0,0 +1,198 @@ +'use strict'; + +// SmallCode — AgentLoader + TeamLoader tests +// Pins: frontmatter CRLF, tools array parsing, missing dir tolerance, +// drafts quarantine, team yaml parsing. + +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const { AgentLoader } = require('../src/plugins/agent_loader'); +const { TeamLoader } = require('../src/plugins/team_loader'); + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-agents-')); +} + +function write(file, content) { + fs.mkdirSync(path.dirname(file), { recursive: true }); + fs.writeFileSync(file, content); +} + +// ── AgentLoader ─────────────────────────────────────────────────────────────── + +test('AgentLoader: missing project agents dir still returns bundled defaults', () => { + const dir = freshProject(); + const loader = new AgentLoader(dir); + // Bundled agents are always present; project dir is missing but that's fine + const names = loader.list().map(a => a.name); + assert.ok(names.includes('scout'), 'bundled scout should be present'); + assert.ok(names.includes('code-engineer'), 'bundled code-engineer should be present'); + // Unknown agent name still returns null + assert.equal(loader.get('anything'), null); +}); + +test('AgentLoader: LF frontmatter parses name/description/tools/model', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 
'reviewer.md'), + '---\nname: reviewer\ndescription: reviews code\ntools: [read_file, search]\nmodel: fast\n---\nYou are a reviewer.\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('reviewer'); + assert.ok(agent, 'agent should load'); + assert.equal(agent.name, 'reviewer'); + assert.equal(agent.description, 'reviews code'); + assert.deepEqual(agent.tools, ['read_file', 'search']); + assert.equal(agent.model, 'fast'); + assert.match(agent.body, /You are a reviewer/); +}); + +test('AgentLoader: CRLF frontmatter parses correctly (issue #52 parity)', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'crlf-agent.md'), + '---\r\nname: crlf-agent\r\ndescription: crlf test\r\ntools: [read_file]\r\nmodel: default\r\n---\r\nCRLF body.\r\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('crlf-agent'); + assert.ok(agent, 'should load despite CRLF'); + assert.equal(agent.model, 'default'); + assert.deepEqual(agent.tools, ['read_file']); + assert.match(agent.body, /CRLF body/); +}); + +test('AgentLoader: falls back to filename stem when no name in frontmatter', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'my-agent.md'), + '---\ndescription: unnamed\ntools: []\n---\nbody\n', + ); + const loader = new AgentLoader(dir); + assert.ok(loader.get('my-agent'), 'should resolve by filename stem'); +}); + +test('AgentLoader: no-frontmatter file loads body using filename stem', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'plain.md'), + 'Just a plain body with no frontmatter.\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('plain'); + assert.ok(agent); + assert.match(agent.body, /plain body/); + assert.deepEqual(agent.tools, []); +}); + +test('AgentLoader: tools array with inline array syntax', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 
'multi.md'), + '---\nname: multi\ntools: [read_file, write_file, bash]\n---\nbody\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('multi'); + assert.deepEqual(agent.tools, ['read_file', 'write_file', 'bash']); +}); + +test('AgentLoader: drafts/ subdirectory is quarantined (never loaded)', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'drafts', 'draft-agent.md'), + '---\nname: draft-agent\n---\nbody\n', + ); + const loader = new AgentLoader(dir); + assert.equal(loader.get('draft-agent'), null, 'draft agent must not auto-load'); + // Only bundled defaults present — no project agents aside from the quarantined draft + const names = loader.list().map(a => a.name); + assert.ok(!names.includes('draft-agent'), 'draft-agent must not appear in list'); +}); + +test('AgentLoader: multiple agents coexist and project agents are accessible', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'agents', 'a.md'), '---\nname: alpha\ntools: [read_file]\n---\nbody a\n'); + write(path.join(dir, '.smallcode', 'agents', 'b.md'), '---\nname: beta\ntools: [bash]\n---\nbody b\n'); + const loader = new AgentLoader(dir); + // Both project-defined agents must be present (bundled defaults are also loaded) + assert.ok(loader.get('alpha'), 'alpha must be present'); + assert.ok(loader.get('beta'), 'beta must be present'); + // Total count is project agents + bundled defaults (at least 2 project) + assert.ok(loader.list().length >= 2, 'should have at least the two project agents'); +}); + +// ── TeamLoader ──────────────────────────────────────────────────────────────── + +test('TeamLoader: missing project teams dir still returns bundled defaults', () => { + const dir = freshProject(); + const loader = new TeamLoader(dir); + // Bundled teams are always present; project dir is missing but that's fine + const names = loader.list().map(t => t.name); + assert.ok(names.includes('build'), 'bundled build team 
should be present'); + assert.ok(names.includes('debug'), 'bundled debug team should be present'); + // Unknown team name still returns null + assert.equal(loader.get('anything'), null); +}); + +test('TeamLoader: parses name/description/agents inline list', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'teams', 'review-pipeline.yaml'), + 'name: review-pipeline\ndescription: full review flow\nagents: [planner, reviewer, critic]\n', + ); + const loader = new TeamLoader(dir); + const team = loader.get('review-pipeline'); + assert.ok(team); + assert.equal(team.name, 'review-pipeline'); + assert.equal(team.description, 'full review flow'); + assert.deepEqual(team.agents, ['planner', 'reviewer', 'critic']); +}); + +test('TeamLoader: CRLF yaml parses correctly', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'teams', 'crlf-team.yaml'), + 'name: crlf-team\r\ndescription: crlf test\r\nagents: [a, b]\r\n', + ); + const loader = new TeamLoader(dir); + const team = loader.get('crlf-team'); + assert.ok(team); + assert.deepEqual(team.agents, ['a', 'b']); +}); + +test('TeamLoader: falls back to filename stem when no name field', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'teams', 'my-team.yaml'), + 'description: no name field\nagents: [x]\n', + ); + const loader = new TeamLoader(dir); + const team = loader.get('my-team'); + assert.ok(team, 'should resolve by filename stem'); + assert.deepEqual(team.agents, ['x']); +}); + +test('TeamLoader: drafts/ subdirectory is quarantined', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'teams', 'drafts', 'draft-team.yaml'), + 'name: draft-team\nagents: [a]\n', + ); + const loader = new TeamLoader(dir); + assert.equal(loader.get('draft-team'), null, 'draft team must not auto-load'); +}); + +test('TeamLoader: accepts .yml extension as well as .yaml', () => { + const dir = freshProject(); + write( + path.join(dir, 
'.smallcode', 'teams', 'alt.yml'), + 'name: alt-team\nagents: [p, q]\n', + ); + const loader = new TeamLoader(dir); + const team = loader.get('alt-team'); + assert.ok(team); + assert.deepEqual(team.agents, ['p', 'q']); +}); diff --git a/test/agent_runner.test.js b/test/agent_runner.test.js new file mode 100644 index 00000000..ea58ec6b --- /dev/null +++ b/test/agent_runner.test.js @@ -0,0 +1,409 @@ +'use strict'; + +// SmallCode — AgentRunner + runTeam tests +// Pins: isolation guarantee (initial history is task-only), tool narrowing, +// read_file fallback, step cap, token cap, fetch failure shape, team pipeline. + +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const { AgentRunner, buildNarrowedTools, buildSubAgentPrompt, resolveAgentTarget } = require('../src/plugins/agent_runner'); +const { runTeam } = require('../src/plugins/team_runner'); + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-runner-')); +} + +function fakeAgent(overrides = {}) { + return { + name: 'test-agent', + description: 'a test agent', + tools: overrides.tools !== undefined ? 
overrides.tools : ['read_file'], + model: overrides.model || null, + body: overrides.body || 'You are a test agent.', + }; +} + +function fakeConfig(overrides = {}) { + return { + model: { provider: 'openai', name: 'test-model', baseUrl: 'http://localhost:1234/v1' }, + context: { detected_window: overrides.detected_window || 32768 }, + models: overrides.models || {}, + ...overrides, + }; +} + +// Save and restore global.fetch around a test +async function withStubbedFetch(impl, fn) { + const orig = global.fetch; + global.fetch = impl; + try { return await fn(); } finally { global.fetch = orig; } +} + +// Build a minimal fetch response that produces a text-only completion +function makeTextResponse(content, usage = { prompt_tokens: 10, completion_tokens: 5 }) { + return async () => ({ + ok: true, + json: async () => ({ + choices: [{ message: { content, tool_calls: null }, finish_reason: 'stop' }], + usage, + }), + }); +} + +// Build a fetch that returns a single tool call then a text response +function makeToolThenTextFetch(toolName, toolArgs, textContent) { + let call = 0; + return async () => { + call++; + if (call === 1) { + return { + ok: true, + json: async () => ({ + choices: [{ + message: { + content: null, + tool_calls: [{ + id: 'tc1', + function: { name: toolName, arguments: JSON.stringify(toolArgs) }, + }], + }, + finish_reason: 'tool_calls', + }], + usage: { prompt_tokens: 20, completion_tokens: 10 }, + }), + }; + } + return { + ok: true, + json: async () => ({ + choices: [{ message: { content: textContent, tool_calls: null }, finish_reason: 'stop' }], + usage: { prompt_tokens: 30, completion_tokens: 15 }, + }), + }; + }; +} + +// ── buildNarrowedTools ──────────────────────────────────────────────────────── + +test('buildNarrowedTools: always includes read_file even if not requested', () => { + const tools = buildNarrowedTools([]); + const names = tools.map(t => t.function.name); + assert.ok(names.includes('read_file'), 'read_file must be in narrowed 
set'); +}); + +test('buildNarrowedTools: only canonical tools pass through', () => { + const tools = buildNarrowedTools(['read_file', 'bash', 'not_a_real_tool_xyz']); + const names = tools.map(t => t.function.name); + assert.ok(names.includes('read_file')); + assert.ok(names.includes('bash')); + assert.ok(!names.includes('not_a_real_tool_xyz'), 'non-canonical tool must be filtered out'); +}); + +test('buildNarrowedTools: spawn_agent is canonical and can be included', () => { + const tools = buildNarrowedTools(['spawn_agent', 'read_file']); + const names = tools.map(t => t.function.name); + assert.ok(names.includes('spawn_agent')); +}); + +// ── buildSubAgentPrompt ─────────────────────────────────────────────────────── + +test('buildSubAgentPrompt: body capped at 1600 chars with [truncated] marker', () => { + const longBody = 'x'.repeat(2000); + const agent = fakeAgent({ body: longBody }); + const tools = buildNarrowedTools(['read_file']); + const prompt = buildSubAgentPrompt(agent, tools); + // Body should be capped and marker present + assert.ok(prompt.includes('[truncated]'), 'truncation marker must appear'); + // Full body should NOT appear intact + assert.ok(!prompt.includes(longBody), 'full 2000-char body must not appear'); +}); + +test('buildSubAgentPrompt: short body passes through unchanged', () => { + const agent = fakeAgent({ body: 'Short body.' 
}); + const tools = buildNarrowedTools(['read_file']); + const prompt = buildSubAgentPrompt(agent, tools); + assert.ok(prompt.includes('Short body.')); + assert.ok(!prompt.includes('[truncated]')); +}); + +test('buildSubAgentPrompt: tool list line appended', () => { + const agent = fakeAgent({ tools: ['read_file', 'bash'] }); + const tools = buildNarrowedTools(['read_file', 'bash']); + const prompt = buildSubAgentPrompt(agent, tools); + assert.ok(prompt.includes('Available tools:')); + assert.ok(prompt.includes('read_file')); + assert.ok(prompt.includes('bash')); +}); + +// ── resolveAgentTarget ──────────────────────────────────────────────────────── + +test('resolveAgentTarget: null model → default tier', () => { + const config = fakeConfig({ models: { default: { name: 'default-model', baseUrl: 'http://x/v1' } } }); + const target = resolveAgentTarget(config, fakeAgent({ model: null })); + assert.equal(target.tier, 'default'); +}); + +test('resolveAgentTarget: tier name resolves to tier', () => { + const config = fakeConfig({ models: { fast: { name: 'fast-model', baseUrl: 'http://x/v1' } } }); + const target = resolveAgentTarget(config, fakeAgent({ model: 'fast' })); + assert.equal(target.tier, 'fast'); +}); + +test('resolveAgentTarget: literal model name resolves via getModelTargetForModel', () => { + const config = fakeConfig({}); + const target = resolveAgentTarget(config, fakeAgent({ model: 'my-specific-model' })); + // Should not throw; model name should appear + assert.ok(target, 'should return a target object'); + assert.ok(target.model === 'my-specific-model' || target.name === 'my-specific-model' || true); +}); + +// ── AgentRunner isolation + basic run ───────────────────────────────────────── + +test('AgentRunner: isolation pin — fetch receives only task in history (no parent history)', async () => { + const config = fakeConfig(); + let capturedBody; + await withStubbedFetch(async (url, opts) => { + capturedBody = JSON.parse(opts.body); + return { + 
ok: true, + json: async () => ({ + choices: [{ message: { content: 'done', tool_calls: null }, finish_reason: 'stop' }], + usage: { prompt_tokens: 5, completion_tokens: 3 }, + }), + }; + }, async () => { + const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } }); + await runner.run('Review this code.'); + }); + + // The messages sent to the model must be [system, user-task] only + assert.ok(capturedBody, 'fetch must have been called'); + const nonSystem = capturedBody.messages.filter(m => m.role !== 'system'); + assert.equal(nonSystem.length, 1, 'only one non-system message (the task)'); + assert.equal(nonSystem[0].role, 'user'); + assert.equal(nonSystem[0].content, 'Review this code.'); +}); + +test('AgentRunner: text-only response returns output and steps', async () => { + const config = fakeConfig(); + const result = await withStubbedFetch(makeTextResponse('All good.'), async () => { + const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } }); + return runner.run('Check the file.'); + }); + assert.equal(result.output, 'All good.'); + assert.equal(result.steps, 1); + assert.equal(result.tokens, 15); // 10+5 + assert.equal(result.error, undefined); +}); + +test('AgentRunner: run() never throws — returns error shape on HTTP failure', async () => { + const config = fakeConfig(); + const result = await withStubbedFetch(async () => ({ + ok: false, + status: 500, + text: async () => 'internal error', + }), async () => { + const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } }); + return runner.run('task'); + }); + assert.ok(result, 'must return a result object'); + assert.ok(typeof result.error === 'string', 'error must be a string'); + assert.equal(result.output, ''); +}); + +test('AgentRunner: run() never throws — returns error shape on fetch network error', async () => { + const config = fakeConfig(); + const result = await 
withStubbedFetch(async () => { throw new Error('ECONNREFUSED'); }, async () => { + const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } }); + return runner.run('task'); + }); + assert.ok(result); + assert.ok(typeof result.error === 'string'); + assert.ok(result.error.includes('ECONNREFUSED')); +}); + +test('AgentRunner: step cap — stops after MAX_STEPS and returns gracefully', async () => { + const config = fakeConfig(); + let calls = 0; + // Always return a tool_call so the agent loops forever (until step cap) + const result = await withStubbedFetch(async (url, opts) => { + calls++; + return { + ok: true, + json: async () => ({ + choices: [{ + message: { + content: null, + // Return tool call to a non-existent tool — executor will error but runner continues + tool_calls: [{ id: `tc${calls}`, function: { name: 'read_file', arguments: JSON.stringify({ path: 'x.txt' }) } }], + }, + finish_reason: 'tool_calls', + }], + usage: { prompt_tokens: 5, completion_tokens: 2 }, + }), + }; + }, async () => { + const runner = new AgentRunner(fakeAgent({ tools: ['read_file'] }), { config, flags: {}, tui: { renderDiff: () => null } }); + return runner.run('task'); + }); + // Should have stopped; steps should be at MAX_STEPS + assert.equal(result.steps, 15, 'should reach MAX_STEPS=15'); + assert.ok(!result.error, 'should not error on step cap'); +}); + +test('AgentRunner: token budget cap — stops when estimated tokens exceed budget', async () => { + // Use a very small context window so budget is tiny + const config = fakeConfig({ detected_window: 100 }); // budget = min(8000, 30) = 30 + let calls = 0; + const result = await withStubbedFetch(async (url, opts) => { + calls++; + const bigContent = 'x'.repeat(500); // large response inflates history + return { + ok: true, + json: async () => ({ + choices: [{ + message: { content: bigContent, tool_calls: null }, + finish_reason: 'stop', + }], + usage: { prompt_tokens: 200, completion_tokens: 
100 }, + }), + }; + }, async () => { + const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } }); + return runner.run('task'); + }); + // Either we hit token budget immediately (returning at step 0 or 1) or + // after the first response bloats history. In any case run() must not throw. + assert.ok(result, 'must return a result'); + assert.ok(typeof result.output === 'string'); +}); + +test('AgentRunner: invalid tool args JSON uses {} and lets tool error gracefully', async () => { + const config = fakeConfig(); + const result = await withStubbedFetch( + makeToolThenTextFetch('read_file', null, 'done after error'), + async () => { + // Manually build a runner that will receive bad JSON args + const runner = new AgentRunner(fakeAgent({ tools: ['read_file'] }), { config, flags: {}, tui: { renderDiff: () => null } }); + // Patch the tool call to have bad JSON + const orig = global.fetch; + let call = 0; + global.fetch = async (url, opts) => { + call++; + if (call === 1) { + return { + ok: true, + json: async () => ({ + choices: [{ + message: { + content: null, + tool_calls: [{ id: 'tc1', function: { name: 'read_file', arguments: 'NOT_VALID_JSON' } }], + }, + finish_reason: 'tool_calls', + }], + usage: { prompt_tokens: 5, completion_tokens: 2 }, + }), + }; + } + return { + ok: true, + json: async () => ({ + choices: [{ message: { content: 'recovered', tool_calls: null }, finish_reason: 'stop' }], + usage: { prompt_tokens: 5, completion_tokens: 2 }, + }), + }; + }; + try { + return await runner.run('task with bad args'); + } finally { + global.fetch = orig; + } + }, + ); + // Should not throw; agent should continue after the bad tool call + assert.ok(result); + assert.ok(typeof result.output === 'string'); +}); + +// ── runTeam pipeline ────────────────────────────────────────────────────────── + +test('runTeam: sequential pipeline pipes output → next agent input', async () => { + const config = fakeConfig(); + let callCount = 0; 
+ const received = []; + + await withStubbedFetch(async (url, opts) => { + callCount++; + const body = JSON.parse(opts.body); + const userMsg = body.messages.find(m => m.role === 'user'); + received.push(userMsg?.content); + const out = callCount === 1 ? 'output from alpha' : 'output from beta'; + return { + ok: true, + json: async () => ({ + choices: [{ message: { content: out, tool_calls: null }, finish_reason: 'stop' }], + usage: { prompt_tokens: 5, completion_tokens: 3 }, + }), + }; + }, async () => { + // Build a stub AgentLoader + const { AgentLoader } = require('../src/plugins/agent_loader'); + const dir = freshProject(); + fs.mkdirSync(path.join(dir, '.smallcode', 'agents'), { recursive: true }); + fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'alpha.md'), '---\nname: alpha\ntools: [read_file]\n---\nbody\n'); + fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'beta.md'), '---\nname: beta\ntools: [read_file]\n---\nbody\n'); + const agentLoader = new AgentLoader(dir); + const teamDef = { name: 'test-team', agents: ['alpha', 'beta'] }; + const ctx = { config, flags: {}, tui: { renderDiff: () => null }, skillManager: null }; + return runTeam(teamDef, 'initial task', ctx, agentLoader); + }); + + assert.equal(callCount, 2, 'should call model once per agent'); + // First agent receives the initial task + assert.equal(received[0], 'initial task'); + // Second agent receives first agent's output + assert.equal(received[1], 'output from alpha'); +}); + +test('runTeam: unknown agent produces error entry and continues pipeline', async () => { + const config = fakeConfig(); + await withStubbedFetch(makeTextResponse('beta output'), async () => { + const { AgentLoader } = require('../src/plugins/agent_loader'); + const dir = freshProject(); + fs.mkdirSync(path.join(dir, '.smallcode', 'agents'), { recursive: true }); + fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'beta.md'), '---\nname: beta\ntools: [read_file]\n---\nbody\n'); + const 
agentLoader = new AgentLoader(dir); + const teamDef = { name: 'test-team', agents: ['nonexistent', 'beta'] }; + const ctx = { config, flags: {}, tui: { renderDiff: () => null }, skillManager: null }; + const result = await runTeam(teamDef, 'task', ctx, agentLoader); + assert.ok(result.perAgent[0].error, 'first agent should have error'); + assert.equal(result.perAgent[0].name, 'nonexistent'); + assert.equal(result.perAgent[1].name, 'beta'); + assert.ok(!result.perAgent[1].error, 'beta should succeed'); + assert.equal(result.output, 'beta output'); + }); +}); + +test('runTeam: accumulates tokens across agents', async () => { + const config = fakeConfig(); + const result = await withStubbedFetch(makeTextResponse('out', { prompt_tokens: 10, completion_tokens: 5 }), async () => { + const { AgentLoader } = require('../src/plugins/agent_loader'); + const dir = freshProject(); + fs.mkdirSync(path.join(dir, '.smallcode', 'agents'), { recursive: true }); + fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'a1.md'), '---\nname: a1\ntools: [read_file]\n---\nbody\n'); + fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'a2.md'), '---\nname: a2\ntools: [read_file]\n---\nbody\n'); + const agentLoader = new AgentLoader(dir); + const teamDef = { name: 'tok-team', agents: ['a1', 'a2'] }; + const ctx = { config, flags: {}, tui: { renderDiff: () => null }, skillManager: null }; + return runTeam(teamDef, 'task', ctx, agentLoader); + }); + // 2 agents × 15 tokens each = 30 total + assert.equal(result.tokens, 30); + assert.equal(result.perAgent.length, 2); +}); diff --git a/test/chat_selection.test.js b/test/chat_selection.test.js new file mode 100644 index 00000000..64e4a812 --- /dev/null +++ b/test/chat_selection.test.js @@ -0,0 +1,179 @@ +'use strict'; + +// SmallCode — chat panel mouse selection tests +// Drag-to-highlight + copy in the fullscreen TUI chat panel. 
Tool panel and +// input area must not select; the 10-char role gutter (' USER │ ') never +// highlights or copies; clipboard receives ANSI-stripped text. + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { FullScreenTUI } = require('../src/tui/fullscreen'); + +// SGR mouse encodings +const press = (x, y) => `\x1b[<0;${x};${y}M`; +const drag = (x, y) => `\x1b[<32;${x};${y}M`; +const release = (x, y) => `\x1b[<0;${x};${y}m`; + +// Chat lines as addChat builds them: 8-char role label + '│ ' = 10-char +// gutter, then the message text. Text starts at 1-based column 11. +const USER = ' USER │ '; +const CONT = ' │ '; + +function makeTui(lines) { + const tui = new FullScreenTUI(); + tui.chatLines = lines; + tui.chatHeight = 10; + tui.chatWidth = 40; + tui.toolWidth = 30; + tui.chatScroll = 0; + tui.copied = null; + tui._copyToClipboard = (text) => { tui.copied = text; }; + tui.addTool = () => {}; + return tui; +} + +test('drag across two lines copies the span without gutter text', () => { + const tui = makeTui([USER + 'hello world', CONT + 'second line']); + tui._onMouseSelect(press(17, 1)); // "w" of world (col 16, 0-based) + tui._onMouseSelect(drag(16, 2)); // "d" of second + tui._onMouseSelect(release(16, 2)); + assert.equal(tui.copied, 'world\nsecond'); + assert.equal(tui.selection, null, 'selection cleared after copy'); +}); + +test('single-line selection respects column bounds', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(11, 1)); + tui._onMouseSelect(drag(15, 1)); + tui._onMouseSelect(release(15, 1)); + assert.equal(tui.copied, 'hello'); +}); + +test('drag starting in the gutter selects from the text start', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(2, 1)); // inside " USER " label + tui._onMouseSelect(drag(15, 1)); + tui._onMouseSelect(release(15, 1)); + assert.equal(tui.copied, 'hello'); +}); + +test('gutter is never included on continuation 
lines', () => { + const tui = makeTui([USER + 'first', CONT + 'middle', CONT + 'last line']); + tui._onMouseSelect(press(11, 1)); + tui._onMouseSelect(drag(14, 3)); + tui._onMouseSelect(release(14, 3)); + assert.equal(tui.copied, 'first\nmiddle\nlast'); +}); + +test('reverse drag (bottom-up) normalizes to the same text', () => { + const tui = makeTui([USER + 'hello world', CONT + 'second line']); + tui._onMouseSelect(press(16, 2)); + tui._onMouseSelect(drag(17, 1)); + tui._onMouseSelect(release(17, 1)); + assert.equal(tui.copied, 'world\nsecond'); +}); + +test('ANSI color codes are stripped from copied text', () => { + const tui = makeTui(['\x1b[36m USER \x1b[0m│ \x1b[32mgreen text\x1b[0m here']); + tui._onMouseSelect(press(11, 1)); + tui._onMouseSelect(drag(20, 1)); + tui._onMouseSelect(release(20, 1)); + assert.equal(tui.copied, 'green text'); +}); + +test('clicks in the tool panel do not start a selection', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(45, 1)); // beyond chatWidth=40 + assert.equal(tui.selection, null); + assert.equal(tui._selecting, false); +}); + +test('clicks below the chat panel do not start a selection', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(5, 12)); // beyond chatHeight=10 + assert.equal(tui.selection, null); +}); + +test('selection accounts for chat scroll offset', () => { + const lines = []; + for (let i = 0; i < 30; i++) lines.push(CONT + `line-${i}`); + const tui = makeTui(lines); + tui.chatScroll = -5; // scrolled up 5 lines: top visible row = line-15 + tui._onMouseSelect(press(11, 1)); + tui._onMouseSelect(drag(17, 1)); + tui._onMouseSelect(release(17, 1)); + assert.equal(tui.copied, 'line-15'); +}); + +test('highlight covers the selected span but not the gutter', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(2, 1)); // starts in the gutter + tui._onMouseSelect(drag(15, 1)); + const out = tui._highlightSelection(0, USER 
+ 'hello world'); + assert.equal(out, USER + '\x1b[7mhello\x1b[27m world'); +}); + +test('selection entirely inside the gutter copies nothing', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(2, 1)); + tui._onMouseSelect(drag(6, 1)); + tui._onMouseSelect(release(6, 1)); + assert.equal(tui.copied, null, 'no clipboard write for gutter-only selection'); +}); + +test('dwelling at the bottom edge auto-scrolls down', () => { + const lines = []; + for (let i = 0; i < 30; i++) lines.push(CONT + `line-${i}`); + const tui = makeTui(lines); + tui.chatScroll = -5; // visible: line-15 .. line-24 + tui._onMouseSelect(press(11, 1)); // anchor line-15 + tui._onMouseSelect(drag(17, 10)); // reach bottom edge — no scroll yet + assert.equal(tui.chatScroll, -5, 'first edge event selects, does not scroll'); + tui._onMouseSelect(drag(17, 10)); // dwell → -4 + assert.equal(tui.chatScroll, -4); + tui._onMouseSelect(drag(17, 10)); // dwell → -3 + assert.equal(tui.chatScroll, -3); + tui._onMouseSelect(release(17, 10)); // head followed the scroll to line-26 + assert.match(tui.copied, /^line-15\n/); + assert.match(tui.copied, /line-26$/); +}); + +test('dwelling at the top edge auto-scrolls up', () => { + const lines = []; + for (let i = 0; i < 30; i++) lines.push(CONT + `line-${i}`); + const tui = makeTui(lines); // visible: line-20 .. 
line-29 + tui._onMouseSelect(press(17, 5)); // anchor end of line-24 + tui._onMouseSelect(drag(11, 1)); // reach top edge — no scroll yet + assert.equal(tui.chatScroll, 0, 'first edge event selects, does not scroll'); + tui._onMouseSelect(drag(11, 1)); // dwell → -1 + assert.equal(tui.chatScroll, -1); + tui._onMouseSelect(release(11, 1)); // head = line-19 text start + assert.match(tui.copied, /^line-19\n/); + assert.match(tui.copied, /line-24$/); +}); + +test('dragging past the panel bottom scrolls immediately', () => { + const lines = []; + for (let i = 0; i < 30; i++) lines.push(CONT + `line-${i}`); + const tui = makeTui(lines); + tui.chatScroll = -5; + tui._onMouseSelect(press(11, 1)); + tui._onMouseSelect(drag(17, 12)); // y beyond chatHeight → immediate scroll + assert.equal(tui.chatScroll, -4); +}); + +test('auto-scroll clamps at the ends of history', () => { + const tui = makeTui([USER + 'only line']); // fewer lines than chatHeight + tui._onMouseSelect(press(11, 1)); + tui._onMouseSelect(drag(15, 10)); // bottom edge at scroll 0 + assert.equal(tui.chatScroll, 0, 'cannot scroll past the newest line'); + tui._onMouseSelect(drag(15, 1)); // top edge with no history + assert.equal(tui.chatScroll, 0, 'cannot scroll past the oldest line'); +}); + +test('wheel events are not consumed by selection handler', () => { + const tui = makeTui([USER + 'hello world']); + const consumed = tui._onMouseSelect('\x1b[<64;5;5M'); + assert.equal(consumed, false); +}); diff --git a/test/evolver.test.js b/test/evolver.test.js new file mode 100644 index 00000000..4fcf3d0e --- /dev/null +++ b/test/evolver.test.js @@ -0,0 +1,253 @@ +'use strict'; + +// SmallCode — Evolver (create-mode) tests +// Pins the deterministic mechanics behind /evolve: proposal validation, +// quarantined draft writing, the structural 1-create-per-run cap, friction +// extraction from traces, and the SkillManager drafts quarantine. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const evolver = require('../src/plugins/evolver'); +const { extractFrictionSignals, formatReportForPrompt } = require('../src/plugins/friction_analyzer'); +const { appendEntry, readEntries } = require('../src/plugins/audit_log'); +const { SkillManager } = require('../src/plugins/skills'); + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-evolver-')); +} + +function trace(id, prompt, steps = []) { + return { id, prompt, steps, tokens: { prompt: 0, completion: 0 } }; +} + +function failedStep(tool, file) { + return { type: 'tool_call', name: tool, args: JSON.stringify({ path: file }), result: '✗ failed' }; +} + +// ── Proposal building + validation ─────────────────────────────────────── + +test('buildSkillProposal returns a complete create proposal', () => { + const p = evolver.buildSkillProposal('my-skill', 'does things', 'Body here.', { + trigger: 'match', keywords: ['foo'], rationale: 'seen 3x', + }); + assert.equal(p.kind, 'create'); + assert.equal(p.artefact, 'skill'); + assert.equal(p.trigger, 'match'); + assert.deepEqual(p.keywords, ['foo']); +}); + +test('validateProposal accepts a valid proposal', () => { + const p = evolver.buildSkillProposal('ok-name', 'desc', 'body'); + assert.deepEqual(evolver.validateProposal(p), []); +}); + +test('validateProposal rejects bad names, empty fields, newline descriptions', () => { + const bad = (over) => evolver.validateProposal({ + ...evolver.buildSkillProposal('ok', 'desc', 'body'), ...over, + }); + assert.ok(bad({ name: 'has space' }).length > 0); + assert.ok(bad({ name: '../traverse' }).length > 0); + assert.ok(bad({ name: '' }).length > 0); + assert.ok(bad({ description: '' }).length > 0); + assert.ok(bad({ description: 'line1\nline2' }).length > 0, 'newline = frontmatter injection'); + assert.ok(bad({ 
body: ' ' }).length > 0); + assert.ok(bad({ trigger: 'bogus' }).length > 0); +}); + +test('validateProposal requires keywords for match trigger', () => { + const p = evolver.buildSkillProposal('m', 'd', 'b', { trigger: 'match', keywords: [] }); + assert.ok(evolver.validateProposal(p).length > 0); +}); + +// ── Collision check ─────────────────────────────────────────────────────── + +test('checkNameCollision finds existing flat and draft skills', () => { + const dir = freshProject(); + const skillsDir = path.join(dir, '.smallcode', 'skills'); + fs.mkdirSync(path.join(skillsDir, 'drafts'), { recursive: true }); + fs.writeFileSync(path.join(skillsDir, 'live-skill.md'), '---\nname: live-skill\n---\nx'); + fs.writeFileSync(path.join(skillsDir, 'drafts', 'pending.md'), '---\nname: pending\n---\nx'); + + assert.ok(evolver.checkNameCollision('live-skill', dir)); + assert.ok(evolver.checkNameCollision('pending', dir)); + assert.equal(evolver.checkNameCollision('brand-new', dir), null); +}); + +// ── Draft writing + cap ─────────────────────────────────────────────────── + +test('writeDraft writes to drafts/ quarantine with frontmatter', () => { + const dir = freshProject(); + const p = evolver.buildSkillProposal('drafted', 'a draft', 'Draft body.', { rationale: 'why' }); + const target = evolver.writeDraft(p, dir); + assert.match(target, /[\\/]drafts[\\/]drafted\.md$/); + const content = fs.readFileSync(target, 'utf-8'); + assert.match(content, /^---\nname: drafted\n/); + assert.match(content, /Draft body\./); + assert.match(content, /Rationale: why/); +}); + +test('writeDraft refuses invalid proposals', () => { + const dir = freshProject(); + assert.throws(() => evolver.writeDraft({ artefact: 'skill', name: 'x y', body: 'b' }, dir)); +}); + +test('EvolverRun allows one create, raises on the second', () => { + const dir = freshProject(); + const run = new evolver.EvolverRun(); + run.writeDraft(evolver.buildSkillProposal('first', 'd', 'b'), dir); + assert.throws( + () => 
run.writeDraft(evolver.buildSkillProposal('second', 'd', 'b'), dir), + evolver.ProposalCapExceededError + ); + assert.equal(run.createsSoFar, 1); +}); + +// ── Friction analysis ───────────────────────────────────────────────────── + +test('extractFrictionSignals returns empty report for no traces', () => { + const r = extractFrictionSignals([]); + assert.deepEqual(r.repeated_patterns, []); + assert.deepEqual(r.tool_retry_loops, []); + assert.equal(r.analyzed_traces, 0); +}); + +test('three near-identical prompts flag a repeated pattern', () => { + const traces = [ + trace('a1', 'convert this csv file to json format'), + trace('a2', 'convert the csv file into json format please'), + trace('a3', 'csv file convert to json format again'), + trace('b1', 'write unit tests for the auth module'), + ]; + const r = extractFrictionSignals(traces); + assert.equal(r.repeated_patterns.length, 1); + assert.equal(r.repeated_patterns[0].count, 3); + assert.deepEqual(r.repeated_patterns[0].traceIds.sort(), ['a1', 'a2', 'a3']); +}); + +test('rephrased prompts with filler-word drift still cluster (field regression)', () => { + // Exact prompts from a real session that failed to cluster before + // stopword filtering: the third drops the names and adds filler. 
+ const traces = [ + trace('s1', 'generate a random seating chart for my classroom students Ana, Ben, Cara, Dan, Eli and Fay'), + trace('s2', 'generate a new random seating chart for the classroom students Ana, Ben, Cara, Dan, Eli and Fay'), + trace('s3', 'generate another random seating chart for my classroom students please'), + ]; + const r = extractFrictionSignals(traces); + assert.equal(r.repeated_patterns.length, 1); + assert.equal(r.repeated_patterns[0].count, 3); +}); + +test('repeated pattern covered by an existing skill keyword is suppressed', () => { + const traces = [ + trace('a1', 'convert this csv file to json format'), + trace('a2', 'convert the csv file into json format please'), + trace('a3', 'csv file convert to json format again'), + ]; + const r = extractFrictionSignals(traces, { skillKeywords: ['csv'] }); + assert.equal(r.repeated_patterns.length, 0); +}); + +test('three consecutive same-tool failures flag a retry loop', () => { + const t = trace('t1', 'fix the parser', [ + failedStep('patch', 'src/parser.js'), + failedStep('patch', 'src/parser.js'), + failedStep('patch', 'src/parser.js'), + ]); + const r = extractFrictionSignals([t]); + assert.equal(r.tool_retry_loops.length, 1); + assert.equal(r.tool_retry_loops[0].failCount, 3); + assert.equal(r.tool_retry_loops[0].tool, 'patch'); +}); + +test('interrupted failures do not flag a retry loop', () => { + const t = trace('t1', 'fix it', [ + failedStep('patch', 'a.js'), + failedStep('patch', 'a.js'), + { type: 'tool_call', name: 'read_file', args: '{"path":"a.js"}', result: 'content' }, + failedStep('patch', 'a.js'), + ]); + const r = extractFrictionSignals([t]); + assert.equal(r.tool_retry_loops.length, 0); +}); + +test('formatReportForPrompt stays compact', () => { + const r = extractFrictionSignals([ + trace('a1', 'x'.repeat(500) + ' aaa bbb ccc'), + ]); + assert.ok(formatReportForPrompt(r).length <= 2000); +}); + +// ── Drafts quarantine in SkillManager ───────────────────────────────────── + 
+test('SkillManager never auto-loads skills from drafts/', () => { + const dir = freshProject(); + const draftsDir = path.join(dir, '.smallcode', 'skills', 'drafts'); + fs.mkdirSync(draftsDir, { recursive: true }); + fs.writeFileSync(path.join(draftsDir, 'lurker.md'), '---\nname: lurker\ntrigger: auto\n---\nshould not load'); + + const sm = new SkillManager(dir); + assert.equal(sm.get('lurker'), null, 'draft must stay quarantined'); +}); + +test('promoteDraft moves draft live and a fresh SkillManager loads it', () => { + const dir = freshProject(); + evolver.writeDraft(evolver.buildSkillProposal('riser', 'promoted skill', 'Now live.'), dir); + + const sm = new SkillManager(dir); + assert.equal(sm.get('riser'), null); + const target = sm.promoteDraft('riser'); + assert.ok(target); + assert.ok(sm.get('riser'), 'promoted skill loads in the same manager'); + + const sm2 = new SkillManager(dir); + assert.ok(sm2.get('riser'), 'promoted skill loads in a fresh manager'); + assert.equal(sm2.listDrafts().length, 0); +}); + +test('promoteDraft never overwrites an existing live skill', () => { + const dir = freshProject(); + const skillsDir = path.join(dir, '.smallcode', 'skills'); + fs.mkdirSync(skillsDir, { recursive: true }); + fs.writeFileSync(path.join(skillsDir, 'taken.md'), '---\nname: taken\n---\noriginal'); + evolver.writeDraft(evolver.buildSkillProposal('taken', 'd', 'impostor'), dir); + + const sm = new SkillManager(dir); + assert.equal(sm.promoteDraft('taken'), null); + assert.match(fs.readFileSync(path.join(skillsDir, 'taken.md'), 'utf-8'), /original/); +}); + +test('listDrafts reports quarantined names', () => { + const dir = freshProject(); + evolver.writeDraft(evolver.buildSkillProposal('one', 'd', 'b'), dir); + const sm = new SkillManager(dir); + assert.deepEqual(sm.listDrafts(), ['one']); +}); + +// ── Audit log ───────────────────────────────────────────────────────────── + +test('audit log appends and reads back entries', () => { + const dir = 
freshProject(); + const file = path.join(dir, '.smallcode', 'evolver-audit.jsonl'); + appendEntry(file, { ts: 't1', kind: 'create', name: 'a' }); + appendEntry(file, { ts: 't2', kind: 'create', name: 'b' }); + const entries = readEntries(file); + assert.equal(entries.length, 2); + assert.equal(entries[1].name, 'b'); +}); + +test('logCreateEvent writes a well-formed audit row', () => { + const dir = freshProject(); + const file = path.join(dir, '.smallcode', 'evolver-audit.jsonl'); + const p = evolver.buildSkillProposal('logged', 'd', 'b', { rationale: 'because' }); + evolver.logCreateEvent(file, p, 'because', ['t1', 't2']); + const [e] = readEntries(file); + assert.equal(e.kind, 'create'); + assert.equal(e.artefact, 'skill'); + assert.equal(e.name, 'logged'); + assert.deepEqual(e.source_traces, ['t1', 't2']); + assert.ok(e.ts); +}); diff --git a/test/input_editing.test.js b/test/input_editing.test.js new file mode 100644 index 00000000..c04ed0c1 --- /dev/null +++ b/test/input_editing.test.js @@ -0,0 +1,111 @@ +'use strict'; + +// SmallCode — input line editing tests (issues #93, #96) +// Line/word navigation and right-click paste in the fullscreen TUI input. +// Drives _onKeypress directly with raw key bytes and asserts on the resulting +// inputBuffer / inputCursor state. render() is stubbed so no terminal is needed. + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { FullScreenTUI } = require('../src/tui/fullscreen'); + +function makeTui(buffer = '', cursor = null) { + const tui = new FullScreenTUI(); + tui.render = () => {}; // no terminal in tests + tui.inputBuffer = buffer; + tui.inputCursor = cursor == null ? 
buffer.length : cursor; + return tui; +} + +const send = (tui, key) => tui._onKeypress(Buffer.from(key, 'binary')); + +// ─── Line navigation (issue #93) ─────────────────────────────────────────── + +test('Home (\\x1b[H) and Ctrl+A (\\x01) move to start of line', async () => { + for (const key of ['\x1b[H', '\x1b[1~', '\x01']) { + const tui = makeTui('hello world'); + await send(tui, key); + assert.equal(tui.inputCursor, 0, `key ${JSON.stringify(key)}`); + } +}); + +test('End (\\x1b[F) and Ctrl+E (\\x05) move to end of line', async () => { + for (const key of ['\x1b[F', '\x1b[4~', '\x05']) { + const tui = makeTui('hello world', 0); + await send(tui, key); + assert.equal(tui.inputCursor, 11, `key ${JSON.stringify(key)}`); + } +}); + +// ─── Word navigation (issue #93) ─────────────────────────────────────────── + +test('Ctrl+Left (\\x1b[1;5D) jumps to the previous word boundary', async () => { + const tui = makeTui('hello world foo'); // cursor at end (15) + await send(tui, '\x1b[1;5D'); + assert.equal(tui.inputCursor, 12); // start of "foo" + await send(tui, '\x1b[1;5D'); + assert.equal(tui.inputCursor, 6); // start of "world" +}); + +test('Ctrl+Right (\\x1b[1;5C) jumps to the next word boundary', async () => { + const tui = makeTui('hello world foo', 0); + await send(tui, '\x1b[1;5C'); + assert.equal(tui.inputCursor, 5); // end of "hello" + await send(tui, '\x1b[1;5C'); + assert.equal(tui.inputCursor, 11); // end of "world" +}); + +// ─── Word / char deletion (issue #93) ────────────────────────────────────── + +test('Ctrl+W (\\x17) deletes the word to the left of the cursor', async () => { + const tui = makeTui('hello world foo'); + await send(tui, '\x17'); + assert.equal(tui.inputBuffer, 'hello world '); + assert.equal(tui.inputCursor, 12); +}); + +test('Ctrl+Delete (\\x1b[3;5~) deletes the word to the right', async () => { + const tui = makeTui('hello world foo', 6); // cursor before "world" + await send(tui, '\x1b[3;5~'); + assert.equal(tui.inputBuffer, 
'hello foo'); + assert.equal(tui.inputCursor, 6); +}); + +test('Delete (\\x1b[3~) removes the character under the cursor', async () => { + const tui = makeTui('abc', 1); + await send(tui, '\x1b[3~'); + assert.equal(tui.inputBuffer, 'ac'); + assert.equal(tui.inputCursor, 1); +}); + +test('word-delete keeps the command palette state in sync', async () => { + const tui = makeTui('/model gpt', 10); + await send(tui, '\x17'); // delete the "gpt" argument + assert.equal(tui.inputBuffer, '/model '); + assert.equal(tui.commandPaletteOpen, true); // still a slash command +}); + +// ─── Right-click paste (issue #96) ───────────────────────────────────────── + +test('right-click release pastes clipboard at the cursor', async () => { + const tui = makeTui('ab', 1); + tui._pasteFromClipboard = function () { // stub the OS clipboard read + const text = 'XY'; + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + text + this.inputBuffer.slice(this.inputCursor); + this.inputCursor += text.length; + }; + await send(tui, '\x1b[<2;10;5m'); // SGR button-2 (right) release + assert.equal(tui.inputBuffer, 'aXYb'); + assert.equal(tui.inputCursor, 3); +}); + +test('right-click press and left-click do not trigger paste', async () => { + let pasted = false; + const tui = makeTui('ab', 1); + tui._pasteFromClipboard = () => { pasted = true; }; + tui._onMouseSelect = () => true; // swallow selection handling + await send(tui, '\x1b[<2;10;5M'); // right-button PRESS (uppercase M) + await send(tui, '\x1b[<0;10;5m'); // left-button release + assert.equal(pasted, false); +}); diff --git a/test/live_settings.test.js b/test/live_settings.test.js new file mode 100644 index 00000000..735b905b --- /dev/null +++ b/test/live_settings.test.js @@ -0,0 +1,77 @@ +'use strict'; + +// SmallCode — live activity settings + /live command tests (issue #77) + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const live = require('../bin/live_settings'); + +function 
withEnv(vars, fn) { + const saved = {}; + for (const k of Object.keys(vars)) { saved[k] = process.env[k]; if (vars[k] == null) delete process.env[k]; else process.env[k] = vars[k]; } + live._reset(); + try { return fn(); } finally { + for (const k of Object.keys(vars)) { if (saved[k] == null) delete process.env[k]; else process.env[k] = saved[k]; } + live._reset(); + } +} + +test('defaults: tools/context on, stream/thinking off', () => { + withEnv({ SMALLCODE_LIVE_TOOLS: null, SMALLCODE_LIVE_CONTEXT: null, SMALLCODE_LIVE_STREAM: null, SMALLCODE_LIVE_THINKING: null }, () => { + assert.deepEqual(live.getLiveSettings(), { tools: true, context: true, stream: false, thinking: false }); + }); +}); + +test('env overrides seed the settings', () => { + withEnv({ SMALLCODE_LIVE_TOOLS: 'off', SMALLCODE_LIVE_STREAM: 'true' }, () => { + const s = live.getLiveSettings(); + assert.equal(s.tools, false); + assert.equal(s.stream, true); + }); +}); + +test('/live with no arg returns status without mutating', () => { + withEnv({}, () => { + const r = live.resolveLiveCommand(''); + assert.equal(r.action, 'status'); + assert.match(r.text, /tools/); + assert.match(r.text, /thinking/); + }); +}); + +test('/live on|off sets explicitly', () => { + withEnv({}, () => { + assert.equal(live.resolveLiveCommand('stream on').value, true); + assert.equal(live.getLiveSettings().stream, true); + assert.equal(live.resolveLiveCommand('stream off').value, false); + assert.equal(live.getLiveSettings().stream, false); + }); +}); + +test('/live with no value toggles', () => { + withEnv({}, () => { + const before = live.getLiveSettings().tools; // default true + const r = live.resolveLiveCommand('tools'); + assert.equal(r.value, !before); + assert.equal(live.getLiveSettings().tools, !before); + }); +}); + +test('/live all on|off sets every feature', () => { + withEnv({}, () => { + live.resolveLiveCommand('all off'); + assert.deepEqual(live.getLiveSettings(), { tools: false, context: false, stream: false, 
thinking: false }); + live.resolveLiveCommand('all on'); + assert.deepEqual(live.getLiveSettings(), { tools: true, context: true, stream: true, thinking: true }); + }); +}); + +test('unknown feature and bad value produce errors, no mutation', () => { + withEnv({}, () => { + const before = { ...live.getLiveSettings() }; + assert.equal(live.resolveLiveCommand('bogus on').action, 'error'); + assert.equal(live.resolveLiveCommand('stream maybe').action, 'error'); + assert.deepEqual(live.getLiveSettings(), before); + }); +}); diff --git a/test/live_tui.test.js b/test/live_tui.test.js new file mode 100644 index 00000000..295d0097 --- /dev/null +++ b/test/live_tui.test.js @@ -0,0 +1,72 @@ +'use strict'; + +// SmallCode — live TUI primitives (issue #77) +// toolStart/toolEnd rewrite a single tool line in place; setContextMeter +// formats the footer indicator; TokenMonitor.contextMeter reports usage. + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { FullScreenTUI } = require('../src/tui/fullscreen'); +const { TokenMonitor } = require('../bin/token_monitor'); + +function makeTui() { + const tui = new FullScreenTUI(); + tui.render = () => {}; + return tui; +} + +const strip = (s) => s.replace(/\x1b\[[0-9;]*m/g, ''); + +test('toolStart pushes one ⚙ line; toolEnd rewrites the SAME line in place', () => { + const tui = makeTui(); + const before = tui.chatLines.length; + const h = tui.toolStart('write_file', 'hello.py'); + assert.equal(tui.chatLines.length, before + 1, 'exactly one line added'); + assert.match(strip(tui.chatLines[h.chatIdx]), /⚙.*write_file.*hello\.py/); + + tui.toolEnd(h, 'ok', 'wrote 12 lines'); + assert.equal(tui.chatLines.length, before + 1, 'no extra line on completion'); + assert.match(strip(tui.chatLines[h.chatIdx]), /✓.*write_file.*wrote 12 lines/); +}); + +test('toolEnd marks errors with ✗', () => { + const tui = makeTui(); + const h = tui.toolStart('bash', 'npm test'); + tui.toolEnd(h, 'err', 'Exit code 1'); 
+ assert.match(strip(tui.chatLines[h.chatIdx]), /✗.*bash.*Exit code 1/); +}); + +test('toolEnd survives interleaved lines (index stays anchored)', () => { + const tui = makeTui(); + const h = tui.toolStart('read_file', 'a.js'); + tui.addTool('router', 'ok', 'plan'); // unrelated line pushed in between + tui.toolEnd(h, 'ok', 'read 40 lines'); + assert.match(strip(tui.chatLines[h.chatIdx]), /✓.*read_file.*read 40 lines/); + assert.match(strip(tui.chatLines[h.chatIdx + 1]), /router/); // the interleaved line is intact +}); + +test('toolEnd falls back to a fresh line when the handle is missing', () => { + const tui = makeTui(); + const before = tui.chatLines.length; + tui.toolEnd(null, 'ok', 'orphan'); + assert.equal(tui.chatLines.length, before + 1); +}); + +test('setContextMeter formats percent + token counts', () => { + const tui = makeTui(); + tui.setContextMeter(42, 13000, 32000); + assert.equal(tui.contextMeter, 'ctx 42% (13.0k/32.0k)'); + tui.setContextMeter(null); + assert.equal(tui.contextMeter, ''); +}); + +test('TokenMonitor.contextMeter reports last prompt vs window', () => { + const tm = new TokenMonitor(); + tm.recordCall(8000, 200); + tm.recordCall(16000, 300); // most recent prompt = 16000 + const m = tm.contextMeter(32000); + assert.equal(m.used, 16000); + assert.equal(m.window, 32000); + assert.equal(Math.round(m.pct), 50); +}); diff --git a/test/mcp_self_reference.test.js b/test/mcp_self_reference.test.js new file mode 100644 index 00000000..f6a221c6 --- /dev/null +++ b/test/mcp_self_reference.test.js @@ -0,0 +1,56 @@ +'use strict'; + +// SmallCode — MCP self-reference guard tests (issue #82) +// A self-referential mcp.json entry that relaunches `smallcode --mcp` made each +// MCP server spawn another server recursively — an unbounded fork bomb that +// exhausted RAM. MCPClient._isSelfReference flags such entries so loadConfig +// can skip them. 
(The primary fix is host-side: --mcp mode never runs the +// client at all; this is defense-in-depth for a stale/bad config.) + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { MCPClient } = require('../src/tools/mcp_client'); + +test('flags direct smallcode --mcp entries', () => { + const cases = [ + { command: 'smallcode', args: ['--mcp'] }, + { command: 'node', args: ['/home/u/.smallcode/bin/smallcode.js', '--mcp'] }, + { command: 'npx', args: ['smallcode', '--mcp'] }, + { command: 'smolv2', args: ['--mcp'] }, + ]; + for (const cfg of cases) { + assert.equal(MCPClient._isSelfReference(cfg), true, JSON.stringify(cfg)); + } +}); + +test('does NOT flag legitimate third-party MCP servers', () => { + const cases = [ + { command: 'node', args: ['./my-server.js'] }, // no --mcp + { command: 'uvx', args: ['mcp-server-fetch'] }, + { command: 'docker', args: ['run', 'ghcr.io/foo/bar'] }, + { command: 'smallcode', args: [] }, // smallcode, but not --mcp + { command: 'node', args: ['smallcode-helper.js'] }, // name match but no --mcp + ]; + for (const cfg of cases) { + assert.equal(MCPClient._isSelfReference(cfg), false, JSON.stringify(cfg)); + } +}); + +test('handles malformed configs defensively', () => { + assert.equal(MCPClient._isSelfReference(null), false); + assert.equal(MCPClient._isSelfReference({}), false); + assert.equal(MCPClient._isSelfReference({ command: 'smallcode' }), false); // args undefined + assert.equal(MCPClient._isSelfReference({ args: '--mcp' }), false); // args not an array +}); + +test('loadConfig skips a self-referential entry but keeps real ones', () => { + // loadConfig reads from disk; here we exercise the filter directly by + // simulating what loadConfig does with a parsed mcpServers object. 
+ const servers = { + 'fork-bomb': { command: 'node', args: ['smallcode.js', '--mcp'] }, + 'fetch': { command: 'uvx', args: ['mcp-server-fetch'] }, + }; + const kept = Object.entries(servers).filter(([, cfg]) => !MCPClient._isSelfReference(cfg)); + assert.deepEqual(kept.map(([n]) => n), ['fetch']); +}); diff --git a/test/memory_hygiene.test.js b/test/memory_hygiene.test.js new file mode 100644 index 00000000..af5f94e4 --- /dev/null +++ b/test/memory_hygiene.test.js @@ -0,0 +1,182 @@ +'use strict'; + +// SmallCode — Memory hygiene tests +// Verifies age/cap sweeps, backfill, index render, no-op empty, round-trip. + +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const { runHygiene, renderMemoryIndex, extractMeta } = require('../src/memory/hygiene'); +const { MemoryStore } = require('../bin/memory'); + +function freshStore() { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'sc-hygiene-')); + return new MemoryStore(dir); +} + +function daysAgo(n) { + return new Date(Date.now() - n * 86400000).toISOString(); +} + +// ── No-op on empty store ────────────────────────────────────────────────────── + +test('runHygiene on empty store returns zeros', () => { + const store = freshStore(); + const result = runHygiene(store); + assert.equal(result.archived, 0); + assert.equal(result.deleted, 0); + assert.equal(result.total, 0); +}); + +// ── Backfill ───────────────────────────────────────────────────────────────── + +test('runHygiene backfills tier=hot and last_used_at on old entries', () => { + const store = freshStore(); + // Remember without tier/last_used_at (old-format entry) + const obj = store.remember('decision', 'old entry', 'content', {}); + // Strip tier/last_used_at to simulate pre-hygiene entry + obj.tier = undefined; + obj.last_used_at = undefined; + store.save(); // persist mutated object + + runHygiene(store, { 
archiveAge: 9999, deleteAge: 9999 }); + + const objs = store.all(); + const { tier } = extractMeta(objs[0]); + assert.equal(tier, 'hot'); +}); + +// ── Age sweep: hot → archive ────────────────────────────────────────────────── + +test('hot entry unused > archiveAge is moved to archive', () => { + const store = freshStore(); + const obj = store.remember('gotcha', 'stale hot', 'content', {}); + // Force last_used_at to 70 days ago + obj.last_used_at = daysAgo(70); + obj.tier = 'hot'; + store.save(); + + const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 }); + assert.equal(result.archived, 1); + + const { tier } = extractMeta(store.all()[0]); + assert.equal(tier, 'archive'); +}); + +test('hot entry within archiveAge is NOT archived', () => { + const store = freshStore(); + const obj = store.remember('context', 'fresh entry', 'content', {}); + obj.last_used_at = daysAgo(5); + obj.tier = 'hot'; + store.save(); + + const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 }); + assert.equal(result.archived, 0); + assert.equal(result.deleted, 0); +}); + +// ── Age sweep: archive → delete ─────────────────────────────────────────────── + +test('archive entry older than deleteAge is deleted', () => { + const store = freshStore(); + const obj = store.remember('workflow', 'ancient archive', 'content', {}); + obj.last_used_at = daysAgo(100); + obj.tier = 'archive'; + store.save(); + + const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 }); + assert.equal(result.deleted, 1); + assert.equal(store.all().length, 0); +}); + +test('archive entry within deleteAge is NOT deleted', () => { + const store = freshStore(); + const obj = store.remember('workflow', 'recent archive', 'content', {}); + obj.last_used_at = daysAgo(65); + obj.tier = 'archive'; + store.save(); + + const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 }); + assert.equal(result.deleted, 0); + // May or may not archive again based on whether it's already archive + 
assert.equal(store.all().length, 1); +}); + +// ── Cap sweep ──────────────────────────────────────────────────────────────── + +test('cap sweep archives oldest entries when hot > hotCap', () => { + const store = freshStore(); + // Create 6 hot entries with varying last_used_at, cap=4, batch=2 + for (let i = 0; i < 6; i++) { + const obj = store.remember('convention', `entry-${i}`, `content ${i}`, {}); + obj.last_used_at = daysAgo(i * 2); // older entries have higher i + obj.tier = 'hot'; + store.save(); + } + + const result = runHygiene(store, { hotCap: 4, batch: 2, archiveAge: 9999, deleteAge: 9999 }); + assert.equal(result.archived, 2); + + const all = store.all(); + const archived = all.filter(o => extractMeta(o).tier === 'archive'); + assert.equal(archived.length, 2); + // The 2 oldest should be archived + const archivedNames = archived.map(o => o.title).sort(); + assert.ok(archivedNames.includes('entry-4') || archivedNames.includes('entry-5')); +}); + +// ── No-op when under cap ───────────────────────────────────────────────────── + +test('cap sweep is no-op when hot count <= hotCap', () => { + const store = freshStore(); + const obj = store.remember('decision', 'single entry', 'content', {}); + obj.tier = 'hot'; + obj.last_used_at = daysAgo(1); + store.save(); + + const result = runHygiene(store, { hotCap: 10, batch: 5, archiveAge: 9999, deleteAge: 9999 }); + assert.equal(result.archived, 0); +}); + +// ── renderMemoryIndex ───────────────────────────────────────────────────────── + +test('renderMemoryIndex returns empty marker for empty store', () => { + const store = freshStore(); + const md = renderMemoryIndex(store); + assert.ok(md.includes('empty')); +}); + +test('renderMemoryIndex groups by tier then type', () => { + const store = freshStore(); + const h = store.remember('decision', 'hot entry', 'content', {}); + h.tier = 'hot'; + store.save(); + const a = store.remember('workflow', 'archive entry', 'other', {}); + a.tier = 'archive'; + store.save(); + 
+ const md = renderMemoryIndex(store); + assert.ok(md.includes('## Hot')); + assert.ok(md.includes('## Archive')); + // Hot section comes before archive + assert.ok(md.indexOf('## Hot') < md.indexOf('## Archive')); +}); + +// ── Round-trip: tier survives save/reload ──────────────────────────────────── + +test('tier and last_used_at survive store save and reload', () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'sc-hygiene-rt-')); + const store1 = new MemoryStore(dir); + const obj = store1.remember('context', 'persist me', 'content', {}); + obj.tier = 'archive'; + obj.last_used_at = daysAgo(70); + store1.save(); + + const store2 = new MemoryStore(dir); + const loaded = store2.all()[0]; + assert.equal(loaded.tier, 'archive'); + assert.ok(loaded.last_used_at); +}); diff --git a/test/provider_wizard.test.js b/test/provider_wizard.test.js index 893282e1..0e4473c1 100644 --- a/test/provider_wizard.test.js +++ b/test/provider_wizard.test.js @@ -10,7 +10,7 @@ const os = require('node:os'); const path = require('node:path'); const { parseEnvFile, PROVIDERS, formatStatus } = require('../bin/provider-wizard/status'); -const { mergeEnvFile } = require('../bin/provider-wizard/wizard'); +const { mergeEnvFile, runWizard, fetchModels } = require('../bin/provider-wizard/wizard'); function tmp(prefix) { return fs.mkdtempSync(path.join(os.tmpdir(), `${prefix}-`)); @@ -77,6 +77,89 @@ test('mergeEnvFile creates content from scratch when file missing', () => { assert.match(out, /^NEW=yes$/m); }); +// Scripted readline stand-in: answers questions in order, records close(). +function fakeRl(answers) { + const queue = [...answers]; + return { + closed: false, + question(q, cb) { cb(queue.length ? 
queue.shift() : ''); }, + close() { this.closed = true; }, + }; +} + +async function withStubbedFetch(impl, fn) { + const orig = global.fetch; + global.fetch = impl; + try { return await fn(); } finally { global.fetch = orig; } +} + +async function inTmpCwd(fn) { + const dir = tmp('sc-pw-wiz'); + const orig = process.cwd(); + process.chdir(dir); + try { return await fn(dir); } finally { process.chdir(orig); } +} + +test('fetchModels returns ids from an OpenAI-compatible /models endpoint', async () => { + const got = await withStubbedFetch( + async () => ({ ok: true, json: async () => ({ data: [{ id: 'a' }, { id: 'b' }] }) }), + () => fetchModels('http://localhost:11434/v1'), + ); + assert.deepEqual(got, ['a', 'b']); +}); + +test('fetchModels returns [] on unreachable server', async () => { + const got = await withStubbedFetch( + async () => { throw new Error('ECONNREFUSED'); }, + () => fetchModels('http://localhost:1/v1'), + ); + assert.deepEqual(got, []); +}); + +test('wizard offers local model picker and uses the selection', async () => { + await inTmpCwd(async (dir) => { + const rl = fakeRl([ + '2', // provider: Ollama + '', // base URL: accept default + '2', // model picker: second entry + 'n', // no escalation + '2', // save to project only + ]); + const result = await withStubbedFetch( + async () => ({ ok: true, json: async () => ({ data: [{ id: 'm-one' }, { id: 'm-two' }] }) }), + () => runWizard({ interactive: true, rl }), + ); + assert.equal(result.success, true); + assert.equal(result.model, 'm-two'); + const env = parseEnvFile(path.join(dir, '.env')); + assert.equal(env.SMALLCODE_MODEL, 'm-two'); + assert.equal(env.SMALLCODE_PROVIDER, 'ollama'); + // Borrowed rl must not be closed by the wizard (issue: duplicated + // keystrokes came from a second readline on the same stdin) + assert.equal(rl.closed, false); + }); +}); + +test('wizard falls back to free-text model when listing fails', async () => { + await inTmpCwd(async (dir) => { + const rl = fakeRl([ 
+ '2', // provider: Ollama + '', // base URL: accept default + 'typed-model', // manual model entry (picker unavailable) + 'n', // no escalation + '2', // save to project only + ]); + const result = await withStubbedFetch( + async () => { throw new Error('ECONNREFUSED'); }, + () => runWizard({ interactive: true, rl }), + ); + assert.equal(result.success, true); + assert.equal(result.model, 'typed-model'); + const env = parseEnvFile(path.join(dir, '.env')); + assert.equal(env.SMALLCODE_MODEL, 'typed-model'); + }); +}); + test('formatStatus renders provider, base url, model, escalation', () => { const out = formatStatus({ provider: 'openai', diff --git a/test/quality_monitor.test.js b/test/quality_monitor.test.js index 48569cac..77266020 100644 --- a/test/quality_monitor.test.js +++ b/test/quality_monitor.test.js @@ -10,7 +10,7 @@ test('empty response (no text + no tool calls) fires empty_response', () => { const sig = qm.inspect({ message: { content: ' ', tool_calls: [] }, knownTools: ['read_file'] }); assert.ok(sig); assert.equal(sig.kind, 'empty_response'); - assert.match(sig.injection, /\[QUALITY-MONITOR\]/); + assert.match(sig.injection, /Self-check note:/); }); test('empty tool name fires empty_tool_name', () => { diff --git a/test/skill_lazy.test.js b/test/skill_lazy.test.js new file mode 100644 index 00000000..59da7938 --- /dev/null +++ b/test/skill_lazy.test.js @@ -0,0 +1,190 @@ +'use strict'; + +// SmallCode — Lazy skill loading tests +// Verifies index-first SkillManager, lazy body loading, getIndex() fields, +// formatter output, and backward compatibility with existing callers. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const { SkillManager } = require('../src/plugins/skills'); +const { formatSkillIndex, formatSkillResult } = require('../src/plugins/skill_index_formatter'); + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-lazy-')); +} + +function write(file, content) { + fs.mkdirSync(path.dirname(file), { recursive: true }); + fs.writeFileSync(file, content); +} + +// ── Index-only startup ──────────────────────────────────────────────────────── + +test('index is populated on construction without loading bodies', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'alpha.md'), + '---\nname: alpha\ntrigger: manual\ndescription: does alpha things\n---\nbody text here'); + + const sm = new SkillManager(dir); + // _index must have the entry + assert.ok(sm._index.has('alpha'), '_index should have alpha'); + // skills (body cache) should NOT have it yet + assert.ok(!sm.skills.has('alpha'), 'body cache should be empty before get()'); +}); + +test('getIndex() returns expected fields without loading bodies', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'beta.md'), + '---\nname: beta\ntrigger: match\nkeywords: [foo, bar]\ndescription: beta desc\ntags: [t1]\nrelated: [alpha]\n---\nbeta body'); + + const sm = new SkillManager(dir); + const idx = sm.getIndex(); + const entry = idx.find(e => e.name === 'beta'); + assert.ok(entry, 'getIndex should return beta'); + assert.equal(entry.name, 'beta'); + assert.equal(entry.description, 'beta desc'); + assert.equal(entry.trigger, 'match'); + assert.deepEqual(entry.keywords, ['foo', 'bar']); + assert.deepEqual(entry.tags, ['t1']); + assert.deepEqual(entry.related, ['alpha']); + assert.ok(entry.path); + assert.equal(entry.origin, 'flat'); + // Body should still not 
be loaded + assert.ok(!sm.skills.has('beta')); +}); + +// ── Lazy get() ──────────────────────────────────────────────────────────────── + +test('get() lazily loads body on first call', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'lazy.md'), + '---\nname: lazy\ntrigger: manual\n---\nthe lazy body content'); + + const sm = new SkillManager(dir); + assert.ok(!sm.skills.has('lazy'), 'body not loaded yet'); + const skill = sm.get('lazy'); + assert.ok(skill, 'get() returns the skill'); + assert.match(skill.content, /the lazy body content/); + assert.ok(sm.skills.has('lazy'), 'body is cached after get()'); +}); + +test('get() caches: second call returns same object', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'cached.md'), + '---\nname: cached\ntrigger: manual\n---\ncached body'); + + const sm = new SkillManager(dir); + const first = sm.get('cached'); + const second = sm.get('cached'); + assert.strictEqual(first, second, 'should return same cached object'); +}); + +test('get() returns null for unknown skill', () => { + const dir = freshProject(); + const sm = new SkillManager(dir); + assert.equal(sm.get('nonexistent'), null); +}); + +// ── Backward compat: public API unchanged ───────────────────────────────────── + +test('list() returns entries with name/trigger/keywords/origin', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'listme.md'), + '---\nname: listme\ntrigger: auto\nkeywords: [x]\n---\nlist body'); + + const sm = new SkillManager(dir); + const items = sm.list(); + const item = items.find(i => i.name === 'listme'); + assert.ok(item); + assert.equal(item.trigger, 'auto'); + assert.deepEqual(item.keywords, ['x']); + assert.equal(item.origin, 'flat'); + // list() should NOT load bodies + assert.ok(!sm.skills.has('listme')); +}); + +test('getAutoSkills() loads bodies only for matched skills', () => { + const dir = freshProject(); + 
write(path.join(dir, '.smallcode', 'skills', 'always.md'), + '---\nname: always\ntrigger: auto\n---\nauto body'); + write(path.join(dir, '.smallcode', 'skills', 'keyword.md'), + '---\nname: keyword\ntrigger: match\nkeywords: [deploy]\n---\ndeploy body'); + write(path.join(dir, '.smallcode', 'skills', 'nomatch.md'), + '---\nname: nomatch\ntrigger: match\nkeywords: [unrelated]\n---\nnomatch body'); + + const sm = new SkillManager(dir); + const result = sm.getAutoSkills('please deploy the app'); + const names = result.map(s => s.name).sort(); + assert.deepEqual(names, ['always', 'keyword']); + // nomatch should not be loaded + assert.ok(!sm.skills.has('nomatch')); +}); + +// ── Formatter ──────────────────────────────────────────────────────────────── + +test('formatSkillIndex produces one line per skill', () => { + const entries = [ + { name: 'foo', description: 'does foo', trigger: 'manual', keywords: [] }, + { name: 'bar', description: 'does bar', trigger: 'match', keywords: ['baz'] }, + ]; + const out = formatSkillIndex(entries); + assert.ok(out.includes('foo')); + assert.ok(out.includes('bar')); + // Each skill on its own line + const lines = out.split('\n').filter(l => l.includes('foo') || l.includes('bar')); + assert.equal(lines.length, 2); +}); + +test('formatSkillIndex returns empty string for no entries', () => { + assert.equal(formatSkillIndex([]), ''); + assert.equal(formatSkillIndex(null), ''); +}); + +test('formatSkillResult includes body and related names', () => { + const skill = { name: 'main', description: '', content: 'main body content', keywords: [], trigger: 'manual' }; + const related = [ + { name: 'other', description: 'the other skill' }, + ]; + const out = formatSkillResult(skill, related); + assert.ok(out.includes('main body content')); + assert.ok(out.includes('other')); + assert.ok(out.includes('the other skill')); +}); + +test('formatSkillResult with no related entries', () => { + const skill = { name: 's', content: 'solo body', keywords: 
[], trigger: 'manual', description: '' }; + const out = formatSkillResult(skill, []); + assert.ok(out.includes('solo body')); + assert.ok(!out.includes('Related skills')); +}); + +// ── New frontmatter fields backward compat ──────────────────────────────────── + +test('skills without description/tags/related still load correctly', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'plain.md'), + '---\nname: plain\ntrigger: manual\n---\njust a plain body'); + + const sm = new SkillManager(dir); + const skill = sm.get('plain'); + assert.ok(skill); + assert.equal(skill.description, ''); + assert.deepEqual(skill.tags, []); + assert.deepEqual(skill.related, []); + assert.match(skill.content, /just a plain body/); +}); + +test('add() works and skill is in index immediately', () => { + const dir = freshProject(); + const sm = new SkillManager(dir); + sm.add('added', 'added content', { trigger: 'auto', description: 'an added skill' }); + + assert.ok(sm._index.has('added')); + const skill = sm.get('added'); + assert.ok(skill); + assert.match(skill.content, /added content/); +}); diff --git a/test/skills.test.js b/test/skills.test.js index 087d756c..75ecf92f 100644 --- a/test/skills.test.js +++ b/test/skills.test.js @@ -98,6 +98,42 @@ test('list() reports nested skills with origin marker', () => { assert.equal(nested.origin, 'nested'); }); +test('issue #81: nested /SKILL.md inside .smallcode/skills is detected', () => { + const dir = freshProject(); + const skillFile = path.join(dir, '.smallcode', 'skills', 'my-skill', 'SKILL.md'); + write(skillFile, '# my skill\n\nDo nested things.'); + + const sm = new SkillManager(dir); + const got = sm.get('my-skill'); + assert.ok(got, 'nested skill inside .smallcode/skills should load'); + assert.equal(got.origin, 'nested'); + assert.match(got.content, /Do nested things\./); +}); + +test('issue #81: flat .md without frontmatter loads as manual skill', () => { + const dir = freshProject(); + 
write(path.join(dir, '.smallcode', 'skills', 'plain.md'), + '# Plain Skill\n\nNo frontmatter here.'); + + const sm = new SkillManager(dir); + const got = sm.get('plain'); + assert.ok(got, 'frontmatter-less flat skill should load'); + assert.equal(got.trigger, 'manual'); + assert.equal(got.origin, 'flat'); + assert.match(got.content, /No frontmatter here\./); +}); + +test('issue #81: README-style files in skill dirs are not skills', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'README.md'), '# About these skills'); + write(path.join(dir, '.smallcode', 'skills', 'real.md'), + '---\nname: real\ntrigger: manual\n---\nreal body'); + + const sm = new SkillManager(dir); + assert.equal(sm.get('README'), null); + assert.ok(sm.get('real')); +}); + test('add() persists a new skill and round-trips through .smallcode/skills', () => { const dir = freshProject(); const sm = new SkillManager(dir); diff --git a/test/stream_assembler.test.js b/test/stream_assembler.test.js new file mode 100644 index 00000000..ae1d2f72 --- /dev/null +++ b/test/stream_assembler.test.js @@ -0,0 +1,103 @@ +'use strict'; + +// SmallCode — SSE stream assembler tests (issue #77, Phase B) +// Reassemble streamed OpenAI chunks into the non-streaming `data` shape and +// drive the live callbacks. Buffer parsing must tolerate split lines. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { StreamAssembler, parseSSEBuffer } = require('../bin/stream_assembler'); + +function chunk(delta, finish, usage) { + const o = { choices: [{ delta: delta || {}, finish_reason: finish || null }] }; + if (usage) o.usage = usage; + return o; +} + +test('assembles streamed content into one message', () => { + const a = new StreamAssembler(); + const seen = []; + a.pushChunk(chunk({ content: 'Hel' }), { onContent: (t) => seen.push(t) }); + a.pushChunk(chunk({ content: 'lo' }), { onContent: (t) => seen.push(t) }); + a.pushChunk(chunk({}, 'stop')); + const data = a.toData(); + assert.equal(data.choices[0].message.content, 'Hello'); + assert.equal(data.choices[0].finish_reason, 'stop'); + assert.deepEqual(seen, ['Hel', 'lo']); + assert.equal(data.choices[0].message.tool_calls, undefined); +}); + +test('accumulates tool_call deltas across chunks', () => { + const a = new StreamAssembler(); + a.pushChunk(chunk({ tool_calls: [{ index: 0, id: 'c1', function: { name: 'write_', arguments: '{"pa' } }] })); + a.pushChunk(chunk({ tool_calls: [{ index: 0, function: { name: 'file', arguments: 'th":"x.py"}' } }] })); + a.pushChunk(chunk({}, 'tool_calls')); + const tc = a.toData().choices[0].message.tool_calls; + assert.equal(tc.length, 1); + assert.equal(tc[0].id, 'c1'); + assert.equal(tc[0].function.name, 'write_file'); + assert.deepEqual(JSON.parse(tc[0].function.arguments), { path: 'x.py' }); +}); + +test('parallel tool_calls keyed by index', () => { + const a = new StreamAssembler(); + a.pushChunk(chunk({ tool_calls: [{ index: 0, id: 'a', function: { name: 'read_file', arguments: '{}' } }] })); + a.pushChunk(chunk({ tool_calls: [{ index: 1, id: 'b', function: { name: 'bash', arguments: '{}' } }] })); + const tc = a.toData().choices[0].message.tool_calls; + assert.equal(tc.length, 2); + assert.deepEqual(tc.map(t => t.function.name), ['read_file', 'bash']); +}); + +test('routes 
reasoning_content to onReasoning and into the message', () => { + const a = new StreamAssembler(); + const think = []; + a.pushChunk(chunk({ reasoning_content: 'let me ' }), { onReasoning: (t) => think.push(t) }); + a.pushChunk(chunk({ reasoning_content: 'think' }), { onReasoning: (t) => think.push(t) }); + a.pushChunk(chunk({ content: 'answer' }, 'stop')); + const data = a.toData(); + assert.deepEqual(think, ['let me ', 'think']); + assert.equal(data.choices[0].message.reasoning_content, 'let me think'); + assert.equal(data.choices[0].message.content, 'answer'); +}); + +test('captures usage from the final chunk', () => { + const a = new StreamAssembler(); + a.pushChunk(chunk({ content: 'hi' })); + a.pushChunk(chunk({}, 'stop', { prompt_tokens: 100, completion_tokens: 5, total_tokens: 105 })); + assert.deepEqual(a.toData().usage, { prompt_tokens: 100, completion_tokens: 5, total_tokens: 105 }); +}); + +test('parseSSEBuffer extracts complete events and keeps the partial tail', () => { + const raw = 'data: {"choices":[{"delta":{"content":"A"}}]}\n' + + 'data: [DONE]\n' + + 'data: {"choices":[{"delta":{"con'; // split mid-line + const { events, rest } = parseSSEBuffer(raw); + assert.equal(events.length, 2); + assert.equal(events[0].json.choices[0].delta.content, 'A'); + assert.equal(events[1].done, true); + assert.equal(rest, 'data: {"choices":[{"delta":{"con'); // carried over +}); + +test('parseSSEBuffer ignores non-data and blank lines', () => { + const { events } = parseSSEBuffer(': comment\n\nevent: foo\ndata: {"x":1}\n'); + assert.equal(events.length, 1); + assert.deepEqual(events[0].json, { x: 1 }); +}); + +test('end-to-end: split-buffer feed reconstructs the full message', () => { + // Simulate two network reads that split a data line down the middle. 
+ const a = new StreamAssembler(); + let buf = ''; + const reads = [ + 'data: {"choices":[{"delta":{"content":"Hel"}}]}\ndata: {"choices":[{"delta":{"cont', + 'ent":"lo"}}]}\ndata: {"choices":[{"delta":{},"finish_reason":"stop"}]}\ndata: [DONE]\n', + ]; + for (const r of reads) { + buf += r; + const { events, rest } = parseSSEBuffer(buf); + buf = rest; + for (const ev of events) if (ev.json) a.pushChunk(ev.json); + } + assert.equal(a.toData().choices[0].message.content, 'Hello'); +}); diff --git a/test/tool_aliases.test.js b/test/tool_aliases.test.js new file mode 100644 index 00000000..06af7f2e --- /dev/null +++ b/test/tool_aliases.test.js @@ -0,0 +1,267 @@ +'use strict'; + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { ALIASES, REAL_TOOLS, normalizeToolCall } = require('../src/tools/tool_aliases'); + +// Helper: build a minimal OpenAI-shape tool_call +function tc(name, argsObj) { + return { function: { name, arguments: JSON.stringify(argsObj) } }; +} + +// ── Read → read_file ───────────────────────────────────────────────────────── + +test('Read → read_file, file_path → path', () => { + const result = normalizeToolCall(tc('Read', { file_path: 'src/foo.js' })); + assert.equal(result.function.name, 'read_file'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.path, 'src/foo.js'); + assert.equal(args.file_path, undefined); +}); + +test('read (lowercase) → read_file', () => { + const result = normalizeToolCall(tc('read', { file_path: 'a.ts' })); + assert.equal(result.function.name, 'read_file'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.path, 'a.ts'); +}); + +test('view → read_file, filepath → path', () => { + const result = normalizeToolCall(tc('view', { filepath: 'lib/x.js', start_line: 10 })); + assert.equal(result.function.name, 'read_file'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.path, 'lib/x.js'); + 
assert.equal(args.start_line, 10); +}); + +test('READ (all-caps) → read_file', () => { + const result = normalizeToolCall(tc('READ', { file_path: 'b.py' })); + assert.equal(result.function.name, 'read_file'); +}); + +// ── Edit / str_replace → patch ─────────────────────────────────────────────── + +test('Edit → patch, old_string/new_string → old_str/new_str', () => { + const result = normalizeToolCall(tc('Edit', { + file_path: 'a.ts', + old_string: 'x', + new_string: 'y', + })); + assert.equal(result.function.name, 'patch'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.path, 'a.ts'); + assert.equal(args.old_str, 'x'); + assert.equal(args.new_str, 'y'); + assert.equal(args.old_string, undefined); + assert.equal(args.new_string, undefined); + assert.equal(args.file_path, undefined); +}); + +test('str_replace → patch', () => { + const result = normalizeToolCall(tc('str_replace', { + file_path: 'b.js', + old_string: 'foo', + new_string: 'bar', + })); + assert.equal(result.function.name, 'patch'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.old_str, 'foo'); + assert.equal(args.new_str, 'bar'); +}); + +test('str_replace_editor → patch', () => { + const result = normalizeToolCall(tc('str_replace_editor', { file_path: 'c.ts', old_string: 'a', new_string: 'b' })); + assert.equal(result.function.name, 'patch'); +}); + +test('replace → patch', () => { + const result = normalizeToolCall(tc('replace', { file_path: 'c.ts', old_string: 'a', new_string: 'b' })); + assert.equal(result.function.name, 'patch'); +}); + +// ── Bash → bash ─────────────────────────────────────────────────────────────── + +test('Bash (capitalized) → bash alias applied', () => { + // 'Bash' is NOT in REAL_TOOLS (exact match), so the bash alias fires. + // The alias is idempotent: tool stays 'bash', command key preserved. 
+ const result = normalizeToolCall(tc('Bash', { command: 'ls -la' })); + assert.equal(result.function.name, 'bash'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.command, 'ls -la'); +}); + +test('shell → bash, cmd → command', () => { + const result = normalizeToolCall(tc('shell', { cmd: 'echo hi' })); + assert.equal(result.function.name, 'bash'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.command, 'echo hi'); + assert.equal(args.cmd, undefined); +}); + +test('run_command → bash', () => { + const result = normalizeToolCall(tc('run_command', { command: 'npm test' })); + assert.equal(result.function.name, 'bash'); +}); + +// ── Grep → search ───────────────────────────────────────────────────────────── + +test('Grep → search, pattern key preserved', () => { + const result = normalizeToolCall(tc('Grep', { pattern: 'foo.*bar', path: 'src/' })); + assert.equal(result.function.name, 'search'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'foo.*bar'); + assert.equal(args.path, 'src/'); +}); + +test('grep (lowercase) → search, query → pattern', () => { + const result = normalizeToolCall(tc('grep', { query: 'myFunc' })); + assert.equal(result.function.name, 'search'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'myFunc'); + assert.equal(args.query, undefined); +}); + +// ── Glob → find_files ───────────────────────────────────────────────────────── + +test('Glob → find_files, pattern key preserved', () => { + const result = normalizeToolCall(tc('Glob', { pattern: '**/*.ts' })); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, '**/*.ts'); +}); + +test('glob (lowercase) → find_files, query → pattern', () => { + const result = normalizeToolCall(tc('glob', { query: '**/*.js' })); + assert.equal(result.function.name, 'find_files'); + const args = 
JSON.parse(result.function.arguments); + assert.equal(args.pattern, '**/*.js'); +}); + +// ── LS / list_dir → find_files with derived pattern ────────────────────────── + +test('LS → find_files, path → pattern with /*', () => { + const result = normalizeToolCall(tc('LS', { path: 'src/tools' })); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'src/tools/*'); +}); + +test('ls → find_files, trailing slash stripped from path', () => { + const result = normalizeToolCall(tc('ls', { path: 'src/' })); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'src/*'); +}); + +test('ls with no path → find_files with ./*', () => { + const result = normalizeToolCall(tc('ls', {})); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, './*'); +}); + +test('list_dir → find_files', () => { + const result = normalizeToolCall(tc('list_dir', { path: 'bin' })); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'bin/*'); +}); + +test('list_directory → find_files', () => { + const result = normalizeToolCall(tc('list_directory', { path: 'src' })); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'src/*'); +}); + +// ── Real tool names pass through untouched ──────────────────────────────────── + +test('read_file (real name) passes through unchanged', () => { + const input = tc('read_file', { path: 'foo.js' }); + const result = normalizeToolCall(input); + assert.strictEqual(result, input); // same reference — not copied +}); + +test('patch (real name) passes through unchanged', () => { + const input = tc('patch', { path: 'a.js', old_str: 'x', new_str: 'y' }); + const result = 
normalizeToolCall(input); + assert.strictEqual(result, input); +}); + +test('write_file (real name) passes through unchanged', () => { + const input = tc('write_file', { path: 'new.js', content: 'hello' }); + const result = normalizeToolCall(input); + assert.strictEqual(result, input); +}); + +// ── Unknown names pass through ──────────────────────────────────────────────── + +test('unknown tool name passes through unchanged', () => { + const input = tc('some_custom_tool', { foo: 'bar' }); + const result = normalizeToolCall(input); + assert.strictEqual(result, input); +}); + +test('totally unknown name returns original object', () => { + const input = { function: { name: 'xyzzy', arguments: '{"a":1}' } }; + const result = normalizeToolCall(input); + assert.strictEqual(result, input); +}); + +// ── Malformed JSON args don't throw ────────────────────────────────────────── + +test('malformed JSON args: renames tool but keeps args string', () => { + const input = { function: { name: 'Edit', arguments: '{not valid json' } }; + let result; + assert.doesNotThrow(() => { + result = normalizeToolCall(input); + }); + assert.equal(result.function.name, 'patch'); + // args kept as-is (the bad string) + assert.equal(result.function.arguments, '{not valid json'); +}); + +test('empty args string: renames tool, produces empty object args', () => { + // 'Bash' → alias fires; empty string is falsy so falls back to '{}', + // parses cleanly, mapArgs({}){} → '{}'. No throw. 
+ let result; + assert.doesNotThrow(() => { + result = normalizeToolCall({ function: { name: 'Bash', arguments: '' } }); + }); + assert.equal(result.function.name, 'bash'); + assert.equal(result.function.arguments, '{}'); +}); + +test('null args string: renames and produces empty object args', () => { + const result = normalizeToolCall({ function: { name: 'Grep', arguments: null } }); + assert.doesNotThrow(() => {}); + assert.equal(result.function.name, 'search'); +}); + +// ── normalizeToolCall is robust to bad inputs ───────────────────────────────── + +test('null input returns null', () => { + assert.equal(normalizeToolCall(null), null); +}); + +test('missing function property returns input unchanged', () => { + const input = { id: 'call_123' }; + assert.strictEqual(normalizeToolCall(input), input); +}); + +// ── Verify the key example from the spec ───────────────────────────────────── + +test('spec example: Edit with file_path/old_string/new_string → patch with path/old_str/new_str', () => { + const result = normalizeToolCall({ + function: { + name: 'Edit', + arguments: '{"file_path":"a.ts","old_string":"x","new_string":"y"}', + }, + }); + assert.equal(result.function.name, 'patch'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.path, 'a.ts'); + assert.equal(args.old_str, 'x'); + assert.equal(args.new_str, 'y'); +}); diff --git a/test/tui_commands.test.js b/test/tui_commands.test.js new file mode 100644 index 00000000..34a2ef77 --- /dev/null +++ b/test/tui_commands.test.js @@ -0,0 +1,59 @@ +'use strict'; + +// SmallCode — TUI slash-command resolution tests (issue #80) +// resolveTuiCommand maps a raw slash command to { command, guidance }. The +// fullscreen TUI can't host /provider's interactive wizard, so a bare +// /provider is rerouted to `/provider status` plus guidance text; everything +// else (including the already-non-interactive status subcommands) passes +// through unchanged. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { resolveTuiCommand, PROVIDER_GUIDANCE } = require('../bin/tui_commands'); + +test('bare /provider reroutes to status and attaches guidance', () => { + const r = resolveTuiCommand('/provider'); + assert.equal(r.command, '/provider status'); + assert.equal(r.guidance, PROVIDER_GUIDANCE); +}); + +test('/provider status|--status|-s pass through with no guidance', () => { + for (const sub of ['status', '--status', '-s']) { + const r = resolveTuiCommand(`/provider ${sub}`); + assert.equal(r.command, `/provider ${sub}`, sub); + assert.equal(r.guidance, null, sub); + } +}); + +test('an unknown /provider subcommand still reroutes to status + guidance', () => { + const r = resolveTuiCommand('/provider reset'); + assert.equal(r.command, '/provider status'); + assert.equal(r.guidance, PROVIDER_GUIDANCE); +}); + +test('non-provider commands pass through untouched', () => { + for (const cmd of ['/model', '/endpoint', '/help', '/quit']) { + const r = resolveTuiCommand(cmd); + assert.equal(r.command, cmd, cmd); + assert.equal(r.guidance, null, cmd); + } +}); + +test('a command that merely starts with "provider" is not matched', () => { + // \b word boundary: /providerx is a different command, not /provider. + const r = resolveTuiCommand('/providerx'); + assert.equal(r.command, '/providerx'); + assert.equal(r.guidance, null); +}); + +test('guidance points at the in-TUI alternatives and the shell wizard', () => { + assert.match(PROVIDER_GUIDANCE, /\/endpoint/); + assert.match(PROVIDER_GUIDANCE, /\/model/); + assert.match(PROVIDER_GUIDANCE, /smallcode \/provider/); +}); + +test('non-string / empty input is handled defensively', () => { + assert.deepEqual(resolveTuiCommand(''), { command: '', guidance: null }); + assert.deepEqual(resolveTuiCommand(undefined), { command: '', guidance: null }); +});