From 5a4717e62f1efc57b0d49e709d3255a607b9eab8 Mon Sep 17 00:00:00 2001 From: DanTheMan181 <283874042+Doorman11991@users.noreply.github.com> Date: Fri, 29 May 2026 10:58:10 -0700 Subject: [PATCH 01/27] fix(#62): consolidate mid-conversation system messages for strict templates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Qwen3/Qwen3.5 chat templates under llama.cpp --jinja raise "System message must be at the beginning." and llama.cpp 400s when a system-role message appears at any index but 0 — but only when tools are present (that's when it compiles the template to build a tool-call grammar). SmallCode injects system content mid-conversation (clarifier, plan request, planner injection, path-validation warnings, skill activation, compaction), so the messages array routinely had system entries past index 0. New src/session/message_normalizer.js#consolidateSystemMessages() collapses all system-role messages into a single leading one (order preserved, identical blocks de-duplicated) and keeps only non-system turns after it. Applied in both request builders (bin/smallcode.js and bin/model_client.js chatCompletion) right before the body is sent. Verified E2E against a Qwen3 model: every tool-bearing request now carries exactly one system message at index 0. +9 tests; full suite 157 passing. 
--- CHANGELOG.md | 22 +++++++ bin/model_client.js | 6 +- bin/smallcode.js | 10 ++- src/session/message_normalizer.js | 75 ++++++++++++++++++++++ test/message_normalizer.test.js | 100 ++++++++++++++++++++++++++++++ 5 files changed, 211 insertions(+), 2 deletions(-) create mode 100644 src/session/message_normalizer.js create mode 100644 test/message_normalizer.test.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 55981fba..48e579b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,28 @@ ## [1.3.1] - 2026-05-29 +### fix: strict chat templates reject mid-conversation system messages (#62) + +Qwen3 / Qwen3.5 chat templates (and other strict templates) under +llama.cpp `--jinja` raise `System message must be at the beginning.` and +llama.cpp returns HTTP 400 — but only when `tools` are present, since +that's when it compiles the template to build a tool-call grammar. +SmallCode injects system-role content mid-conversation (clarifier, plan +request, planner injection, path-validation warnings, skill activation, +compaction summaries), producing a messages array with `system` entries +at positions other than 0. + +- New `src/session/message_normalizer.js#consolidateSystemMessages()` + collapses all system-role messages into a single leading system + message (preserving order, de-duplicating identical blocks) and emits + only non-system turns after it. +- Applied in both request builders (`bin/smallcode.js` and + `bin/model_client.js` `chatCompletion`) right before the body is sent, + so it catches stray system messages regardless of which path injected + them. Verified end-to-end against a Qwen3 model: every tool-bearing + request now carries exactly one system message at index 0. +- Test coverage: `test/message_normalizer.test.js` (9 cases). 
+ ### fix: compatibility issues #57, #58, #59 Three reported environment-compatibility bugs: diff --git a/bin/model_client.js b/bin/model_client.js index 834b567e..a45f7542 100644 --- a/bin/model_client.js +++ b/bin/model_client.js @@ -35,9 +35,13 @@ async function chatCompletion(ctx) { }); const _tools = ctx.getAllTools(config); + // Collapse any mid-conversation system messages into a single leading one + // so strict chat templates (Qwen3/Qwen3.5 under llama.cpp --jinja) don't + // 400 with "System message must be at the beginning." See issue #62. + const { consolidateSystemMessages } = require('../src/session/message_normalizer'); const body = { model: target.model, - messages: [systemMsg, ...processedMessages], + messages: consolidateSystemMessages([systemMsg, ...processedMessages]), temperature: 0.1, max_tokens: 4096, }; diff --git a/bin/smallcode.js b/bin/smallcode.js index 4f8d13cb..77255e12 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -90,6 +90,7 @@ const { PluginLoader } = require('../src/plugins/loader'); const { SkillManager } = require('../src/plugins/skills'); const { SessionStore } = require('../src/session/persistence'); const { resolveReferences, formatReferencesForPrompt } = require('../src/session/references'); +const { consolidateSystemMessages } = require('../src/session/message_normalizer'); const { TokenTracker } = require('../src/session/tokens'); const { UndoStack } = require('../src/session/undo'); const { shouldInjectGitContext, getGitDiffContext } = require('../src/session/git_context'); @@ -2161,9 +2162,16 @@ async function chatCompletion(config, messages) { }); const _tools = getAllTools(config, currentToolCategory); + // Consolidate any mid-conversation system messages into a single leading + // system message. Strict chat templates (Qwen3/Qwen3.5 under llama.cpp + // --jinja) reject a `system` role anywhere but index 0 and return HTTP 400 + // when tools are present. 
SmallCode injects system content mid-stream + // (clarifier, plan, planner, path warnings, skills, compaction), so we + // normalize here, right before the request is built. See issue #62. + const normalizedMessages = consolidateSystemMessages([systemMsg, ...processedWithImages]); const body = { model: target.model, - messages: [systemMsg, ...processedWithImages], + messages: normalizedMessages, temperature: 0.1, max_tokens: parseInt(process.env.SMALLCODE_MAX_OUTPUT_TOKENS) || 8192, }; diff --git a/src/session/message_normalizer.js b/src/session/message_normalizer.js new file mode 100644 index 00000000..cc44b259 --- /dev/null +++ b/src/session/message_normalizer.js @@ -0,0 +1,75 @@ +// SmallCode — Message Normalizer +// +// Some chat templates (notably Qwen3 / Qwen3.5 under llama.cpp with --jinja) +// enforce that a `system` role message may only appear at index 0 of the +// messages array. Their Jinja template raises: +// +// raise_exception('System message must be at the beginning.') +// +// …and llama.cpp returns HTTP 400 BEFORE the request is processed — but only +// when `tools` are present, because that's when it compiles the template to +// build a tool-call grammar. (See issue #62.) +// +// SmallCode legitimately injects system-role content mid-conversation in +// several places: clarification instructions, plan requests, planner +// injection, path-validation warnings, skill activation, and compaction +// summaries. Each of those pushes a `{ role: 'system', content }` object into +// the live conversation history, so by the time we assemble the request the +// array can look like: +// +// [system(prompt), user, assistant, system(plan), user, system(warning), ...] +// +// This module collapses any such array into a single leading system message +// followed by only non-system turns — satisfying strict templates while +// preserving the injected instructions (they're merged into the lead system +// message, not dropped). 
+// +// Design notes: +// - Order is preserved: stray system messages are appended to the lead +// system content in the order they appeared, so later instructions still +// come after earlier ones. +// - Non-string content (multimodal image arrays on user turns) is never +// touched — only `role: 'system'` entries are merged, and those are +// always plain strings in this codebase. +// - Idempotent: running it on an already-normalized array is a no-op. + +'use strict'; + +/** + * Collapse all system-role messages into a single leading system message. + * + * @param {Array<{role:string, content:any}>} messages OpenAI-style messages. + * @returns {Array} A new array with exactly one system message at index 0 + * (when any system content exists), followed by all non-system messages in + * their original order. The input array is not mutated. + */ +function consolidateSystemMessages(messages) { + if (!Array.isArray(messages) || messages.length === 0) return messages; + + const systemParts = []; + const rest = []; + + for (const msg of messages) { + if (msg && msg.role === 'system' && typeof msg.content === 'string') { + const trimmed = msg.content.trim(); + if (trimmed) systemParts.push(trimmed); + } else { + rest.push(msg); + } + } + + // No system content at all → return the non-system messages unchanged. + if (systemParts.length === 0) return rest.length === messages.length ? messages : rest; + + // De-duplicate consecutive identical blocks (the same instruction can be + // re-injected across turns; collapsing avoids ballooning the lead prompt). 
+ const deduped = []; + for (const part of systemParts) { + if (deduped[deduped.length - 1] !== part) deduped.push(part); + } + + const merged = { role: 'system', content: deduped.join('\n\n') }; + return [merged, ...rest]; +} + +module.exports = { consolidateSystemMessages }; diff --git a/test/message_normalizer.test.js b/test/message_normalizer.test.js new file mode 100644 index 00000000..e2b9f529 --- /dev/null +++ b/test/message_normalizer.test.js @@ -0,0 +1,100 @@ +// SmallCode — message normalizer tests (issue #62) +// +// Strict chat templates (Qwen3/Qwen3.5 under llama.cpp --jinja) raise +// "System message must be at the beginning." and llama.cpp 400s when a +// system-role message appears anywhere but index 0 AND tools are present. +// consolidateSystemMessages() must guarantee exactly one leading system +// message. + +'use strict'; + +const { test } = require('node:test'); +const assert = require('node:assert'); + +const { consolidateSystemMessages } = require('../src/session/message_normalizer'); + +test('merges a mid-conversation system message into the leading one', () => { + const out = consolidateSystemMessages([ + { role: 'system', content: 'base prompt' }, + { role: 'user', content: 'hi' }, + { role: 'assistant', content: 'hello' }, + { role: 'system', content: 'PLAN: do the thing' }, + { role: 'user', content: 'go' }, + ]); + // Exactly one system message, at index 0. + assert.equal(out.filter(m => m.role === 'system').length, 1); + assert.equal(out[0].role, 'system'); + assert.match(out[0].content, /base prompt/); + assert.match(out[0].content, /PLAN: do the thing/); + // Non-system turns preserved in order. 
+ assert.deepEqual(out.slice(1).map(m => m.role), ['user', 'assistant', 'user']); +}); + +test('order of merged system parts is preserved', () => { + const out = consolidateSystemMessages([ + { role: 'system', content: 'first' }, + { role: 'user', content: 'a' }, + { role: 'system', content: 'second' }, + { role: 'system', content: 'third' }, + ]); + assert.equal(out[0].content, 'first\n\nsecond\n\nthird'); +}); + +test('no system messages → array returned unchanged in content', () => { + const input = [ + { role: 'user', content: 'a' }, + { role: 'assistant', content: 'b' }, + ]; + const out = consolidateSystemMessages(input); + assert.deepEqual(out.map(m => m.role), ['user', 'assistant']); +}); + +test('idempotent on an already-normalized array', () => { + const once = consolidateSystemMessages([ + { role: 'system', content: 'sys' }, + { role: 'user', content: 'u' }, + ]); + const twice = consolidateSystemMessages(once); + assert.deepEqual(twice, once); +}); + +test('deduplicates identical consecutive system blocks', () => { + const out = consolidateSystemMessages([ + { role: 'system', content: 'same' }, + { role: 'user', content: 'u' }, + { role: 'system', content: 'same' }, + ]); + assert.equal(out[0].content, 'same'); +}); + +test('drops empty/whitespace-only system messages', () => { + const out = consolidateSystemMessages([ + { role: 'system', content: 'real' }, + { role: 'system', content: ' ' }, + { role: 'user', content: 'u' }, + ]); + assert.equal(out[0].content, 'real'); + assert.equal(out.filter(m => m.role === 'system').length, 1); +}); + +test('preserves multimodal user content untouched', () => { + const img = { role: 'user', content: [{ type: 'text', text: 'hi' }, { type: 'image_url', image_url: { url: 'data:...' 
} }] }; + const out = consolidateSystemMessages([ + { role: 'system', content: 'sys' }, + img, + ]); + assert.equal(out[1], img); // same reference, unmodified +}); + +test('handles only-system input (collapses to one)', () => { + const out = consolidateSystemMessages([ + { role: 'system', content: 'a' }, + { role: 'system', content: 'b' }, + ]); + assert.equal(out.length, 1); + assert.equal(out[0].content, 'a\n\nb'); +}); + +test('empty array is a no-op', () => { + assert.deepEqual(consolidateSystemMessages([]), []); +}); From e9432a8fba17619ca2805bda02daa8d3914cfe25 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Jun 2026 20:58:09 +0000 Subject: [PATCH 02/27] chore(deps): bump hono from 4.12.19 to 4.12.23 Bumps [hono](https://github.com/honojs/hono) from 4.12.19 to 4.12.23. - [Release notes](https://github.com/honojs/hono/releases) - [Commits](https://github.com/honojs/hono/compare/v4.12.19...v4.12.23) --- updated-dependencies: - dependency-name: hono dependency-version: 4.12.23 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- package-lock.json | 433 +--------------------------------------------- 1 file changed, 5 insertions(+), 428 deletions(-) diff --git a/package-lock.json b/package-lock.json index 10cfaca4..ed03231e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,7 +19,8 @@ "bin": { "smallcode": "bin/smallcode.js", "smallcode-init": "bin/init.js", - "smallcode-rag-index": "bin/rag-index.js" + "smallcode-rag-index": "bin/rag-index.js", + "smolv2": "bin/smallcode.js" }, "devDependencies": { "@types/node": "^25.9.0", @@ -1231,17 +1232,6 @@ "@babel/types": "^7.28.2" } }, - "node_modules/@types/debug": { - "version": "4.1.13", - "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.13.tgz", - "integrity": "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "@types/ms": "*" - } - }, "node_modules/@types/istanbul-lib-coverage": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz", @@ -1269,14 +1259,6 @@ "@types/istanbul-lib-report": "*" } }, - "node_modules/@types/ms": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz", - "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==", - "license": "MIT", - "optional": true, - "peer": true - }, "node_modules/@types/node": { "version": "25.9.0", "resolved": "https://registry.npmjs.org/@types/node/-/node-25.9.0.tgz", @@ -1754,17 +1736,6 @@ "sprintf-js": "~1.0.2" } }, - "node_modules/arr-union": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz", - "integrity": "sha512-sKpyeERZ02v1FeCZT8lrfJq5u6goHCtpTAzPwJYe7c8SPFOboNjNg1vz2L4VTn9T4PQxEx13TbXLmYUcS6Ug7Q==", - "license": "MIT", - "optional": true, - "peer": true, - "engines": { - "node": ">=0.10.0" - } - }, 
"node_modules/babel-jest": { "version": "30.4.1", "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-30.4.1.tgz", @@ -2265,24 +2236,6 @@ "wrap-ansi": "^7.0.0" } }, - "node_modules/clone-deep": { - "version": "0.2.4", - "resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz", - "integrity": "sha512-we+NuQo2DHhSl+DP6jlUiAhyAjBQrYnpOk15rN6c6JSPScjiCLh8IbSU+VTcph6YS3o7mASE8a0+gbZ7ChLpgg==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "for-own": "^0.1.3", - "is-plain-object": "^2.0.1", - "kind-of": "^3.0.2", - "lazy-cache": "^1.0.3", - "shallow-clone": "^0.1.2" - }, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/co": { "version": "4.6.0", "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", @@ -2921,31 +2874,6 @@ "node": ">=8" } }, - "node_modules/for-in": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/for-in/-/for-in-1.0.2.tgz", - "integrity": "sha512-7EwmXrOjyL+ChxMhmG5lnW9MPt1aIeZEwKhQzoBUdTV0N3zuwWDZYVJatDvZ2OyzPUvdIAZDsCetk3coyMfcnQ==", - "license": "MIT", - "optional": true, - "peer": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/for-own": { - "version": "0.1.5", - "resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz", - "integrity": "sha512-SKmowqGTJoPzLO1T0BBJpkfp3EMacCMOuH40hOUbrbzElVktk4DioXVM99QkLCyKoiuOmyjgcWMpVz2xjE7LZw==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "for-in": "^1.0.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/forwarded": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", @@ -2971,22 +2899,6 @@ "license": "MIT", "optional": true }, - "node_modules/fs-extra": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz", - "integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==", - "license": "MIT", - "optional": 
true, - "peer": true, - "dependencies": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" - }, - "engines": { - "node": ">=12" - } - }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -3177,9 +3089,9 @@ } }, "node_modules/hono": { - "version": "4.12.19", - "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.19.tgz", - "integrity": "sha512-xa3eYXYXx68XTT4hZ7dRzsXBhaq85ToSrlUJNoR0gwz/1Ap/CNwX47wfvV7pc/xWhjKVVkLT7zBJy8chhNguqQ==", + "version": "4.12.23", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.23.tgz", + "integrity": "sha512-eIaZ9qDgu7XV0pxOCrg7/WhnQ6Ivm22UcxhXx/A3dcbqbbYgBEkc6e/J/s7j2tS96zoB0S9VBdLwQNCWwUo4LA==", "license": "MIT", "optional": true, "engines": { @@ -3329,25 +3241,6 @@ "dev": true, "license": "MIT" }, - "node_modules/is-buffer": { - "version": "1.1.6", - "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", - "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==", - "license": "MIT", - "optional": true, - "peer": true - }, - "node_modules/is-extendable": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz", - "integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==", - "license": "MIT", - "optional": true, - "peer": true, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/is-fullwidth-code-point": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", @@ -3367,20 +3260,6 @@ "node": ">=6" } }, - "node_modules/is-plain-object": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz", - "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==", - "license": "MIT", - 
"optional": true, - "peer": true, - "dependencies": { - "isobject": "^3.0.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/is-promise": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz", @@ -3407,17 +3286,6 @@ "devOptional": true, "license": "ISC" }, - "node_modules/isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==", - "license": "MIT", - "optional": true, - "peer": true, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/istanbul-lib-coverage": { "version": "3.2.2", "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz", @@ -4217,45 +4085,6 @@ "node": ">=6" } }, - "node_modules/jsonfile": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.1.tgz", - "integrity": "sha512-zwOTdL3rFQ/lRdBnntKVOX6k5cKJwEc1HdilT71BWEu7J41gXIB2MRp+vxduPSwZJPWBxEzv4yH1wYLJGUHX4Q==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "universalify": "^2.0.0" - }, - "optionalDependencies": { - "graceful-fs": "^4.1.6" - } - }, - "node_modules/kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "is-buffer": "^1.1.5" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/lazy-cache": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-1.0.4.tgz", - "integrity": "sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ==", - "license": "MIT", - "optional": true, - "peer": true, - "engines": { - "node": ">=0.10.0" - } - }, 
"node_modules/leven": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", @@ -4420,22 +4249,6 @@ "node": ">= 0.8" } }, - "node_modules/merge-deep": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/merge-deep/-/merge-deep-3.0.3.tgz", - "integrity": "sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "arr-union": "^3.1.0", - "clone-deep": "^0.2.4", - "kind-of": "^3.0.2" - }, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/merge-descriptors": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz", @@ -4539,32 +4352,6 @@ "node": ">=16 || 14 >=14.17" } }, - "node_modules/mixin-object": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/mixin-object/-/mixin-object-2.0.1.tgz", - "integrity": "sha512-ALGF1Jt9ouehcaXaHhn6t1yGWRqGaHkPFndtFVHfZXOvkIZ/yoGaSi0AHVTafb3ZBGg4dr/bDwnaEKqCXzchMA==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "for-in": "^0.1.3", - "is-extendable": "^0.1.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/mixin-object/node_modules/for-in": { - "version": "0.1.8", - "resolved": "https://registry.npmjs.org/for-in/-/for-in-0.1.8.tgz", - "integrity": "sha512-F0to7vbBSHP8E3l6dCjxNOLuSFAACIxFy3UehTUlG7svlXi37HHsDkyVcHo0Pq8QwrE+pXvWSVX3ZT1T9wAZ9g==", - "license": "MIT", - "optional": true, - "peer": true, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/mkdirp-classic": { "version": "0.5.3", "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", @@ -4819,14 +4606,6 @@ "node": ">=6" } }, - "node_modules/package-json-from-dist": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", - "integrity": 
"sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", - "license": "BlueOak-1.0.0", - "optional": true, - "peer": true - }, "node_modules/parse-json": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", @@ -4986,20 +4765,6 @@ "node": ">=8" } }, - "node_modules/playwright-extra": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/playwright-extra/-/playwright-extra-4.3.0.tgz", - "integrity": "sha512-/Hec3BmYMY/GznBo0ZPsSqm6IHil7jInxLz+9/UnBBC5Ozh2abVNnv+vYNJ+JKKRVtiKyHCrJpKRibJ9uribZw==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "debug": "^4.3.4" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/prebuild-install": { "version": "7.1.3", "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", @@ -5081,120 +4846,6 @@ "once": "^1.3.1" } }, - "node_modules/puppeteer-extra-plugin": { - "version": "3.2.3", - "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin/-/puppeteer-extra-plugin-3.2.3.tgz", - "integrity": "sha512-6RNy0e6pH8vaS3akPIKGg28xcryKscczt4wIl0ePciZENGE2yoaQJNd17UiEbdmh5/6WW6dPcfRWT9lxBwCi2Q==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "@types/debug": "^4.1.0", - "debug": "^4.1.1", - "merge-deep": "^3.0.1" - }, - "engines": { - "node": ">=9.11.2" - }, - "peerDependencies": { - "playwright-extra": "*", - "puppeteer-extra": "*" - }, - "peerDependenciesMeta": { - "playwright-extra": { - "optional": true - }, - "puppeteer-extra": { - "optional": true - } - } - }, - "node_modules/puppeteer-extra-plugin-stealth": { - "version": "2.11.0", - "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-stealth/-/puppeteer-extra-plugin-stealth-2.11.0.tgz", - "integrity": "sha512-BqckPV95MHP25quZgzBnZJD8S38ZYP4B3HJ3Kr/vibqxJxhK6L1VQ6jnu/JcFKV0wzCIQPrCiiavZnwE5u1C2A==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": 
{ - "debug": "^4.1.1", - "puppeteer-extra-plugin": "^3.2.2", - "puppeteer-extra-plugin-user-preferences": "^2.4.0" - }, - "engines": { - "node": ">=8" - }, - "peerDependencies": { - "playwright-extra": "*", - "puppeteer-extra": "*" - }, - "peerDependenciesMeta": { - "playwright-extra": { - "optional": true - }, - "puppeteer-extra": { - "optional": true - } - } - }, - "node_modules/puppeteer-extra-plugin-user-data-dir": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-data-dir/-/puppeteer-extra-plugin-user-data-dir-2.4.1.tgz", - "integrity": "sha512-kH1GnCcqEDoBXO7epAse4TBPJh9tEpVEK/vkedKfjOVOhZAvLkHGc9swMs5ChrJbRnf8Hdpug6TJlEuimXNQ+g==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "debug": "^4.1.1", - "fs-extra": "^10.0.0", - "puppeteer-extra-plugin": "^3.2.3", - "rimraf": "^3.0.2" - }, - "engines": { - "node": ">=8" - }, - "peerDependencies": { - "playwright-extra": "*", - "puppeteer-extra": "*" - }, - "peerDependenciesMeta": { - "playwright-extra": { - "optional": true - }, - "puppeteer-extra": { - "optional": true - } - } - }, - "node_modules/puppeteer-extra-plugin-user-preferences": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-preferences/-/puppeteer-extra-plugin-user-preferences-2.4.1.tgz", - "integrity": "sha512-i1oAZxRbc1bk8MZufKCruCEC3CCafO9RKMkkodZltI4OqibLFXF3tj6HZ4LZ9C5vCXZjYcDWazgtY69mnmrQ9A==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "debug": "^4.1.1", - "deepmerge": "^4.2.2", - "puppeteer-extra-plugin": "^3.2.3", - "puppeteer-extra-plugin-user-data-dir": "^2.4.1" - }, - "engines": { - "node": ">=8" - }, - "peerDependencies": { - "playwright-extra": "*", - "puppeteer-extra": "*" - }, - "peerDependenciesMeta": { - "playwright-extra": { - "optional": true - }, - "puppeteer-extra": { - "optional": true - } - } - }, "node_modules/pure-rand": { "version": "7.0.1", "resolved": 
"https://registry.npmjs.org/pure-rand/-/pure-rand-7.0.1.tgz", @@ -5350,27 +5001,6 @@ "node": ">=8" } }, - "node_modules/rimraf": { - "version": "6.1.3", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-6.1.3.tgz", - "integrity": "sha512-LKg+Cr2ZF61fkcaK1UdkH2yEBBKnYjTyWzTJT6KNPcSPaiT7HSdhtMXQuN5wkTX0Xu72KQ1l8S42rlmexS2hSA==", - "license": "BlueOak-1.0.0", - "optional": true, - "peer": true, - "dependencies": { - "glob": "^13.0.3", - "package-json-from-dist": "^1.0.1" - }, - "bin": { - "rimraf": "dist/esm/bin.mjs" - }, - "engines": { - "node": "20 || >=22" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/router": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz", @@ -5475,48 +5105,6 @@ "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", "license": "ISC" }, - "node_modules/shallow-clone": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-0.1.2.tgz", - "integrity": "sha512-J1zdXCky5GmNnuauESROVu31MQSnLoYvlyEn6j2Ztk6Q5EHFIhxkMhYcv6vuDzl2XEzoRr856QwzMgWM/TmZgw==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "is-extendable": "^0.1.1", - "kind-of": "^2.0.1", - "lazy-cache": "^0.2.3", - "mixin-object": "^2.0.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/shallow-clone/node_modules/kind-of": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz", - "integrity": "sha512-0u8i1NZ/mg0b+W3MGGw5I7+6Eib2nx72S/QvXa0hYjEkjTknYmEYQJwGu3mLC0BrhtJjtQafTkyRUQ75Kx0LVg==", - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "is-buffer": "^1.0.2" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/shallow-clone/node_modules/lazy-cache": { - "version": "0.2.7", - "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-0.2.7.tgz", - "integrity": 
"sha512-gkX52wvU/R8DVMMt78ATVPFMJqfW8FPz1GZ1sVHBVQHmu/WvhIWE4cE1GBzhJNFicDeYhnwp6Rl35BcAIM3YOQ==", - "license": "MIT", - "optional": true, - "peer": true, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -6136,17 +5724,6 @@ "node": ">=4" } }, - "node_modules/universalify": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", - "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", - "license": "MIT", - "optional": true, - "peer": true, - "engines": { - "node": ">= 10.0.0" - } - }, "node_modules/unpipe": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", From c284b2f7e97974a8dadb0b83cdcc3c0aa6b87ce9 Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Fri, 5 Jun 2026 12:33:33 -0700 Subject: [PATCH 03/27] fix(skills): discover nested and frontmatter-less skills Skills following the Claude Code layout (//SKILL.md) or written as plain .md without YAML frontmatter were silently skipped in the standard skill dirs (.smallcode/skills, ~/.smallcode/skills, ~/.config/smallcode/skills). Both shapes now load; README-style files (README/CHANGELOG/LICENSE/CONTRIBUTING) are filtered by name. 
Fixes #81 Constraint: no warning channel exists in SkillManager, so silent skips had no user-visible signal Rejected: warn-on-skip only | users following Claude Code conventions expect these layouts to work Confidence: high Scope-risk: narrow Not-tested: fullscreen TUI /skill list rendering (logic shared with classic mode) Co-Authored-By: Claude Opus 4.8 (1M context) --- src/plugins/skills.js | 74 ++++++++++++++++++++++++++----------------- test/skills.test.js | 36 +++++++++++++++++++++ 2 files changed, 81 insertions(+), 29 deletions(-) diff --git a/src/plugins/skills.js b/src/plugins/skills.js index 60c06bb1..d88354c7 100644 --- a/src/plugins/skills.js +++ b/src/plugins/skills.js @@ -16,6 +16,10 @@ // `.agents/skills` or `.claude/skills` typically have no frontmatter — they // are treated as `manual`-trigger skills named after their parent directory. // +// The standard skill dirs also accept the nested `<name>/SKILL.md` layout and +// flat `.md` files without frontmatter (named after the file) — both were +// previously skipped silently (closes #81). README-style files are ignored. +// // Frontmatter accepts both LF and CRLF line endings (closes #52). 
const fs = require('fs'); @@ -24,6 +28,8 @@ const os = require('os'); const FM_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/; const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/; +// Docs that live alongside skills but aren't skills themselves +const NON_SKILL_MD = /^(readme|changelog|license|contributing)\.md$/i; class SkillManager { constructor(projectDir) { @@ -71,14 +77,20 @@ class SkillManager { if (!dir || !fs.existsSync(dir)) return; let entries; try { - entries = fs.readdirSync(dir); + entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; } for (const entry of entries) { - if (!entry.endsWith('.md')) continue; - const full = path.join(dir, entry); - this._ingestFile(full, entry, dir); + if (entry.isDirectory()) { + // //SKILL.md inside a standard skill dir — users following + // the Claude Code layout expect this to work (closes #81) + this._loadSkillFolder(path.join(dir, entry.name), entry.name); + continue; + } + if (!entry.name.endsWith('.md') || NON_SKILL_MD.test(entry.name)) continue; + const full = path.join(dir, entry.name); + this._ingestFile(full, entry.name, dir, entry.name.replace(/\.md$/i, ''), 'flat'); } } @@ -92,38 +104,41 @@ class SkillManager { } for (const d of dirs) { if (!d.isDirectory()) continue; - const skillDir = path.join(root, d.name); - // Look for SKILL.md, skill.md, or any .md file inside the folder. 
- let skillFile = null; - const candidates = ['SKILL.md', 'skill.md', 'Skill.md']; - for (const c of candidates) { - const p = path.join(skillDir, c); - if (fs.existsSync(p)) { skillFile = p; break; } - } - if (!skillFile) { - // Fall back to first .md in the folder - try { - const md = fs.readdirSync(skillDir).find(f => f.endsWith('.md')); - if (md) skillFile = path.join(skillDir, md); - } catch {} - } - if (!skillFile) continue; - this._ingestFile(skillFile, path.basename(skillFile), skillDir, d.name); + this._loadSkillFolder(path.join(root, d.name), d.name); + } + } + + _loadSkillFolder(skillDir, name) { + // Look for SKILL.md, skill.md, or any .md file inside the folder. + let skillFile = null; + const candidates = ['SKILL.md', 'skill.md', 'Skill.md']; + for (const c of candidates) { + const p = path.join(skillDir, c); + if (fs.existsSync(p)) { skillFile = p; break; } + } + if (!skillFile) { + // Fall back to first .md in the folder + try { + const md = fs.readdirSync(skillDir).find(f => f.endsWith('.md')); + if (md) skillFile = path.join(skillDir, md); + } catch {} } + if (!skillFile) return; + this._ingestFile(skillFile, path.basename(skillFile), skillDir, name, 'nested'); } - _ingestFile(filePath, filename, dir, defaultName) { + _ingestFile(filePath, filename, dir, defaultName, origin) { let content; try { content = fs.readFileSync(filePath, 'utf-8'); } catch { return; } - const skill = this._parse(content, filename, dir, defaultName); + const skill = this._parse(content, filename, dir, defaultName, origin); if (skill) this.skills.set(skill.name, skill); } - _parse(content, filename, dir, defaultName) { + _parse(content, filename, dir, defaultName, origin) { // Parse YAML frontmatter (CRLF + LF tolerant — closes #52) const fmMatch = content.match(FM_RE); let frontmatter = ''; @@ -133,9 +148,10 @@ class SkillManager { frontmatter = fmMatch[1]; body = fmMatch[2]; } else if (!defaultName) { - // Flat-layout files without frontmatter aren't skills (could be a - 
// README). Nested-layout (.agents/skills//SKILL.md) files are - // accepted as plain-body skills using the parent directory name. + // Files without frontmatter and no derivable name aren't skills. + // Flat + nested loaders always pass a defaultName, so frontmatter-less + // files load as manual skills (closes #81); README-style files are + // filtered by name in _loadFlat. return null; } @@ -155,11 +171,11 @@ class SkillManager { return { name: meta.name || defaultName || filename.replace(/\.md$/i, ''), - trigger: meta.trigger || (defaultName ? 'manual' : 'manual'), + trigger: meta.trigger || 'manual', keywords: Array.isArray(meta.keywords) ? meta.keywords : [], content: body.trim(), path: path.join(dir, filename), - origin: defaultName ? 'nested' : 'flat', + origin: origin || (defaultName ? 'nested' : 'flat'), }; } diff --git a/test/skills.test.js b/test/skills.test.js index 087d756c..75ecf92f 100644 --- a/test/skills.test.js +++ b/test/skills.test.js @@ -98,6 +98,42 @@ test('list() reports nested skills with origin marker', () => { assert.equal(nested.origin, 'nested'); }); +test('issue #81: nested /SKILL.md inside .smallcode/skills is detected', () => { + const dir = freshProject(); + const skillFile = path.join(dir, '.smallcode', 'skills', 'my-skill', 'SKILL.md'); + write(skillFile, '# my skill\n\nDo nested things.'); + + const sm = new SkillManager(dir); + const got = sm.get('my-skill'); + assert.ok(got, 'nested skill inside .smallcode/skills should load'); + assert.equal(got.origin, 'nested'); + assert.match(got.content, /Do nested things\./); +}); + +test('issue #81: flat .md without frontmatter loads as manual skill', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'plain.md'), + '# Plain Skill\n\nNo frontmatter here.'); + + const sm = new SkillManager(dir); + const got = sm.get('plain'); + assert.ok(got, 'frontmatter-less flat skill should load'); + assert.equal(got.trigger, 'manual'); + assert.equal(got.origin, 
'flat'); + assert.match(got.content, /No frontmatter here\./); +}); + +test('issue #81: README-style files in skill dirs are not skills', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'README.md'), '# About these skills'); + write(path.join(dir, '.smallcode', 'skills', 'real.md'), + '---\nname: real\ntrigger: manual\n---\nreal body'); + + const sm = new SkillManager(dir); + assert.equal(sm.get('README'), null); + assert.ok(sm.get('real')); +}); + test('add() persists a new skill and round-trips through .smallcode/skills', () => { const dir = freshProject(); const sm = new SkillManager(dir); From a5940da97e628420d8655e46a4f553a92440dc2d Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Fri, 5 Jun 2026 12:41:31 -0700 Subject: [PATCH 04/27] feat(wizard): list local models and reuse caller readline Two wizard UX fixes for local providers: - Ollama / LM Studio: fetch the installed model list from the OpenAI-compatible /models endpoint and offer a numbered picker instead of a blank free-text prompt. Falls back to manual entry when the server is unreachable or returns nothing. - Borrow the caller's readline interface when provided. The wizard previously created a second readline on the same stdin while the TUI's interface was still attached, so every keystroke echoed twice (duplicated letters while typing). 
Constraint: wizard must keep working when invoked without a readline (tool path) Rejected: pausing the caller readline around the wizard | borrowed rl is simpler and fixes echo at the source Confidence: high Scope-risk: narrow Not-tested: fullscreen TUI wizard flow (mock rl there has no question method; pre-existing #80 territory) Co-Authored-By: Claude Opus 4.8 (1M context) --- bin/commands.js | 2 +- bin/provider-wizard/wizard.js | 51 ++++++++++++++++++--- test/provider_wizard.test.js | 85 ++++++++++++++++++++++++++++++++++- 3 files changed, 131 insertions(+), 7 deletions(-) diff --git a/bin/commands.js b/bin/commands.js index 9ba5ebc8..d6c6436e 100644 --- a/bin/commands.js +++ b/bin/commands.js @@ -870,7 +870,7 @@ module.exports = function createCommandHandler(config, conversationHistory, impr console.log(pProviderStatus()); } else { const pWizard = require('./provider-wizard/wizard'); - const result = await pWizard.runWizard({ interactive: true }); + const result = await pWizard.runWizard({ interactive: true, rl }); if (result.success) { console.log(result.provider || ''); } diff --git a/bin/provider-wizard/wizard.js b/bin/provider-wizard/wizard.js index 70d3c833..aa52402f 100644 --- a/bin/provider-wizard/wizard.js +++ b/bin/provider-wizard/wizard.js @@ -70,6 +70,28 @@ async function validateApiKey(provider, apiKey, baseUrl) { } } +// List models from an OpenAI-compatible /models endpoint. Used to offer a +// picker for local providers (Ollama, LM Studio) where the installed models +// are knowable. Returns [] on any failure — caller falls back to free text. +async function fetchModels(baseUrl, apiKey) { + const url = (baseUrl || '').replace(/\/+$/, ''); + if (!url) return []; + try { + const headers = apiKey ? { 'Authorization': `Bearer ${apiKey}` } : {}; + const res = await fetch(`${url}/models`, { + headers, + signal: AbortSignal.timeout(5000), + }); + if (!res.ok) return []; + const data = await res.json(); + return (Array.isArray(data.data) ? 
data.data : []) + .map(m => m && m.id) + .filter(Boolean); + } catch { + return []; + } +} + function mergeEnvFile(filePath, newVars) { let lines = []; try { @@ -118,8 +140,11 @@ async function runWizard(options = {}) { // Load existing env const existingEnv = parseEnvFile(envPath); - let rl = null; - if (isInteractive) { + // Borrow the caller's readline when available — creating a second interface + // on the same stdin makes both echo every keystroke (duplicated letters). + const borrowedRl = options.rl && typeof options.rl.question === 'function' ? options.rl : null; + let rl = borrowedRl; + if (isInteractive && !rl) { rl = readline.createInterface({ input: process.stdin, output: process.stdout, @@ -212,7 +237,23 @@ async function runWizard(options = {}) { }; let model = options.model || ''; if (!model && isInteractive) { - model = await ask(rl, ' Model name', defaultModels[provider] || ''); + // Local providers: list installed models so the user can pick instead + // of typing the exact name. Falls back to free text if the server is + // unreachable or the list is empty. 
+ if (!providerInfo.keyEnv) { + process.stdout.write(` Fetching models from ${baseUrl}...`); + const models = await fetchModels(baseUrl, apiKey); + if (models.length) { + console.log(` \x1b[32m${models.length} found\x1b[0m`); + const idx = await askNumber(rl, ' Select a model:', models); + if (idx >= 0) model = models[idx]; + } else { + console.log(' \x1b[33mnone found — enter manually\x1b[0m'); + } + } + if (!model) { + model = await ask(rl, ' Model name', defaultModels[provider] || ''); + } } model = model || defaultModels[provider] || ''; @@ -334,8 +375,8 @@ async function runWizard(options = {}) { return result; } finally { - if (rl) rl.close(); + if (rl && !borrowedRl) rl.close(); } } -module.exports = { runWizard, ask, askNumber, askYesNo, validateApiKey, mergeEnvFile }; +module.exports = { runWizard, ask, askNumber, askYesNo, validateApiKey, mergeEnvFile, fetchModels }; diff --git a/test/provider_wizard.test.js b/test/provider_wizard.test.js index 893282e1..0e4473c1 100644 --- a/test/provider_wizard.test.js +++ b/test/provider_wizard.test.js @@ -10,7 +10,7 @@ const os = require('node:os'); const path = require('node:path'); const { parseEnvFile, PROVIDERS, formatStatus } = require('../bin/provider-wizard/status'); -const { mergeEnvFile } = require('../bin/provider-wizard/wizard'); +const { mergeEnvFile, runWizard, fetchModels } = require('../bin/provider-wizard/wizard'); function tmp(prefix) { return fs.mkdtempSync(path.join(os.tmpdir(), `${prefix}-`)); @@ -77,6 +77,89 @@ test('mergeEnvFile creates content from scratch when file missing', () => { assert.match(out, /^NEW=yes$/m); }); +// Scripted readline stand-in: answers questions in order, records close(). +function fakeRl(answers) { + const queue = [...answers]; + return { + closed: false, + question(q, cb) { cb(queue.length ? 
queue.shift() : ''); }, + close() { this.closed = true; }, + }; +} + +async function withStubbedFetch(impl, fn) { + const orig = global.fetch; + global.fetch = impl; + try { return await fn(); } finally { global.fetch = orig; } +} + +async function inTmpCwd(fn) { + const dir = tmp('sc-pw-wiz'); + const orig = process.cwd(); + process.chdir(dir); + try { return await fn(dir); } finally { process.chdir(orig); } +} + +test('fetchModels returns ids from an OpenAI-compatible /models endpoint', async () => { + const got = await withStubbedFetch( + async () => ({ ok: true, json: async () => ({ data: [{ id: 'a' }, { id: 'b' }] }) }), + () => fetchModels('http://localhost:11434/v1'), + ); + assert.deepEqual(got, ['a', 'b']); +}); + +test('fetchModels returns [] on unreachable server', async () => { + const got = await withStubbedFetch( + async () => { throw new Error('ECONNREFUSED'); }, + () => fetchModels('http://localhost:1/v1'), + ); + assert.deepEqual(got, []); +}); + +test('wizard offers local model picker and uses the selection', async () => { + await inTmpCwd(async (dir) => { + const rl = fakeRl([ + '2', // provider: Ollama + '', // base URL: accept default + '2', // model picker: second entry + 'n', // no escalation + '2', // save to project only + ]); + const result = await withStubbedFetch( + async () => ({ ok: true, json: async () => ({ data: [{ id: 'm-one' }, { id: 'm-two' }] }) }), + () => runWizard({ interactive: true, rl }), + ); + assert.equal(result.success, true); + assert.equal(result.model, 'm-two'); + const env = parseEnvFile(path.join(dir, '.env')); + assert.equal(env.SMALLCODE_MODEL, 'm-two'); + assert.equal(env.SMALLCODE_PROVIDER, 'ollama'); + // Borrowed rl must not be closed by the wizard (issue: duplicated + // keystrokes came from a second readline on the same stdin) + assert.equal(rl.closed, false); + }); +}); + +test('wizard falls back to free-text model when listing fails', async () => { + await inTmpCwd(async (dir) => { + const rl = fakeRl([ 
+ '2', // provider: Ollama + '', // base URL: accept default + 'typed-model', // manual model entry (picker unavailable) + 'n', // no escalation + '2', // save to project only + ]); + const result = await withStubbedFetch( + async () => { throw new Error('ECONNREFUSED'); }, + () => runWizard({ interactive: true, rl }), + ); + assert.equal(result.success, true); + assert.equal(result.model, 'typed-model'); + const env = parseEnvFile(path.join(dir, '.env')); + assert.equal(env.SMALLCODE_MODEL, 'typed-model'); + }); +}); + test('formatStatus renders provider, base url, model, escalation', () => { const out = formatStatus({ provider: 'openai', From 803e9bdba8558a6a046036a887381d4975ef0e77 Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:34:38 -0700 Subject: [PATCH 05/27] feat(tui): drag-select chat text with copy to clipboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mouse selection scoped to the chat panel: drag highlights, release copies (ANSI-stripped) to the system clipboard. The 10-char role gutter and the tool panel never select. Dwelling at the panel edge auto-scrolls so selections extend beyond the visible window. Enables SGR 1002 (button-motion tracking) — 1000 alone reports no drag events, which also explains why text was previously unselectable in fullscreen mode at all. 
Constraint: motion events only arrive while the pointer moves, so edge auto-scroll needs dwell (repeat events), not hover Rejected: terminal-native selection via disabling mouse tracking | loses wheel scroll and cannot scope to the chat panel Confidence: high Scope-risk: narrow Not-tested: macOS/Linux clipboard paths (pbcopy/xclip) — same pattern as existing Ctrl+V paste Co-Authored-By: Claude Opus 4.8 (1M context) --- src/tui/fullscreen.js | 146 ++++++++++++++++++++++++++++- src/tui/terminal.js | 6 +- test/chat_selection.test.js | 179 ++++++++++++++++++++++++++++++++++++ 3 files changed, 328 insertions(+), 3 deletions(-) create mode 100644 test/chat_selection.test.js diff --git a/src/tui/fullscreen.js b/src/tui/fullscreen.js index 6a4c935f..305a0cf3 100644 --- a/src/tui/fullscreen.js +++ b/src/tui/fullscreen.js @@ -161,6 +161,12 @@ class FullScreenTUI { // Panel content buffers this.chatLines = []; // Rendered chat messages + + // Mouse text selection in the chat panel (drag to highlight, copy on + // release). Anchored to chatLines indices so scrolling doesn't shift it. + this.selection = null; // { anchor: {line, col}, head: {line, col} } + this._selecting = false; + this._lastDragY = null; // previous drag row — edge-dwell detection this.toolLines = []; // Tool execution log this.inputBuffer = ''; // Current user input this.inputCursor = 0; // Cursor position in input @@ -332,7 +338,8 @@ class FullScreenTUI { for (let i = 0; i < this.chatHeight; i++) { buf += ANSI.moveTo(i + 1, 1); - const line = visible[i] || ''; + let line = visible[i] || ''; + if (this.selection) line = this._highlightSelection(startLine + i, line); buf += fitAnsi(line, this.chatWidth); } @@ -905,6 +912,11 @@ class FullScreenTUI { this.render(); return; } + // Mouse press / drag / release (SGR) — text selection in the chat panel. + // Only the chat region selects; tool panel and input area are ignored. 
+ if (key.includes('\x1b[<')) { + if (this._onMouseSelect(key)) return; + } // Ctrl+L — clear and redraw if (key === '\x0c') { @@ -1137,6 +1149,138 @@ class FullScreenTUI { this.render(); } + // ─── Mouse selection ───────────────────────────────────────────────── + + // Handle SGR mouse events for chat-panel text selection. + // Returns true when the chunk was consumed as selection input. + _onMouseSelect(data) { + const events = [...data.matchAll(/\x1b\[<(\d+);(\d+);(\d+)([Mm])/g)]; + if (events.length === 0) return false; + + let handled = false; + for (const ev of events) { + const btn = parseInt(ev[1]); + const x = parseInt(ev[2]); // 1-based column + const y = parseInt(ev[3]); // 1-based row + const isRelease = ev[4] === 'm'; + + // Left press inside the chat panel — start selecting + if (btn === 0 && !isRelease && !this._selecting) { + if (x <= this.chatWidth && y <= this.chatHeight) { + const pos = this._chatPosAt(x, y); + this.selection = { anchor: pos, head: pos }; + this._selecting = true; + this._lastDragY = null; + handled = true; + } else { + // Click outside the chat panel clears any old highlight + if (this.selection) { this.selection = null; this.render(); } + } + continue; + } + // Drag with left button held — extend selection. Staying at the + // panel's top/bottom edge (repeated edge events) auto-scrolls so the + // selection can extend beyond the visible window; merely reaching the + // edge row selects it without scrolling. 
+ if (btn === 32 && this._selecting) { + const prevY = this._lastDragY; + this._lastDragY = y; + if (y <= 1 && prevY !== null && prevY <= 1) { + const maxBack = -(Math.max(0, this.chatLines.length - this.chatHeight)); + this.chatScroll = Math.max(maxBack, this.chatScroll - 1) || 0; // || 0 normalizes -0 + } else if (y > this.chatHeight || (y === this.chatHeight && prevY !== null && prevY >= this.chatHeight)) { + this.chatScroll = Math.min(0, this.chatScroll + 1); + } + this.selection.head = this._chatPosAt( + Math.min(x, this.chatWidth), + Math.max(1, Math.min(y, this.chatHeight)) + ); + handled = true; + continue; + } + // Release — copy and clear + if (btn === 0 && isRelease && this._selecting) { + this._selecting = false; + this._lastDragY = null; + const text = this._extractSelection(); + this.selection = null; + if (text) { + this._copyToClipboard(text); + const lines = text.split('\n').length; + this.addTool('clipboard', 'ok', `copied ${lines} line${lines === 1 ? '' : 's'}`); + } + handled = true; + } + } + if (handled) this.render(); + return handled; + } + + // Map a terminal (x, y) inside the chat panel to a chatLines position. + _chatPosAt(x, y) { + const startLine = Math.max(0, this.chatLines.length - this.chatHeight + this.chatScroll); + return { line: startLine + (y - 1), col: x - 1 }; + } + + // Chat lines carry a fixed 10-char gutter (8-char role label + '│ '). + // Selection clamps to the text area so the gutter never highlights or + // copies; a drag starting in the gutter selects from the text start. + static CHAT_GUTTER = 10; + + // Selection with anchor/head ordered top-to-bottom. + _normalizedSelection() { + if (!this.selection) return null; + const { anchor: a, head: h } = this.selection; + if (a.line < h.line || (a.line === h.line && a.col <= h.col)) { + return { start: a, end: h }; + } + return { start: h, end: a }; + } + + // Plain text covered by the current selection. 
+ _extractSelection() { + const sel = this._normalizedSelection(); + if (!sel) return ''; + const gutter = FullScreenTUI.CHAT_GUTTER; + const out = []; + for (let i = sel.start.line; i <= sel.end.line; i++) { + if (i < 0 || i >= this.chatLines.length) continue; + const plain = this._stripAnsi(this.chatLines[i] || ''); + const from = Math.max(gutter, i === sel.start.line ? sel.start.col : 0); + const to = i === sel.end.line ? sel.end.col + 1 : plain.length; + out.push(to > from ? plain.slice(from, to).replace(/\s+$/, '') : ''); + } + return out.join('\n').replace(/\n+$/, ''); + } + + // Apply inverse-video highlight to the selected span of a chat line. + // Works on the ANSI-stripped text — colors drop while selected, which is + // the standard tradeoff for span-accurate highlighting. + _highlightSelection(lineIdx, line) { + const sel = this._normalizedSelection(); + if (!sel || lineIdx < sel.start.line || lineIdx > sel.end.line) return line; + const gutter = FullScreenTUI.CHAT_GUTTER; + const plain = this._stripAnsi(line); + const from = Math.max(gutter, Math.min( + lineIdx === sel.start.line ? sel.start.col : 0, plain.length)); + const to = lineIdx === sel.end.line ? 
Math.min(sel.end.col + 1, plain.length) : plain.length; + if (from >= to) return line; + return plain.slice(0, from) + '\x1b[7m' + plain.slice(from, to) + '\x1b[27m' + plain.slice(to); + } + + _copyToClipboard(text) { + try { + const { execSync } = require('child_process'); + if (process.platform === 'win32') { + execSync('powershell -noprofile -command "$input | Set-Clipboard"', { input: text, timeout: 3000 }); + } else if (process.platform === 'darwin') { + execSync('pbcopy', { input: text, timeout: 3000 }); + } else { + execSync('xclip -selection clipboard 2>/dev/null || xsel --clipboard --input 2>/dev/null', { input: text, timeout: 3000, shell: true }); + } + } catch {} + } + // ─── Utilities ─────────────────────────────────────────────────────── _truncate(str, maxLen) { diff --git a/src/tui/terminal.js b/src/tui/terminal.js index 7ae5583f..11913dc7 100644 --- a/src/tui/terminal.js +++ b/src/tui/terminal.js @@ -24,8 +24,10 @@ const SEQ = { hideCursor: '\x1b[?25l', showCursor: '\x1b[?25h', reset: '\x1b[0m', - mouseOn: '\x1b[?1000h\x1b[?1006h', // button tracking + SGR encoding - mouseOff: '\x1b[?1000l\x1b[?1006l', + // 1000 = presses/releases/wheel, 1002 = also motion while a button is held + // (needed for drag-selection in the chat panel), 1006 = SGR encoding + mouseOn: '\x1b[?1000h\x1b[?1002h\x1b[?1006h', + mouseOff: '\x1b[?1002l\x1b[?1000l\x1b[?1006l', pasteOn: '\x1b[?2004h', // bracketed paste pasteOff: '\x1b[?2004l', }; diff --git a/test/chat_selection.test.js b/test/chat_selection.test.js new file mode 100644 index 00000000..64e4a812 --- /dev/null +++ b/test/chat_selection.test.js @@ -0,0 +1,179 @@ +'use strict'; + +// SmallCode — chat panel mouse selection tests +// Drag-to-highlight + copy in the fullscreen TUI chat panel. Tool panel and +// input area must not select; the 10-char role gutter (' USER │ ') never +// highlights or copies; clipboard receives ANSI-stripped text. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { FullScreenTUI } = require('../src/tui/fullscreen'); + +// SGR mouse encodings +const press = (x, y) => `\x1b[<0;${x};${y}M`; +const drag = (x, y) => `\x1b[<32;${x};${y}M`; +const release = (x, y) => `\x1b[<0;${x};${y}m`; + +// Chat lines as addChat builds them: 8-char role label + '│ ' = 10-char +// gutter, then the message text. Text starts at 1-based column 11. +const USER = ' USER │ '; +const CONT = ' │ '; + +function makeTui(lines) { + const tui = new FullScreenTUI(); + tui.chatLines = lines; + tui.chatHeight = 10; + tui.chatWidth = 40; + tui.toolWidth = 30; + tui.chatScroll = 0; + tui.copied = null; + tui._copyToClipboard = (text) => { tui.copied = text; }; + tui.addTool = () => {}; + return tui; +} + +test('drag across two lines copies the span without gutter text', () => { + const tui = makeTui([USER + 'hello world', CONT + 'second line']); + tui._onMouseSelect(press(17, 1)); // "w" of world (col 16, 0-based) + tui._onMouseSelect(drag(16, 2)); // "d" of second + tui._onMouseSelect(release(16, 2)); + assert.equal(tui.copied, 'world\nsecond'); + assert.equal(tui.selection, null, 'selection cleared after copy'); +}); + +test('single-line selection respects column bounds', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(11, 1)); + tui._onMouseSelect(drag(15, 1)); + tui._onMouseSelect(release(15, 1)); + assert.equal(tui.copied, 'hello'); +}); + +test('drag starting in the gutter selects from the text start', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(2, 1)); // inside " USER " label + tui._onMouseSelect(drag(15, 1)); + tui._onMouseSelect(release(15, 1)); + assert.equal(tui.copied, 'hello'); +}); + +test('gutter is never included on continuation lines', () => { + const tui = makeTui([USER + 'first', CONT + 'middle', CONT + 'last line']); + tui._onMouseSelect(press(11, 1)); + 
tui._onMouseSelect(drag(14, 3)); + tui._onMouseSelect(release(14, 3)); + assert.equal(tui.copied, 'first\nmiddle\nlast'); +}); + +test('reverse drag (bottom-up) normalizes to the same text', () => { + const tui = makeTui([USER + 'hello world', CONT + 'second line']); + tui._onMouseSelect(press(16, 2)); + tui._onMouseSelect(drag(17, 1)); + tui._onMouseSelect(release(17, 1)); + assert.equal(tui.copied, 'world\nsecond'); +}); + +test('ANSI color codes are stripped from copied text', () => { + const tui = makeTui(['\x1b[36m USER \x1b[0m│ \x1b[32mgreen text\x1b[0m here']); + tui._onMouseSelect(press(11, 1)); + tui._onMouseSelect(drag(20, 1)); + tui._onMouseSelect(release(20, 1)); + assert.equal(tui.copied, 'green text'); +}); + +test('clicks in the tool panel do not start a selection', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(45, 1)); // beyond chatWidth=40 + assert.equal(tui.selection, null); + assert.equal(tui._selecting, false); +}); + +test('clicks below the chat panel do not start a selection', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(5, 12)); // beyond chatHeight=10 + assert.equal(tui.selection, null); +}); + +test('selection accounts for chat scroll offset', () => { + const lines = []; + for (let i = 0; i < 30; i++) lines.push(CONT + `line-${i}`); + const tui = makeTui(lines); + tui.chatScroll = -5; // scrolled up 5 lines: top visible row = line-15 + tui._onMouseSelect(press(11, 1)); + tui._onMouseSelect(drag(17, 1)); + tui._onMouseSelect(release(17, 1)); + assert.equal(tui.copied, 'line-15'); +}); + +test('highlight covers the selected span but not the gutter', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(2, 1)); // starts in the gutter + tui._onMouseSelect(drag(15, 1)); + const out = tui._highlightSelection(0, USER + 'hello world'); + assert.equal(out, USER + '\x1b[7mhello\x1b[27m world'); +}); + +test('selection entirely inside the gutter 
copies nothing', () => { + const tui = makeTui([USER + 'hello world']); + tui._onMouseSelect(press(2, 1)); + tui._onMouseSelect(drag(6, 1)); + tui._onMouseSelect(release(6, 1)); + assert.equal(tui.copied, null, 'no clipboard write for gutter-only selection'); +}); + +test('dwelling at the bottom edge auto-scrolls down', () => { + const lines = []; + for (let i = 0; i < 30; i++) lines.push(CONT + `line-${i}`); + const tui = makeTui(lines); + tui.chatScroll = -5; // visible: line-15 .. line-24 + tui._onMouseSelect(press(11, 1)); // anchor line-15 + tui._onMouseSelect(drag(17, 10)); // reach bottom edge — no scroll yet + assert.equal(tui.chatScroll, -5, 'first edge event selects, does not scroll'); + tui._onMouseSelect(drag(17, 10)); // dwell → -4 + assert.equal(tui.chatScroll, -4); + tui._onMouseSelect(drag(17, 10)); // dwell → -3 + assert.equal(tui.chatScroll, -3); + tui._onMouseSelect(release(17, 10)); // head followed the scroll to line-26 + assert.match(tui.copied, /^line-15\n/); + assert.match(tui.copied, /line-26$/); +}); + +test('dwelling at the top edge auto-scrolls up', () => { + const lines = []; + for (let i = 0; i < 30; i++) lines.push(CONT + `line-${i}`); + const tui = makeTui(lines); // visible: line-20 .. 
line-29 + tui._onMouseSelect(press(17, 5)); // anchor end of line-24 + tui._onMouseSelect(drag(11, 1)); // reach top edge — no scroll yet + assert.equal(tui.chatScroll, 0, 'first edge event selects, does not scroll'); + tui._onMouseSelect(drag(11, 1)); // dwell → -1 + assert.equal(tui.chatScroll, -1); + tui._onMouseSelect(release(11, 1)); // head = line-19 text start + assert.match(tui.copied, /^line-19\n/); + assert.match(tui.copied, /line-24$/); +}); + +test('dragging past the panel bottom scrolls immediately', () => { + const lines = []; + for (let i = 0; i < 30; i++) lines.push(CONT + `line-${i}`); + const tui = makeTui(lines); + tui.chatScroll = -5; + tui._onMouseSelect(press(11, 1)); + tui._onMouseSelect(drag(17, 12)); // y beyond chatHeight → immediate scroll + assert.equal(tui.chatScroll, -4); +}); + +test('auto-scroll clamps at the ends of history', () => { + const tui = makeTui([USER + 'only line']); // fewer lines than chatHeight + tui._onMouseSelect(press(11, 1)); + tui._onMouseSelect(drag(15, 10)); // bottom edge at scroll 0 + assert.equal(tui.chatScroll, 0, 'cannot scroll past the newest line'); + tui._onMouseSelect(drag(15, 1)); // top edge with no history + assert.equal(tui.chatScroll, 0, 'cannot scroll past the oldest line'); +}); + +test('wheel events are not consumed by selection handler', () => { + const tui = makeTui([USER + 'hello world']); + const consumed = tui._onMouseSelect('\x1b[<64;5;5M'); + assert.equal(consumed, false); +}); From 086fa4a7456804dc301e279436e65a47518fc69d Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 08:30:24 -0700 Subject: [PATCH 06/27] feat(evolver): /evolve proposes skills from session friction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create-mode evolver: deterministic friction extraction from saved traces (repeated near-duplicate prompts, consecutive tool-retry loops), LLM judgment routed to the strong tier, 
and ONE quarantined skill draft per run written to .smallcode/skills/drafts/. Drafts never auto-load; /evolve promote moves them live. Validation gates every write (name format, no frontmatter injection, trigger rules); name collisions across live+draft+global dirs abort; every create appends to .smallcode/evolver-audit.jsonl. The per-run cap is structural — EvolverRun raises on a second create. Constraint: small models produce noisy judgments, so all fuzzy output passes validate-or-abort before any write Rejected: plugin delivery | needs TraceRecorder + SkillManager internals unreachable from plugin dirs under binary installs Confidence: high Scope-risk: narrow Directive: keep mechanics LLM-free — judgment stays in the command handler so mechanics remain unit-testable Not-tested: strong-tier routing with a separately configured SMALLCODE_MODEL_STRONG endpoint Co-Authored-By: Claude Opus 4.8 (1M context) --- bin/commands.js | 156 ++++++++++++++++++++ src/plugins/audit_log.js | 33 +++++ src/plugins/evolver.js | 174 ++++++++++++++++++++++ src/plugins/friction_analyzer.js | 123 ++++++++++++++++ src/plugins/skills.js | 30 ++++ src/tui/fullscreen.js | 1 + test/evolver.test.js | 240 +++++++++++++++++++++++++++++++ 7 files changed, 757 insertions(+) create mode 100644 src/plugins/audit_log.js create mode 100644 src/plugins/evolver.js create mode 100644 src/plugins/friction_analyzer.js create mode 100644 test/evolver.test.js diff --git a/bin/commands.js b/bin/commands.js index 9ba5ebc8..e1a7e11c 100644 --- a/bin/commands.js +++ b/bin/commands.js @@ -851,6 +851,7 @@ module.exports = function createCommandHandler(config, conversationHistory, impr console.log(` ${chalk.cyan('/budget')} ${chalk.gray('Show context window budget')}`); console.log(` ${chalk.cyan('/mcp')} ${chalk.gray('Show connected MCP servers')}`); console.log(` ${chalk.cyan('/skill')} ${chalk.gray('Manage reusable skills')}`); + console.log(` ${chalk.cyan('/evolve')} ${chalk.gray('Propose a new skill from 
session friction (list|promote|log)')}`); console.log(` ${chalk.cyan('/plugin')} ${chalk.gray('List installed plugins')}`); console.log(` ${chalk.cyan('/provider')} ${chalk.gray('Configure LLM provider (interactive wizard)')}`); console.log(` ${chalk.cyan('/sessions')} ${chalk.gray('List/resume saved sessions')}`); @@ -863,6 +864,161 @@ module.exports = function createCommandHandler(config, conversationHistory, impr rl.prompt(); return; + case '/evolve': { + const { SkillManager } = require('../src/plugins/skills'); + const sm = new SkillManager(process.cwd()); + const sub = (parts[1] || '').trim(); + + if (sub === 'list') { + const drafts = sm.listDrafts(); + if (drafts.length === 0) { + console.log(chalk.gray(' No skill drafts. Run /evolve to analyze recent sessions.')); + } else { + console.log(chalk.bold(` Drafts (${drafts.length}) — promote with /evolve promote :`)); + for (const d of drafts) console.log(` ${chalk.cyan(d)}`); + } + console.log(''); + rl.prompt(); + return; + } + + if (sub === 'promote') { + const name = (parts[2] || '').trim(); + if (!name) { console.log(chalk.gray(' Usage: /evolve promote ')); } + else { + const target = sm.promoteDraft(name); + if (target) console.log(` ${chalk.green('✓')} Promoted to ${chalk.cyan(target)} — active next session.`); + else console.log(chalk.red(` Draft "${name}" not found (or a live skill with that name exists).`)); + } + console.log(''); + rl.prompt(); + return; + } + + if (sub === 'log') { + const { readEntries } = require('../src/plugins/audit_log'); + const entries = readEntries(path.join(process.cwd(), '.smallcode', 'evolver-audit.jsonl'), 10); + if (entries.length === 0) console.log(chalk.gray(' No evolution events logged yet.')); + for (const e of entries) { + console.log(` ${chalk.gray(e.ts)} ${chalk.cyan(e.name)} ${chalk.gray(e.rationale.slice(0, 60))}`); + } + console.log(''); + rl.prompt(); + return; + } + + // No sub-command: run an evolution pass + const { TraceRecorder } = 
require('./trace_recorder'); + const { extractFrictionSignals, formatReportForPrompt } = require('../src/plugins/friction_analyzer'); + const evolver = require('../src/plugins/evolver'); + + const tr = new TraceRecorder(process.cwd()); + const traceList = tr.list().slice(0, 20); + if (traceList.length < 3) { + console.log(chalk.gray(` Only ${traceList.length} trace(s) recorded — need at least 3 sessions of data.`)); + console.log(''); + rl.prompt(); + return; + } + const traces = traceList.map(t => tr.load(t.id)).filter(Boolean); + + const skillKeywords = sm.list().flatMap(s => s.keywords || []); + const report = extractFrictionSignals(traces, { skillKeywords }); + const signalCount = report.repeated_patterns.length + report.tool_retry_loops.length; + if (signalCount === 0) { + console.log(chalk.gray(` No friction patterns in last ${traces.length} traces. Nothing to evolve.`)); + console.log(''); + rl.prompt(); + return; + } + + console.log(chalk.bold(` Friction signals (${signalCount}):`)); + console.log(chalk.gray(formatReportForPrompt(report).split('\n').map(l => ' ' + l).join('\n'))); + + // LLM judgment — route to the strong tier when configured + const { getModelTarget, buildAuthHeaders, withModelTarget } = require('./config'); + const target = getModelTarget(config, 'strong'); + process.stdout.write(chalk.gray(` Asking ${target.model} for a proposal... `)); + + const sysPrompt = 'You design reusable skills for a coding agent. A skill is a short markdown instruction injected when relevant. Given friction signals from recent sessions, propose ONE skill addressing the most impactful pattern. 
Respond with ONLY a JSON object: {"name": "kebab-case-name", "description": "one line", "trigger": "match", "keywords": ["k1","k2"], "body": "markdown instructions for the agent", "rationale": "why this helps"}'; + let proposalRaw = null; + try { + const resp = await fetch(`${target.baseUrl}/chat/completions`, { + method: 'POST', + headers: buildAuthHeaders(withModelTarget(config, target)), + body: JSON.stringify({ + model: target.model, + messages: [ + { role: 'system', content: sysPrompt }, + { role: 'user', content: `Friction signals:\n${formatReportForPrompt(report)}` }, + ], + temperature: 0.2, + max_tokens: 1024, + }), + }); + if (resp.ok) { + const data = await resp.json(); + proposalRaw = data?.choices?.[0]?.message?.content || null; + } else { + console.log(chalk.red(`HTTP ${resp.status}`)); + } + } catch (e) { + console.log(chalk.red(e.message)); + } + if (!proposalRaw) { console.log(''); rl.prompt(); return; } + + // Forgiving parse: strict JSON → fenced JSON → abort with raw output + let parsed = null; + try { parsed = JSON.parse(proposalRaw); } catch { + const m = proposalRaw.match(/\{[\s\S]*\}/); + if (m) { try { parsed = JSON.parse(m[0]); } catch {} } + } + if (!parsed) { + console.log(chalk.yellow('could not parse')); + console.log(chalk.gray(' Raw model output (nothing written):')); + console.log(chalk.gray(' ' + proposalRaw.slice(0, 500).split('\n').join('\n '))); + console.log(''); + rl.prompt(); + return; + } + console.log(chalk.green('ok')); + + const proposal = evolver.buildSkillProposal( + String(parsed.name || ''), String(parsed.description || ''), String(parsed.body || ''), + { trigger: parsed.trigger, keywords: parsed.keywords, rationale: String(parsed.rationale || '') } + ); + const errors = evolver.validateProposal(proposal); + if (errors.length) { + console.log(chalk.red(` Proposal rejected: ${errors.join('; ')}`)); + console.log(''); + rl.prompt(); + return; + } + const collision = evolver.checkNameCollision(proposal.name, 
/**
 * Append one entry to a JSONL audit log.
 *
 * The whole file is rewritten to a temp file and renamed over the original,
 * so a crash mid-write leaves the previous history intact.
 *
 * @param {string} filePath - path of the .jsonl log; parent dirs are created.
 * @param {*} entry - JSON-serialisable value, stored as a single line.
 * @throws {Error} when the existing log cannot be read for any reason other
 *   than "file does not exist", or when the write/rename fails.
 */
function appendEntry(filePath, entry) {
  // recursive mkdir is a no-op when the directory already exists
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  const line = `${JSON.stringify(entry)}\n`;

  let existing = '';
  try {
    existing = fs.readFileSync(filePath, 'utf-8');
  } catch (err) {
    // Only a missing file means "no history yet". Any other read failure
    // must abort: continuing would rename a file holding just the new line
    // over the real log and silently erase it.
    if (!err || err.code !== 'ENOENT') throw err;
  }
  // A log whose last line lacks a terminator would otherwise be fused with
  // the new entry, corrupting both rows.
  if (existing && !existing.endsWith('\n')) existing += '\n';

  const tmpPath = `${filePath}.tmp.${process.pid}.${Date.now()}`;
  try {
    fs.writeFileSync(tmpPath, existing + line, 'utf-8');
    fs.renameSync(tmpPath, filePath);
  } catch (err) {
    // Best-effort cleanup so failed writes do not litter the directory.
    try { fs.unlinkSync(tmpPath); } catch { /* temp file may not exist */ }
    throw err;
  }
}

/**
 * Read the most recent entries from a JSONL audit log.
 *
 * Blank lines and lines that are not valid JSON are skipped.
 *
 * @param {string} filePath - path of the .jsonl log.
 * @param {number} [limit=100] - maximum number of trailing entries to return.
 * @returns {Array} parsed entries, oldest first; [] when the file is
 *   unreadable or limit is not a positive number.
 */
function readEntries(filePath, limit = 100) {
  let content;
  try {
    content = fs.readFileSync(filePath, 'utf-8');
  } catch {
    return [];
  }

  const entries = [];
  for (const line of content.split('\n')) {
    if (!line.trim()) continue;
    try {
      entries.push(JSON.parse(line));
    } catch {
      // malformed row: skip it rather than fail the whole read
    }
  }

  // slice(-0) is slice(0) and would return everything, so handle
  // non-positive limits explicitly.
  const max = Math.floor(Number(limit));
  if (!(max > 0)) return [];
  return entries.slice(-max);
}
const MAX_CREATES_PER_RUN = 1;
const NAME_RE = /^[A-Za-z0-9_-]+$/;
const VALID_TRIGGERS = new Set(['manual', 'auto', 'match']);
// Characters that would let a keyword break out of the inline
// `keywords: [a, b]` frontmatter list written by _skillMd.
const KEYWORD_UNSAFE_RE = /[\r\n,\[\]]/;

/** Raised by EvolverRun when a run tries to exceed its create cap. */
class ProposalCapExceededError extends Error {
  constructor(message) {
    super(message);
    this.name = 'ProposalCapExceededError';
  }
}

// ── Builders ──────────────────────────────────────────────────────────────

/**
 * Build a create-skill proposal object. No validation happens here; pass the
 * result through validateProposal before writing.
 *
 * @param {string} name
 * @param {string} description
 * @param {string} body - markdown body of the skill.
 * @param {{trigger?: string, keywords?: string[], rationale?: string}} [options]
 * @returns {object} proposal with kind 'create' and artefact 'skill'.
 */
function buildSkillProposal(name, description, body, options = {}) {
  return {
    kind: 'create',
    artefact: 'skill',
    name,
    description,
    body,
    trigger: options.trigger || 'manual',
    keywords: Array.isArray(options.keywords) ? options.keywords : [],
    rationale: options.rationale || '',
  };
}

// ── Validation ────────────────────────────────────────────────────────────

/**
 * Check a proposal against the rules that gate every draft write.
 *
 * @param {object} proposal
 * @returns {string[]} human-readable errors; empty when the proposal is valid.
 */
function validateProposal(proposal) {
  if (!proposal || typeof proposal !== 'object') return ['proposal must be an object'];
  const errors = [];

  if (proposal.artefact !== 'skill') {
    errors.push(`artefact must be "skill", got ${JSON.stringify(proposal.artefact)}`);
  }
  if (typeof proposal.name !== 'string' || !NAME_RE.test(proposal.name)) {
    errors.push('name must be a non-empty alphanumeric/-_ string');
  }
  if (typeof proposal.description !== 'string' || !proposal.description.trim()) {
    errors.push('description must be a non-empty string');
  } else if (/[\r\n]/.test(proposal.description)) {
    errors.push('description must not contain newlines (frontmatter-injection risk)');
  }
  if (typeof proposal.body !== 'string' || !proposal.body.trim()) {
    errors.push('body must be a non-empty string');
  }
  if (!VALID_TRIGGERS.has(proposal.trigger)) {
    errors.push(`trigger must be one of manual|auto|match, got ${JSON.stringify(proposal.trigger)}`);
  }

  // Keywords are interpolated into the frontmatter exactly like the
  // description, so they need the same injection guard.
  const { keywords } = proposal;
  if (keywords !== undefined && keywords !== null) {
    if (!Array.isArray(keywords)) {
      errors.push('keywords must be an array of strings');
    } else if (keywords.some(kw => typeof kw !== 'string' || !kw.trim() || KEYWORD_UNSAFE_RE.test(kw))) {
      errors.push('keywords must be non-empty strings without newlines, commas or brackets (frontmatter-injection risk)');
    }
  }
  if (proposal.trigger === 'match' && (!Array.isArray(keywords) || keywords.length === 0)) {
    errors.push('trigger "match" requires a non-empty keywords list');
  }
  return errors;
}

// ── Name-collision check ──────────────────────────────────────────────────

/**
 * Look for an existing skill with this name across the project and home
 * skill dirs, in flat, folder (SKILL.md) and drafts layouts.
 *
 * @param {string} name
 * @param {string} projectDir
 * @returns {string|null} first matching path, or null when the name is free.
 */
function checkNameCollision(name, projectDir) {
  const os = require('os');
  const roots = [
    path.join(projectDir, '.smallcode', 'skills'),
    path.join(os.homedir(), '.smallcode', 'skills'),
    path.join(os.homedir(), '.config', 'smallcode', 'skills'),
  ];
  for (const root of roots) {
    const candidates = [
      path.join(root, `${name}.md`),
      path.join(root, name, 'SKILL.md'),
      path.join(root, 'drafts', `${name}.md`),
    ];
    for (const candidate of candidates) {
      if (fs.existsSync(candidate)) return candidate;
    }
  }
  return null;
}

// ── Draft writer ──────────────────────────────────────────────────────────

// Remove every HTML comment terminator, repeating until none remain: a
// single pass can leave a new "-->" behind (e.g. "---->>" becomes "-->").
function _stripCommentClose(text) {
  let out = String(text);
  while (out.includes('-->')) out = out.split('-->').join('');
  return out;
}

// Render a proposal as skill markdown: YAML-style frontmatter, the trimmed
// body, and the rationale (when present) as a trailing HTML comment.
function _skillMd(proposal) {
  // Tolerate a hand-built proposal without a keywords field.
  const keywords = Array.isArray(proposal.keywords) ? proposal.keywords : [];
  const fm = [
    '---',
    `name: ${proposal.name}`,
    `description: ${proposal.description}`,
    `trigger: ${proposal.trigger}`,
    keywords.length ? `keywords: [${keywords.join(', ')}]` : null,
    '---',
  ].filter(Boolean).join('\n');

  let body = proposal.body.trim() + '\n';
  if (proposal.rationale) {
    body += `\n<!-- Rationale: ${_stripCommentClose(proposal.rationale)} -->\n`;
  }
  return `${fm}\n${body}`;
}

/**
 * Validate a proposal and write it to <projectDir>/.smallcode/skills/drafts/.
 * The file is written to a temp path and renamed into place.
 *
 * @param {object} proposal
 * @param {string} projectDir
 * @returns {string} absolute path of the draft file.
 * @throws {Error} when validation fails, the path escapes the drafts dir, or
 *   the write/rename fails.
 */
function writeDraft(proposal, projectDir) {
  const errors = validateProposal(proposal);
  if (errors.length) throw new Error(`invalid proposal: ${errors.join('; ')}`);

  const draftsDir = path.resolve(projectDir, '.smallcode', 'skills', 'drafts');
  const target = path.resolve(draftsDir, `${proposal.name}.md`);
  // Path containment — name is already validated, but defend anyway
  if (!target.startsWith(draftsDir + path.sep)) {
    throw new Error(`draft path escapes drafts dir: ${target}`);
  }

  fs.mkdirSync(draftsDir, { recursive: true });
  const tmpPath = `${target}.tmp.${process.pid}.${Date.now()}`;
  try {
    fs.writeFileSync(tmpPath, _skillMd(proposal), 'utf-8');
    fs.renameSync(tmpPath, target);
  } catch (err) {
    // Best-effort cleanup so a failed write leaves no stray temp file.
    try { fs.unlinkSync(tmpPath); } catch { /* temp file may not exist */ }
    throw err;
  }
  return target;
}

// ── Audit log ─────────────────────────────────────────────────────────────

/**
 * Append a "create" row to the evolver audit log.
 *
 * @param {string} auditPath - path of the JSONL audit file.
 * @param {object} proposal - supplies artefact, name and fallback rationale.
 * @param {string} [rationale] - overrides proposal.rationale when non-empty.
 * @param {string[]} [sourceTraceIds] - stored as source_traces; any
 *   non-array value is recorded as [].
 */
function logCreateEvent(auditPath, proposal, rationale, sourceTraceIds) {
  appendEntry(auditPath, {
    ts: new Date().toISOString(),
    kind: 'create',
    artefact: proposal.artefact,
    name: proposal.name,
    rationale: rationale || proposal.rationale || '',
    source_traces: Array.isArray(sourceTraceIds) ? sourceTraceIds : [],
  });
}
/**
 * Tracks how many "create" proposals a single evolution pass has written and
 * refuses to go past the configured cap.
 */
class EvolverRun {
  /**
   * @param {number} [maxCreates] - cap on create proposals for this run.
   */
  constructor(maxCreates = MAX_CREATES_PER_RUN) {
    this.maxCreates = maxCreates;
    this.createsSoFar = 0;
    this.written = [];
  }

  /**
   * Write a draft through the module-level writeDraft, enforcing the cap for
   * proposals whose kind is 'create'.
   *
   * @param {object} proposal
   * @param {string} projectDir
   * @returns {string} path of the written draft.
   * @throws {ProposalCapExceededError} when the cap is already reached.
   */
  writeDraft(proposal, projectDir) {
    const isCreate = Boolean(proposal) && proposal.kind === 'create';
    if (isCreate && this.createsSoFar >= this.maxCreates) {
      const message = `already wrote ${this.createsSoFar} create(s); cap is ${this.maxCreates}`;
      throw new ProposalCapExceededError(message);
    }

    // writeDraft validates and throws on bad input, so the counters below
    // only move for drafts that actually reached disk.
    const draftPath = writeDraft(proposal, projectDir);
    if (isCreate) {
      this.createsSoFar += 1;
    }
    this.written.push(draftPath);
    return draftPath;
  }
}
const REPEAT_THRESHOLD = 3;
const RETRY_THRESHOLD = 3;
const SIMILARITY_THRESHOLD = 0.5;

// Lower-case the text, split on anything that is not a-z/0-9, and keep the
// distinct tokens longer than two characters.
function _wordSet(text) {
  const tokens = String(text || '').toLowerCase().split(/[^a-z0-9]+/);
  const words = new Set();
  for (const token of tokens) {
    if (token.length > 2) words.add(token);
  }
  return words;
}

// Jaccard similarity of two Sets; defined as 0 when both are empty.
function _jaccard(a, b) {
  if (a.size === 0 && b.size === 0) return 0;
  const shared = [...a].filter(w => b.has(w)).length;
  return shared / (a.size + b.size - shared);
}

// Heuristic: does a tool result look like a failure?
function _isError(result) {
  const text = String(result || '');
  if (text.startsWith('✗')) return true;
  if (/"error"\s*:/.test(text)) return true;
  return /^Error[:\s]/.test(text);
}

// Cluster traces by prompt similarity (Jaccard on word sets) and report the
// clusters that are large enough and not already covered by a skill keyword.
function _findRepeatedPatterns(traces, skillKeywords) {
  const clusters = []; // { words, prompts, traceIds }
  for (const t of traces) {
    const words = _wordSet(t.prompt);
    if (words.size === 0) continue;

    const home = clusters.find(c => _jaccard(words, c.words) >= SIMILARITY_THRESHOLD);
    if (home === undefined) {
      clusters.push({ words, prompts: [t.prompt], traceIds: [t.id] });
      continue;
    }
    home.prompts.push(t.prompt);
    home.traceIds.push(t.id);
    // The cluster vocabulary grows with every member it absorbs.
    for (const w of words) home.words.add(w);
  }

  const patterns = [];
  for (const c of clusters) {
    if (c.prompts.length < REPEAT_THRESHOLD) continue;
    // Skip patterns a skill already covers (any keyword hits the cluster words)
    const covered = skillKeywords.some(kw => c.words.has(String(kw).toLowerCase()));
    if (covered) continue;
    patterns.push({
      pattern: c.prompts[0].slice(0, 120),
      count: c.prompts.length,
      traceIds: c.traceIds,
    });
  }
  return patterns;
}

// Find runs of back-to-back failed tool calls with the same tool name and
// the same file argument inside each trace. Steps that are not tool calls
// are ignored and do not break a run.
function _findToolRetryLoops(traces) {
  const loops = [];
  for (const t of traces) {
    let run = { tool: null, file: null, fails: 0 };
    const closeRun = () => {
      if (run.fails >= RETRY_THRESHOLD) {
        loops.push({ tool: run.tool, file: run.file, failCount: run.fails, traceIds: [t.id] });
      }
      run = { tool: null, file: null, fails: 0 };
    };

    for (const step of t.steps || []) {
      if (step.type !== 'tool_call') continue;

      // args may be a JSON string or an object; unparseable args mean "no file".
      let file = '';
      try {
        const args = typeof step.args === 'string' ? JSON.parse(step.args) : (step.args || {});
        file = args.path || args.file || '';
      } catch {}

      const failed = _isError(step.result);
      const continuesRun = failed && step.name === run.tool && file === run.file;
      if (continuesRun) {
        run.fails += 1;
        continue;
      }
      closeRun();
      if (failed) run = { tool: step.name, file, fails: 1 };
    }
    closeRun();
  }
  return loops;
}

/**
 * Extract friction signals from saved traces.
 *
 * @param {object[]} traces - trace objects; only `id`, `prompt` and `steps`
 *   are read here. Entries that are not objects are dropped.
 * @param {object} options - { skillKeywords: string[] } keywords of existing skills
 * @returns {{repeated_patterns: object[], tool_retry_loops: object[], analyzed_traces: number}}
 */
function extractFrictionSignals(traces, options = {}) {
  const skillKeywords = options.skillKeywords || [];
  const usable = (traces || []).filter(t => t && typeof t === 'object');
  const repeated = _findRepeatedPatterns(usable, skillKeywords);
  const retryLoops = _findToolRetryLoops(usable);
  return {
    repeated_patterns: repeated,
    tool_retry_loops: retryLoops,
    analyzed_traces: usable.length,
  };
}
/**
 * Render a friction report as compact bullet lines for the LLM prompt.
 * Repeated requests come first, then tool retry loops; the joined text is
 * cut to at most 2000 characters.
 *
 * @param {{repeated_patterns: object[], tool_retry_loops: object[]}} report
 * @returns {string}
 */
function formatReportForPrompt(report) {
  const repeated = [...report.repeated_patterns].map(
    p => `- Repeated request (${p.count}x): "${p.pattern}"`
  );
  const loops = [...report.tool_retry_loops].map(
    l => `- Tool retry loop: ${l.tool} failed ${l.failCount}x in a row on ${l.file || '(no file)'}`
  );
  const text = repeated.concat(loops).join('\n');
  return text.slice(0, 2000);
}
+ promoteDraft(name) { + const safe = String(name || '').replace(/[^a-z0-9-_]/gi, ''); + if (!safe) return null; + const draftsDir = path.join(this.projectDir, '.smallcode', 'skills', 'drafts'); + const source = path.join(draftsDir, `${safe}.md`); + if (!fs.existsSync(source)) return null; + const target = path.join(this.projectDir, '.smallcode', 'skills', `${safe}.md`); + if (fs.existsSync(target)) return null; // never overwrite a live skill + fs.renameSync(source, target); + this._ingestFile(target, `${safe}.md`, path.dirname(target), safe, 'flat'); + return target; + } + + // List quarantined drafts (names only) + listDrafts() { + const draftsDir = path.join(this.projectDir, '.smallcode', 'skills', 'drafts'); + try { + return fs.readdirSync(draftsDir) + .filter(f => f.endsWith('.md')) + .map(f => f.replace(/\.md$/i, '')); + } catch { + return []; + } + } + // Remove a skill remove(name) { const skill = this.skills.get(name); diff --git a/src/tui/fullscreen.js b/src/tui/fullscreen.js index 6a4c935f..5dd8c02a 100644 --- a/src/tui/fullscreen.js +++ b/src/tui/fullscreen.js @@ -192,6 +192,7 @@ class FullScreenTUI { { cmd: '/cognition', alias: null, desc: 'MarrowScript cognition status' }, { cmd: '/mcp', alias: null, desc: 'Connected MCP servers' }, { cmd: '/skill', alias: null, desc: 'Manage reusable skills' }, + { cmd: '/evolve', alias: null, desc: 'Propose skill from session friction' }, { cmd: '/plugin', alias: null, desc: 'Manage plugins' }, { cmd: '/sessions', alias: null, desc: 'List/resume sessions' }, { cmd: '/session', alias: null, desc: 'Parallel sessions' }, diff --git a/test/evolver.test.js b/test/evolver.test.js new file mode 100644 index 00000000..ffb42a7a --- /dev/null +++ b/test/evolver.test.js @@ -0,0 +1,240 @@ +'use strict'; + +// SmallCode — Evolver (create-mode) tests +// Pins the deterministic mechanics behind /evolve: proposal validation, +// quarantined draft writing, the structural 1-create-per-run cap, friction +// extraction from traces, 
and the SkillManager drafts quarantine. + +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const evolver = require('../src/plugins/evolver'); +const { extractFrictionSignals, formatReportForPrompt } = require('../src/plugins/friction_analyzer'); +const { appendEntry, readEntries } = require('../src/plugins/audit_log'); +const { SkillManager } = require('../src/plugins/skills'); + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-evolver-')); +} + +function trace(id, prompt, steps = []) { + return { id, prompt, steps, tokens: { prompt: 0, completion: 0 } }; +} + +function failedStep(tool, file) { + return { type: 'tool_call', name: tool, args: JSON.stringify({ path: file }), result: '✗ failed' }; +} + +// ── Proposal building + validation ─────────────────────────────────────── + +test('buildSkillProposal returns a complete create proposal', () => { + const p = evolver.buildSkillProposal('my-skill', 'does things', 'Body here.', { + trigger: 'match', keywords: ['foo'], rationale: 'seen 3x', + }); + assert.equal(p.kind, 'create'); + assert.equal(p.artefact, 'skill'); + assert.equal(p.trigger, 'match'); + assert.deepEqual(p.keywords, ['foo']); +}); + +test('validateProposal accepts a valid proposal', () => { + const p = evolver.buildSkillProposal('ok-name', 'desc', 'body'); + assert.deepEqual(evolver.validateProposal(p), []); +}); + +test('validateProposal rejects bad names, empty fields, newline descriptions', () => { + const bad = (over) => evolver.validateProposal({ + ...evolver.buildSkillProposal('ok', 'desc', 'body'), ...over, + }); + assert.ok(bad({ name: 'has space' }).length > 0); + assert.ok(bad({ name: '../traverse' }).length > 0); + assert.ok(bad({ name: '' }).length > 0); + assert.ok(bad({ description: '' }).length > 0); + assert.ok(bad({ description: 'line1\nline2' }).length > 0, 'newline = 
frontmatter injection'); + assert.ok(bad({ body: ' ' }).length > 0); + assert.ok(bad({ trigger: 'bogus' }).length > 0); +}); + +test('validateProposal requires keywords for match trigger', () => { + const p = evolver.buildSkillProposal('m', 'd', 'b', { trigger: 'match', keywords: [] }); + assert.ok(evolver.validateProposal(p).length > 0); +}); + +// ── Collision check ─────────────────────────────────────────────────────── + +test('checkNameCollision finds existing flat and draft skills', () => { + const dir = freshProject(); + const skillsDir = path.join(dir, '.smallcode', 'skills'); + fs.mkdirSync(path.join(skillsDir, 'drafts'), { recursive: true }); + fs.writeFileSync(path.join(skillsDir, 'live-skill.md'), '---\nname: live-skill\n---\nx'); + fs.writeFileSync(path.join(skillsDir, 'drafts', 'pending.md'), '---\nname: pending\n---\nx'); + + assert.ok(evolver.checkNameCollision('live-skill', dir)); + assert.ok(evolver.checkNameCollision('pending', dir)); + assert.equal(evolver.checkNameCollision('brand-new', dir), null); +}); + +// ── Draft writing + cap ─────────────────────────────────────────────────── + +test('writeDraft writes to drafts/ quarantine with frontmatter', () => { + const dir = freshProject(); + const p = evolver.buildSkillProposal('drafted', 'a draft', 'Draft body.', { rationale: 'why' }); + const target = evolver.writeDraft(p, dir); + assert.match(target, /[\\/]drafts[\\/]drafted\.md$/); + const content = fs.readFileSync(target, 'utf-8'); + assert.match(content, /^---\nname: drafted\n/); + assert.match(content, /Draft body\./); + assert.match(content, /Rationale: why/); +}); + +test('writeDraft refuses invalid proposals', () => { + const dir = freshProject(); + assert.throws(() => evolver.writeDraft({ artefact: 'skill', name: 'x y', body: 'b' }, dir)); +}); + +test('EvolverRun allows one create, raises on the second', () => { + const dir = freshProject(); + const run = new evolver.EvolverRun(); + run.writeDraft(evolver.buildSkillProposal('first', 
'd', 'b'), dir); + assert.throws( + () => run.writeDraft(evolver.buildSkillProposal('second', 'd', 'b'), dir), + evolver.ProposalCapExceededError + ); + assert.equal(run.createsSoFar, 1); +}); + +// ── Friction analysis ───────────────────────────────────────────────────── + +test('extractFrictionSignals returns empty report for no traces', () => { + const r = extractFrictionSignals([]); + assert.deepEqual(r.repeated_patterns, []); + assert.deepEqual(r.tool_retry_loops, []); + assert.equal(r.analyzed_traces, 0); +}); + +test('three near-identical prompts flag a repeated pattern', () => { + const traces = [ + trace('a1', 'convert this csv file to json format'), + trace('a2', 'convert the csv file into json format please'), + trace('a3', 'csv file convert to json format again'), + trace('b1', 'write unit tests for the auth module'), + ]; + const r = extractFrictionSignals(traces); + assert.equal(r.repeated_patterns.length, 1); + assert.equal(r.repeated_patterns[0].count, 3); + assert.deepEqual(r.repeated_patterns[0].traceIds.sort(), ['a1', 'a2', 'a3']); +}); + +test('repeated pattern covered by an existing skill keyword is suppressed', () => { + const traces = [ + trace('a1', 'convert this csv file to json format'), + trace('a2', 'convert the csv file into json format please'), + trace('a3', 'csv file convert to json format again'), + ]; + const r = extractFrictionSignals(traces, { skillKeywords: ['csv'] }); + assert.equal(r.repeated_patterns.length, 0); +}); + +test('three consecutive same-tool failures flag a retry loop', () => { + const t = trace('t1', 'fix the parser', [ + failedStep('patch', 'src/parser.js'), + failedStep('patch', 'src/parser.js'), + failedStep('patch', 'src/parser.js'), + ]); + const r = extractFrictionSignals([t]); + assert.equal(r.tool_retry_loops.length, 1); + assert.equal(r.tool_retry_loops[0].failCount, 3); + assert.equal(r.tool_retry_loops[0].tool, 'patch'); +}); + +test('interrupted failures do not flag a retry loop', () => { + const t = 
trace('t1', 'fix it', [ + failedStep('patch', 'a.js'), + failedStep('patch', 'a.js'), + { type: 'tool_call', name: 'read_file', args: '{"path":"a.js"}', result: 'content' }, + failedStep('patch', 'a.js'), + ]); + const r = extractFrictionSignals([t]); + assert.equal(r.tool_retry_loops.length, 0); +}); + +test('formatReportForPrompt stays compact', () => { + const r = extractFrictionSignals([ + trace('a1', 'x'.repeat(500) + ' aaa bbb ccc'), + ]); + assert.ok(formatReportForPrompt(r).length <= 2000); +}); + +// ── Drafts quarantine in SkillManager ───────────────────────────────────── + +test('SkillManager never auto-loads skills from drafts/', () => { + const dir = freshProject(); + const draftsDir = path.join(dir, '.smallcode', 'skills', 'drafts'); + fs.mkdirSync(draftsDir, { recursive: true }); + fs.writeFileSync(path.join(draftsDir, 'lurker.md'), '---\nname: lurker\ntrigger: auto\n---\nshould not load'); + + const sm = new SkillManager(dir); + assert.equal(sm.get('lurker'), null, 'draft must stay quarantined'); +}); + +test('promoteDraft moves draft live and a fresh SkillManager loads it', () => { + const dir = freshProject(); + evolver.writeDraft(evolver.buildSkillProposal('riser', 'promoted skill', 'Now live.'), dir); + + const sm = new SkillManager(dir); + assert.equal(sm.get('riser'), null); + const target = sm.promoteDraft('riser'); + assert.ok(target); + assert.ok(sm.get('riser'), 'promoted skill loads in the same manager'); + + const sm2 = new SkillManager(dir); + assert.ok(sm2.get('riser'), 'promoted skill loads in a fresh manager'); + assert.equal(sm2.listDrafts().length, 0); +}); + +test('promoteDraft never overwrites an existing live skill', () => { + const dir = freshProject(); + const skillsDir = path.join(dir, '.smallcode', 'skills'); + fs.mkdirSync(skillsDir, { recursive: true }); + fs.writeFileSync(path.join(skillsDir, 'taken.md'), '---\nname: taken\n---\noriginal'); + evolver.writeDraft(evolver.buildSkillProposal('taken', 'd', 'impostor'), dir); 
+ + const sm = new SkillManager(dir); + assert.equal(sm.promoteDraft('taken'), null); + assert.match(fs.readFileSync(path.join(skillsDir, 'taken.md'), 'utf-8'), /original/); +}); + +test('listDrafts reports quarantined names', () => { + const dir = freshProject(); + evolver.writeDraft(evolver.buildSkillProposal('one', 'd', 'b'), dir); + const sm = new SkillManager(dir); + assert.deepEqual(sm.listDrafts(), ['one']); +}); + +// ── Audit log ───────────────────────────────────────────────────────────── + +test('audit log appends and reads back entries', () => { + const dir = freshProject(); + const file = path.join(dir, '.smallcode', 'evolver-audit.jsonl'); + appendEntry(file, { ts: 't1', kind: 'create', name: 'a' }); + appendEntry(file, { ts: 't2', kind: 'create', name: 'b' }); + const entries = readEntries(file); + assert.equal(entries.length, 2); + assert.equal(entries[1].name, 'b'); +}); + +test('logCreateEvent writes a well-formed audit row', () => { + const dir = freshProject(); + const file = path.join(dir, '.smallcode', 'evolver-audit.jsonl'); + const p = evolver.buildSkillProposal('logged', 'd', 'b', { rationale: 'because' }); + evolver.logCreateEvent(file, p, 'because', ['t1', 't2']); + const [e] = readEntries(file); + assert.equal(e.kind, 'create'); + assert.equal(e.artefact, 'skill'); + assert.equal(e.name, 'logged'); + assert.deepEqual(e.source_traces, ['t1', 't2']); + assert.ok(e.ts); +}); From 4896c27ec01e6af18ec0428304ef3e400f896ce5 Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 08:48:17 -0700 Subject: [PATCH 07/27] feat(skills): lazy index-first loading + use_skill tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SkillManager now reads only frontmatter on startup (_index Map) and loads bodies on demand via _loadBody(), cached in skills Map. 
This cuts per-turn skill injection from ~60k chars (all bodies) to ~240 chars (compact index) for a typical 30-skill install. New surface: getIndex() flat list, formatSkillIndex/formatSkillResult in skill_index_formatter.js, use_skill tool (executor + tools.js). getSkillContext() injects the index always; auto-matched bodies append after, subject to the existing 4000-char cap. Public API (get/list/getAutoSkills/formatForPrompt/add/remove/ promoteDraft/listDrafts) is unchanged — all 335 tests pass. Rejected: inject all bodies always | O(skills) context cost per turn Constraint: existing tests must pass unmodified Confidence: high Scope-risk: moderate Not-tested: live use_skill call by real model (requires interactive session) --- bin/executor.js | 18 ++ bin/smallcode.js | 25 ++- bin/tools.js | 1 + src/plugins/skill_index_formatter.js | 39 +++++ src/plugins/skills.js | 235 ++++++++++++++++++++------- test/skill_lazy.test.js | 190 ++++++++++++++++++++++ 6 files changed, 443 insertions(+), 65 deletions(-) create mode 100644 src/plugins/skill_index_formatter.js create mode 100644 test/skill_lazy.test.js diff --git a/bin/executor.js b/bin/executor.js index 87314a30..091aa28a 100644 --- a/bin/executor.js +++ b/bin/executor.js @@ -840,6 +840,24 @@ async function executeTool(name, args, ctx) { return { result: '' }; } + case 'use_skill': { + const skillManager = ctx.skillManager || null; + if (!skillManager) return { error: 'use_skill: skill system not available' }; + const skillName = String(args.name || '').trim(); + if (!skillName) return { error: 'use_skill: name is required' }; + const skill = skillManager.get(skillName); + if (!skill) { + const validNames = skillManager.getIndex().map(e => e.name).slice(0, 10); + return { error: `use_skill: skill "${skillName}" not found. 
Valid names: ${validNames.join(', ')}` }; + } + const { formatSkillResult } = require('../src/plugins/skill_index_formatter'); + const index = skillManager.getIndex(); + const relatedEntries = (skill.related || []) + .map(r => index.find(e => e.name === r)) + .filter(Boolean); + return { result: formatSkillResult(skill, relatedEntries) }; + } + case 'bone_compile': { const safe = safeResolvePath(args.path, cwd); if (!safe.ok) return { error: `bone_compile rejected: ${safe.reason}` }; diff --git a/bin/smallcode.js b/bin/smallcode.js index d2ac8768..c5be9c50 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -444,6 +444,7 @@ async function executeTool(name, args) { flags, config, tui, + skillManager, }); try { if (dedup) dedup.record(name, args, result); } catch {} @@ -2086,21 +2087,29 @@ function getMemoryContext(messages) { } } -// Auto-load relevant skills based on the user's message +// Auto-load relevant skills based on the user's message. // Fix #18: Cap skill injection to ~1000 tokens (4000 chars). Multiple matching // skills can each be a full .md file, quickly blowing up the system prompt. +// +// Lazy-skills: always inject the compact index (one line per skill, ~8 tokens each) +// so the model can call use_skill to pull any body on demand. Auto-matched skill +// bodies are appended after the index, subject to the 4000-char aggregate cap. function getSkillContext(messages) { if (!skillManager) return ''; try { + const { formatSkillIndex } = require('../src/plugins/skill_index_formatter'); + const index = skillManager.getIndex(); + const indexStr = formatSkillIndex(index); + const lastUser = [...messages].reverse().find(m => m.role === 'user'); - if (!lastUser) return ''; - const skills = skillManager.getAutoSkills(lastUser.content); - if (skills.length === 0) return ''; - const formatted = skillManager.formatForPrompt(skills); + const autoSkills = lastUser ? 
skillManager.getAutoSkills(lastUser.content) : []; + const autoFormatted = skillManager.formatForPrompt(autoSkills); + + const combined = indexStr + (autoFormatted ? '\n' + autoFormatted : ''); // Hard cap: truncate if too long - return formatted.length > 4000 - ? formatted.slice(0, 4000) + '\n... (skills truncated to fit context)' - : formatted; + return combined.length > 4000 + ? combined.slice(0, 4000) + '\n... (skills truncated to fit context)' + : combined; } catch { return ''; } diff --git a/bin/tools.js b/bin/tools.js index c34191a1..9682fd49 100644 --- a/bin/tools.js +++ b/bin/tools.js @@ -32,6 +32,7 @@ const TOOLS = [ { type: 'function', function: { name: 'contract_assert_pass', description: 'Mark a contract assertion as passed, with command-line evidence. Use the assertion id from contract_status (e.g. "a01"). evidence should be a short (<240 char) summary of what was run and what it returned.', parameters: { type: 'object', properties: { assertion_id: { type: 'string', description: 'Assertion id (e.g. a01)' }, evidence: { type: 'string', description: 'Short summary of command output proving the assertion holds' }, command: { type: 'string', description: 'The command run (optional)' }, exit_code: { type: 'integer', description: 'Exit code of the command (optional)' } }, required: ['assertion_id'] } } }, { type: 'function', function: { name: 'contract_assert_fail', description: 'Mark a contract assertion as failed, with evidence. Used when a check ran and the result was wrong — not for skipping checks.', parameters: { type: 'object', properties: { assertion_id: { type: 'string', description: 'Assertion id (e.g. 
a01)' }, evidence: { type: 'string', description: 'Short summary of why the check failed' }, command: { type: 'string', description: 'The command run (optional)' }, exit_code: { type: 'integer', description: 'Exit code of the command (optional)' } }, required: ['assertion_id', 'evidence'] } } }, { type: 'function', function: { name: 'contract_assert_skip', description: 'Mark an assertion as skipped (not applicable in current scope). Skipped assertions count as resolved for the done-guard.', parameters: { type: 'object', properties: { assertion_id: { type: 'string', description: 'Assertion id' }, reason: { type: 'string', description: 'Why this assertion is being skipped' } }, required: ['assertion_id', 'reason'] } } }, + { type: 'function', function: { name: 'use_skill', description: 'Load the full body of a skill by name. Use this when the skill index lists a skill relevant to your task. Returns the full skill content plus any related skill descriptions.', parameters: { type: 'object', properties: { name: { type: 'string', description: 'Skill name from the index' } }, required: ['name'] } } }, ]; // ─── Provider Tools ───────────────────────────────────────────────────────── diff --git a/src/plugins/skill_index_formatter.js b/src/plugins/skill_index_formatter.js new file mode 100644 index 00000000..defc4258 --- /dev/null +++ b/src/plugins/skill_index_formatter.js @@ -0,0 +1,39 @@ +'use strict'; + +// SmallCode — Skill index formatter +// Produces a compact index string (one line per skill, ~8 tokens each) suitable +// for always-injecting into the system prompt, plus a full-body formatter for +// use_skill results that includes related skill names/descriptions (not bodies). + +/** + * Format a flat index of skills — one line per skill. 
+ * @param {Array<{name:string, description:string, trigger:string, keywords:string[]}>} entries + * @returns {string} + */ +function formatSkillIndex(entries) { + if (!entries || entries.length === 0) return ''; + const lines = entries.map(e => { + const kw = e.keywords && e.keywords.length ? ` [${e.keywords.join(',')}]` : ''; + const desc = e.description ? ` — ${e.description}` : ''; + return ` ${e.name}${desc}${kw}`; + }); + return '\n\nAvailable skills (call use_skill to load):\n' + lines.join('\n'); +} + +/** + * Format a loaded skill body for the use_skill response. + * Appends brief related-skill entries (name + description only, not body). + * @param {object} skill — {name, description, content, keywords, trigger} + * @param {Array<{name:string, description:string}>} relatedEntries — index entries for related skills + * @returns {string} + */ +function formatSkillResult(skill, relatedEntries) { + let out = `[skill:${skill.name}]\n${skill.content}`; + if (relatedEntries && relatedEntries.length > 0) { + const rel = relatedEntries.map(e => ` ${e.name}${e.description ? ' — ' + e.description : ''}`).join('\n'); + out += `\n\nRelated skills:\n${rel}`; + } + return out; +} + +module.exports = { formatSkillIndex, formatSkillResult }; diff --git a/src/plugins/skills.js b/src/plugins/skills.js index c4f5206a..68205b86 100644 --- a/src/plugins/skills.js +++ b/src/plugins/skills.js @@ -21,6 +21,10 @@ // previously skipped silently (closes #81). README-style files are ignored. // // Frontmatter accepts both LF and CRLF line endings (closes #52). +// +// Lazy loading: index entries (frontmatter only) are stored in _index Map. +// Bodies are loaded on demand via _loadBody(name) and cached into skills Map. +// getIndex() returns flat IndexEntry list for prompt injection. 
const fs = require('fs'); const path = require('path'); @@ -31,10 +35,16 @@ const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/; // Docs that live alongside skills but aren't skills themselves const NON_SKILL_MD = /^(readme|changelog|license|contributing)\.md$/i; +// Max bytes to scan for frontmatter before falling back to full read. +const FRONTMATTER_SCAN_BYTES = 2048; +// Max lines to scan for frontmatter end marker. +const FRONTMATTER_SCAN_LINES = 50; + class SkillManager { constructor(projectDir) { this.projectDir = projectDir || process.cwd(); - this.skills = new Map(); // name → skill object + this.skills = new Map(); // name → fully-loaded skill object (cached) + this._index = new Map(); // name → IndexEntry (frontmatter + path, no body) this._load(); } @@ -130,87 +140,191 @@ class SkillManager { this._ingestFile(skillFile, path.basename(skillFile), skillDir, name, 'nested'); } + // Read only enough of the file to extract frontmatter (index-only load). + // Returns { frontmatter: string|null, hasMore: boolean }. frontmatter is + // null when no closed --- block is found in the scanned slice or the read + // fails; hasMore is true when the scan buffer was filled or the block never + // closed. Reads at most FRONTMATTER_SCAN_BYTES; a full read is up to the caller. + _readFrontmatterOnly(filePath) { + try { + // Read a limited slice first. + const fd = fs.openSync(filePath, 'r'); + const buf = Buffer.alloc(FRONTMATTER_SCAN_BYTES); + const bytesRead = fs.readSync(fd, buf, 0, FRONTMATTER_SCAN_BYTES, 0); + fs.closeSync(fd); + const chunk = buf.slice(0, bytesRead).toString('utf-8'); + + if (!chunk.startsWith('---')) { + // No frontmatter — full content is body; return null so caller full-reads.
+ return { frontmatter: null, hasMore: bytesRead === FRONTMATTER_SCAN_BYTES }; + } + + // Find closing --- within FRONTMATTER_SCAN_LINES lines + const lines = chunk.split(/\r?\n/); + let closeIdx = -1; + for (let i = 1; i < Math.min(lines.length, FRONTMATTER_SCAN_LINES); i++) { + if (lines[i].trimEnd() === '---') { closeIdx = i; break; } + } + if (closeIdx === -1) { + // Frontmatter not closed within scan window — fall back to full read. + return { frontmatter: null, hasMore: true }; + } + + const frontmatter = lines.slice(1, closeIdx).join('\n'); + return { frontmatter, hasMore: bytesRead === FRONTMATTER_SCAN_BYTES }; + } catch { + return { frontmatter: null, hasMore: false }; + } + } + _ingestFile(filePath, filename, dir, defaultName, origin) { + // Index-only path: read frontmatter cheaply, store as index entry. + // Body is loaded lazily on first get(). + const { frontmatter, hasMore } = this._readFrontmatterOnly(filePath); + + let meta = {}; + if (frontmatter !== null) { + meta = this._parseMeta(frontmatter); + } + + const name = meta.name || defaultName || filename.replace(/\.md$/i, ''); + + const entry = { + name, + trigger: meta.trigger || 'manual', + keywords: Array.isArray(meta.keywords) ? meta.keywords : [], + description: meta.description || '', + tags: Array.isArray(meta.tags) ? meta.tags : [], + related: Array.isArray(meta.related) ? meta.related : [], + path: filePath, + origin: origin || (defaultName ? 'nested' : 'flat'), + // hasFrontmatter: whether the file had a --- block + _hasFrontmatter: frontmatter !== null, + // If the file fits in our scan and has frontmatter, we know + // the body wasn't loaded yet. Track that. 
+ _bodyLoaded: false, + }; + + this._index.set(name, entry); + // Remove any stale cached body for same name (precedence override) + this.skills.delete(name); + } + + _parseMeta(frontmatter) { + const meta = {}; + for (const rawLine of frontmatter.split(/\r?\n/)) { + const m = rawLine.match(KV_RE); + if (!m) continue; + let value = m[2].trim(); + if (value.startsWith('[') && value.endsWith(']')) { + value = value.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean); + } + meta[m[1]] = value; + } + return meta; + } + + // Load the full body for a named skill, populate this.skills cache. + _loadBody(name) { + const entry = this._index.get(name); + if (!entry) return null; + if (entry._bodyLoaded && this.skills.has(name)) return this.skills.get(name); + let content; try { - content = fs.readFileSync(filePath, 'utf-8'); + content = fs.readFileSync(entry.path, 'utf-8'); } catch { - return; + return null; } - const skill = this._parse(content, filename, dir, defaultName, origin); - if (skill) this.skills.set(skill.name, skill); - } - _parse(content, filename, dir, defaultName, origin) { - // Parse YAML frontmatter (CRLF + LF tolerant — closes #52) const fmMatch = content.match(FM_RE); - let frontmatter = ''; let body = content; + let meta = {}; if (fmMatch) { - frontmatter = fmMatch[1]; + meta = this._parseMeta(fmMatch[1]); body = fmMatch[2]; - } else if (!defaultName) { - // Files without frontmatter and no derivable name aren't skills. - // Flat + nested loaders always pass a defaultName, so frontmatter-less - // files load as manual skills (closes #81); README-style files are - // filtered by name in _loadFlat. 
- return null; - } - - // Tiny YAML parser — no dep needed - const meta = {}; - if (frontmatter) { - for (const rawLine of frontmatter.split(/\r?\n/)) { - const m = rawLine.match(KV_RE); - if (!m) continue; - let value = m[2].trim(); - if (value.startsWith('[') && value.endsWith(']')) { - value = value.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean); - } - meta[m[1]] = value; - } + } else if (!entry._hasFrontmatter) { + // No frontmatter — full file is body (manual trigger, named by filename/dir) + body = content; } - return { - name: meta.name || defaultName || filename.replace(/\.md$/i, ''), - trigger: meta.trigger || 'manual', - keywords: Array.isArray(meta.keywords) ? meta.keywords : [], + const skill = { + name: meta.name || entry.name, + trigger: meta.trigger || entry.trigger, + keywords: Array.isArray(meta.keywords) ? meta.keywords : entry.keywords, + description: meta.description || entry.description || '', + tags: Array.isArray(meta.tags) ? meta.tags : entry.tags, + related: Array.isArray(meta.related) ? meta.related : entry.related, content: body.trim(), - path: path.join(dir, filename), - origin: origin || (defaultName ? 'nested' : 'flat'), + path: entry.path, + origin: entry.origin, }; + + entry._bodyLoaded = true; + this.skills.set(name, skill); + return skill; } - // Get all skills + // Get all skills — returns index entries with lazy-loaded bodies for callers + // that need content. list() does NOT load bodies (index only). list() { - return [...this.skills.values()].map(s => ({ - name: s.name, - trigger: s.trigger, - keywords: s.keywords, - preview: s.content.slice(0, 80) + (s.content.length > 80 ? '...' 
: ''), - origin: s.origin || 'flat', + return [...this._index.values()].map(e => ({ + name: e.name, + trigger: e.trigger, + keywords: e.keywords, + preview: this._getPreview(e), + origin: e.origin || 'flat', })); } - // Get a skill by name + _getPreview(entry) { + // Return preview from cached body if available; otherwise a short placeholder. + if (entry._bodyLoaded && this.skills.has(entry.name)) { + const body = this.skills.get(entry.name).content; + return body.slice(0, 80) + (body.length > 80 ? '...' : ''); + } + // Avoid loading body just for list() — return description or empty + return entry.description || ''; + } + + // Get a skill by name — lazily loads body on first call. get(name) { - return this.skills.get(name) || null; + if (this.skills.has(name)) return this.skills.get(name); + if (!this._index.has(name)) return null; + return this._loadBody(name); } - // Get skills that should auto-inject for a given message + // Get skills that should auto-inject for a given message. + // Only checks index entries (trigger/keywords) — avoids loading bodies + // until caller needs content. getAutoSkills(message) { const msg = (message || '').toLowerCase(); const results = []; - for (const skill of this.skills.values()) { - if (skill.trigger === 'auto') { - results.push(skill); - } else if (skill.trigger === 'match' && skill.keywords.length > 0) { - const match = skill.keywords.some(kw => msg.includes(String(kw).toLowerCase())); - if (match) results.push(skill); + for (const entry of this._index.values()) { + if (entry.trigger === 'auto') { + results.push(this._loadBody(entry.name)); + } else if (entry.trigger === 'match' && entry.keywords.length > 0) { + const match = entry.keywords.some(kw => msg.includes(String(kw).toLowerCase())); + if (match) results.push(this._loadBody(entry.name)); } } - return results; + return results.filter(Boolean); + } + + // Return flat IndexEntry list for prompt injection (no bodies loaded). 
+ // { name, description, trigger, keywords, tags, related, path, origin } + getIndex() { + return [...this._index.values()].map(e => ({ + name: e.name, + description: e.description, + trigger: e.trigger, + keywords: e.keywords, + tags: e.tags, + related: e.related, + path: e.path, + origin: e.origin, + })); } // Create a new skill in the project's .smallcode/skills directory @@ -239,10 +353,16 @@ class SkillManager { name, trigger, keywords, + description: options.description || '', + tags: options.tags || [], + related: options.related || [], content, path: filePath, origin: 'flat', + _hasFrontmatter: true, + _bodyLoaded: true, }; + this._index.set(name, skill); this.skills.set(name, skill); return skill; } @@ -276,11 +396,12 @@ class SkillManager { // Remove a skill remove(name) { - const skill = this.skills.get(name); - if (!skill) return false; - if (fs.existsSync(skill.path)) { - try { fs.unlinkSync(skill.path); } catch {} + const entry = this._index.get(name) || this.skills.get(name); + if (!entry) return false; + if (fs.existsSync(entry.path)) { + try { fs.unlinkSync(entry.path); } catch {} } + this._index.delete(name); this.skills.delete(name); return true; } diff --git a/test/skill_lazy.test.js b/test/skill_lazy.test.js new file mode 100644 index 00000000..59da7938 --- /dev/null +++ b/test/skill_lazy.test.js @@ -0,0 +1,190 @@ +'use strict'; + +// SmallCode — Lazy skill loading tests +// Verifies index-first SkillManager, lazy body loading, getIndex() fields, +// formatter output, and backward compatibility with existing callers. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const { SkillManager } = require('../src/plugins/skills'); +const { formatSkillIndex, formatSkillResult } = require('../src/plugins/skill_index_formatter'); + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-lazy-')); +} + +function write(file, content) { + fs.mkdirSync(path.dirname(file), { recursive: true }); + fs.writeFileSync(file, content); +} + +// ── Index-only startup ──────────────────────────────────────────────────────── + +test('index is populated on construction without loading bodies', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'alpha.md'), + '---\nname: alpha\ntrigger: manual\ndescription: does alpha things\n---\nbody text here'); + + const sm = new SkillManager(dir); + // _index must have the entry + assert.ok(sm._index.has('alpha'), '_index should have alpha'); + // skills (body cache) should NOT have it yet + assert.ok(!sm.skills.has('alpha'), 'body cache should be empty before get()'); +}); + +test('getIndex() returns expected fields without loading bodies', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'beta.md'), + '---\nname: beta\ntrigger: match\nkeywords: [foo, bar]\ndescription: beta desc\ntags: [t1]\nrelated: [alpha]\n---\nbeta body'); + + const sm = new SkillManager(dir); + const idx = sm.getIndex(); + const entry = idx.find(e => e.name === 'beta'); + assert.ok(entry, 'getIndex should return beta'); + assert.equal(entry.name, 'beta'); + assert.equal(entry.description, 'beta desc'); + assert.equal(entry.trigger, 'match'); + assert.deepEqual(entry.keywords, ['foo', 'bar']); + assert.deepEqual(entry.tags, ['t1']); + assert.deepEqual(entry.related, ['alpha']); + assert.ok(entry.path); + assert.equal(entry.origin, 'flat'); + // Body should still not 
be loaded + assert.ok(!sm.skills.has('beta')); +}); + +// ── Lazy get() ──────────────────────────────────────────────────────────────── + +test('get() lazily loads body on first call', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'lazy.md'), + '---\nname: lazy\ntrigger: manual\n---\nthe lazy body content'); + + const sm = new SkillManager(dir); + assert.ok(!sm.skills.has('lazy'), 'body not loaded yet'); + const skill = sm.get('lazy'); + assert.ok(skill, 'get() returns the skill'); + assert.match(skill.content, /the lazy body content/); + assert.ok(sm.skills.has('lazy'), 'body is cached after get()'); +}); + +test('get() caches: second call returns same object', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'cached.md'), + '---\nname: cached\ntrigger: manual\n---\ncached body'); + + const sm = new SkillManager(dir); + const first = sm.get('cached'); + const second = sm.get('cached'); + assert.strictEqual(first, second, 'should return same cached object'); +}); + +test('get() returns null for unknown skill', () => { + const dir = freshProject(); + const sm = new SkillManager(dir); + assert.equal(sm.get('nonexistent'), null); +}); + +// ── Backward compat: public API unchanged ───────────────────────────────────── + +test('list() returns entries with name/trigger/keywords/origin', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'listme.md'), + '---\nname: listme\ntrigger: auto\nkeywords: [x]\n---\nlist body'); + + const sm = new SkillManager(dir); + const items = sm.list(); + const item = items.find(i => i.name === 'listme'); + assert.ok(item); + assert.equal(item.trigger, 'auto'); + assert.deepEqual(item.keywords, ['x']); + assert.equal(item.origin, 'flat'); + // list() should NOT load bodies + assert.ok(!sm.skills.has('listme')); +}); + +test('getAutoSkills() loads bodies only for matched skills', () => { + const dir = freshProject(); + 
write(path.join(dir, '.smallcode', 'skills', 'always.md'), + '---\nname: always\ntrigger: auto\n---\nauto body'); + write(path.join(dir, '.smallcode', 'skills', 'keyword.md'), + '---\nname: keyword\ntrigger: match\nkeywords: [deploy]\n---\ndeploy body'); + write(path.join(dir, '.smallcode', 'skills', 'nomatch.md'), + '---\nname: nomatch\ntrigger: match\nkeywords: [unrelated]\n---\nnomatch body'); + + const sm = new SkillManager(dir); + const result = sm.getAutoSkills('please deploy the app'); + const names = result.map(s => s.name).sort(); + assert.deepEqual(names, ['always', 'keyword']); + // nomatch should not be loaded + assert.ok(!sm.skills.has('nomatch')); +}); + +// ── Formatter ──────────────────────────────────────────────────────────────── + +test('formatSkillIndex produces one line per skill', () => { + const entries = [ + { name: 'foo', description: 'does foo', trigger: 'manual', keywords: [] }, + { name: 'bar', description: 'does bar', trigger: 'match', keywords: ['baz'] }, + ]; + const out = formatSkillIndex(entries); + assert.ok(out.includes('foo')); + assert.ok(out.includes('bar')); + // Each skill on its own line + const lines = out.split('\n').filter(l => l.includes('foo') || l.includes('bar')); + assert.equal(lines.length, 2); +}); + +test('formatSkillIndex returns empty string for no entries', () => { + assert.equal(formatSkillIndex([]), ''); + assert.equal(formatSkillIndex(null), ''); +}); + +test('formatSkillResult includes body and related names', () => { + const skill = { name: 'main', description: '', content: 'main body content', keywords: [], trigger: 'manual' }; + const related = [ + { name: 'other', description: 'the other skill' }, + ]; + const out = formatSkillResult(skill, related); + assert.ok(out.includes('main body content')); + assert.ok(out.includes('other')); + assert.ok(out.includes('the other skill')); +}); + +test('formatSkillResult with no related entries', () => { + const skill = { name: 's', content: 'solo body', keywords: 
[], trigger: 'manual', description: '' }; + const out = formatSkillResult(skill, []); + assert.ok(out.includes('solo body')); + assert.ok(!out.includes('Related skills')); +}); + +// ── New frontmatter fields backward compat ──────────────────────────────────── + +test('skills without description/tags/related still load correctly', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'plain.md'), + '---\nname: plain\ntrigger: manual\n---\njust a plain body'); + + const sm = new SkillManager(dir); + const skill = sm.get('plain'); + assert.ok(skill); + assert.equal(skill.description, ''); + assert.deepEqual(skill.tags, []); + assert.deepEqual(skill.related, []); + assert.match(skill.content, /just a plain body/); +}); + +test('add() works and skill is in index immediately', () => { + const dir = freshProject(); + const sm = new SkillManager(dir); + sm.add('added', 'added content', { trigger: 'auto', description: 'an added skill' }); + + assert.ok(sm._index.has('added')); + const skill = sm.get('added'); + assert.ok(skill); + assert.match(skill.content, /added content/); +}); From 7980c959eb966b0c1080234ec83b48c3d709c992 Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 08:54:39 -0700 Subject: [PATCH 08/27] feat(memory): hygiene tiers + MEMORY.md index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Memory objects gain tier (hot|archive) and last_used_at fields (backward-compat: backfilled on first hygiene run). runHygiene() sweeps: hot+unused>60d→archive, archive>90d→forget, hot>20→archive oldest 5. Adapter layer handles both SQLite budget-aware-mcp (via update()) and fallback MemoryStore (mutate+save) without touching node_modules. Auto-runs silently (try-catch) at 3 session-save points. /memory hygiene and /memory index subcommands added to commands.js. Generated .smallcode/MEMORY.md is human-readable + git-diffable; never authoritative. 
Rejected: markdown-tier replacement | loses FTS5/BM25 Rejected: hybrid two-source write | inconsistency risk Constraint: do not modify node_modules/budget-aware-mcp Confidence: high Scope-risk: narrow Not-tested: budget-aware-mcp setMeta path (no setMeta exists — update() used instead) --- bin/commands.js | 37 +++++- bin/memory.js | 5 +- bin/smallcode.js | 3 + src/memory/hygiene.js | 242 ++++++++++++++++++++++++++++++++++++ test/memory_hygiene.test.js | 182 +++++++++++++++++++++++++++ 5 files changed, 466 insertions(+), 3 deletions(-) create mode 100644 src/memory/hygiene.js create mode 100644 test/memory_hygiene.test.js diff --git a/bin/commands.js b/bin/commands.js index e1a7e11c..184fb7b4 100644 --- a/bin/commands.js +++ b/bin/commands.js @@ -307,9 +307,42 @@ module.exports = function createCommandHandler(config, conversationHistory, impr } catch (e) { console.log(chalk.gray(` Error: ${e.message}`)); } + } else if (sub === 'hygiene') { + try { + const { runHygiene } = require('../src/memory/hygiene'); + const result = runHygiene(memoryStore); + console.log(chalk.green(` ✓ Hygiene complete: ${result.archived} archived, ${result.deleted} deleted`)); + // Also write MEMORY.md index + const { renderMemoryIndex } = require('../src/memory/hygiene'); + const md = renderMemoryIndex(memoryStore); + const fs = require('fs'); + const path = require('path'); + const outDir = path.join(process.cwd(), '.smallcode'); + if (!fs.existsSync(outDir)) fs.mkdirSync(outDir, { recursive: true }); + fs.writeFileSync(path.join(outDir, 'MEMORY.md'), md); + console.log(chalk.gray(` Wrote .smallcode/MEMORY.md (${memoryStore.all().length} entries)`)); + } catch (e) { + console.log(chalk.gray(` Hygiene error: ${e.message}`)); + } + } else if (sub === 'index') { + try { + const { renderMemoryIndex } = require('../src/memory/hygiene'); + const md = renderMemoryIndex(memoryStore); + const fs = require('fs'); + const path = require('path'); + const outDir = path.join(process.cwd(), 
'.smallcode'); + if (!fs.existsSync(outDir)) fs.mkdirSync(outDir, { recursive: true }); + fs.writeFileSync(path.join(outDir, 'MEMORY.md'), md); + console.log(chalk.green(` ✓ Wrote .smallcode/MEMORY.md`)); + console.log(md.split('\n').slice(0, 10).map(l => ' ' + l).join('\n')); + } catch (e) { + console.log(chalk.gray(` Index error: ${e.message}`)); + } } else { - console.log(chalk.gray(' /memory List stored memory')); - console.log(chalk.gray(' /memory clear Clear all memory')); + console.log(chalk.gray(' /memory List stored memory')); + console.log(chalk.gray(' /memory clear Clear all memory')); + console.log(chalk.gray(' /memory hygiene Sweep tiers, prune stale entries, write MEMORY.md')); + console.log(chalk.gray(' /memory index Write .smallcode/MEMORY.md without sweeping')); } console.log(''); rl.prompt(); diff --git a/bin/memory.js b/bin/memory.js index fbe842d1..9183be2f 100644 --- a/bin/memory.js +++ b/bin/memory.js @@ -20,7 +20,7 @@ const INDEX_FILE = '.smallcode/memory/index.json'; // ─── Memory Object ─────────────────────────────────────────────────────────── class MemoryObject { - constructor({ id, type, title, content, tags, relations, createdAt, updatedAt, source }) { + constructor({ id, type, title, content, tags, relations, createdAt, updatedAt, source, tier, last_used_at }) { this.id = id || crypto.randomUUID().slice(0, 8); this.type = type; // decision | workflow | gotcha | convention | context | source this.title = title; @@ -30,6 +30,8 @@ class MemoryObject { this.createdAt = createdAt || new Date().toISOString(); this.updatedAt = updatedAt || new Date().toISOString(); this.source = source || null; // { file, line, commit } + this.tier = tier || 'hot'; // hot | archive + this.last_used_at = last_used_at || this.createdAt; } toJSON() { @@ -37,6 +39,7 @@ class MemoryObject { id: this.id, type: this.type, title: this.title, content: this.content, tags: this.tags, relations: this.relations, createdAt: this.createdAt, updatedAt: this.updatedAt, 
source: this.source, + tier: this.tier, last_used_at: this.last_used_at, }; } } diff --git a/bin/smallcode.js b/bin/smallcode.js index c5be9c50..3df128ff 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -287,6 +287,7 @@ async function runTUI(config) { onCommand: async (cmd) => { if (cmd === '/quit' || cmd === '/q' || cmd === '/exit') { if (sessionStore) sessionStore.save(conversationHistory, { tokens: tokenTracker ? tokenTracker.stats() : undefined }); + try { if (memoryStore) { const { runHygiene } = require('../src/memory/hygiene'); runHygiene(memoryStore); } } catch {} screen.leave(); killMCP() process.exit(0); @@ -318,6 +319,7 @@ async function runTUI(config) { if (sessionStore) { sessionStore.save(conversationHistory, { tokens: tokenTracker ? tokenTracker.stats() : undefined }); } + try { if (memoryStore) { const { runHygiene } = require('../src/memory/hygiene'); runHygiene(memoryStore); } } catch {} killMCP() process.exit(0); }, @@ -2555,6 +2557,7 @@ async function chatCompletion(config, messages) { }); sessionStore.autoTitle(conversationHistory); } + try { if (memoryStore) { const { runHygiene } = require('../src/memory/hygiene'); runHygiene(memoryStore); } } catch {} return data; } catch (err) { diff --git a/src/memory/hygiene.js b/src/memory/hygiene.js new file mode 100644 index 00000000..c84c206e --- /dev/null +++ b/src/memory/hygiene.js @@ -0,0 +1,242 @@ +'use strict'; + +// SmallCode — Memory Hygiene +// Promotes memory objects to hot/archive tiers and prunes stale entries. +// Runs silently at session-save points; never throws — all errors are swallowed. 
+// +// Tier model: +// hot — actively used; default for new entries +// archive — dormant; de-ranked in retrieval (0.3x weight) +// +// Age rules (applied in order): +// hot + unused > ARCHIVE_AGE days → archive +// archive + unused > DELETE_AGE days → forget +// hot count > HOT_CAP → oldest BATCH → archive + +const fs = require('fs'); +const path = require('path'); + +const HOT_CAP = 20; // max hot-tier entries +const BATCH = 5; // how many to archive per cap sweep +const ARCHIVE_AGE = 60; // days unused before hot → archive +const DELETE_AGE = 90; // days unused (since last_used_at) before archive → forget +const MS_PER_DAY = 86400000; + +/** + * Normalize a store to a common interface regardless of whether it's the + * SQLite budget-aware-mcp store or the fallback MemoryStore from bin/memory.js. + * + * Returns { all, getMeta, setMeta, forget } where: + * all() → MemoryObject[] + * getMeta(obj) → { tier, last_used_at } + * setMeta(obj, m) → void (mutates in-place for fallback; updates DB for SQLite) + * forget(id) → void + */ +function makeAdapter(store) { + const isSqlite = typeof store.update === 'function'; + + function all() { + return store.all(); + } + + function getMeta(obj) { + return { + tier: obj.tier || 'hot', + last_used_at: obj.last_used_at || obj.createdAt || obj.created_at || new Date(0).toISOString(), + }; + } + + function setMeta(obj, meta) { + if (isSqlite) { + // SQLite store has update() — use it to avoid the forget+remember dedup + // trap (re-inserting identical content is blocked by content_hash check). + // We encode tier/last_used_at into the tags array so no schema change is + // needed on budget-aware-mcp. + try { + const existingTags = (obj.tags || []).filter(t => !t.startsWith('tier:') && !t.startsWith('last_used:')); + const newTags = [ + ...existingTags, + `tier:${meta.tier}`, + `last_used:${meta.last_used_at}`, + ]; + store.update(obj.id, { tags: newTags }); + } catch {} + } else { + // Fallback MemoryStore (bin/memory.js): mutate in-place and save.
+ obj.tier = meta.tier; + obj.last_used_at = meta.last_used_at; + if (typeof store.save === 'function') { + try { store.save(); } catch {} + } + } + } + + function forget(id) { + try { store.forget(id); } catch {} + } + + return { all, getMeta, setMeta, forget }; +} + +/** + * Extract tier/last_used_at from a memory object regardless of store type. + * For SQLite stores we encode these values in tags as 'tier:X' and 'last_used:ISO'. + */ +function extractMeta(obj) { + // Try direct properties first (fallback MemoryStore) + if (obj.tier && obj.last_used_at) { + return { tier: obj.tier, last_used_at: obj.last_used_at }; + } + // Try tags encoding (SQLite store) + const tags = obj.tags || []; + let tier = 'hot'; + let last_used_at = obj.createdAt || obj.created_at || new Date(0).toISOString(); + for (const t of tags) { + if (t.startsWith('tier:')) tier = t.slice(5); + if (t.startsWith('last_used:')) last_used_at = t.slice(10); + } + return { tier, last_used_at }; +} + +/** + * Run hygiene on the store. Silent: never throws. + * + * @param {object} store — MemoryStore or budget-aware-mcp store + * @param {object} [opts] + * @param {number} [opts.hotCap=20] + * @param {number} [opts.batch=5] + * @param {number} [opts.archiveAge=60] days + * @param {number} [opts.deleteAge=90] days + * @returns {{ archived: number, deleted: number, total: number }} + */ +function runHygiene(store, opts = {}) { + const hotCap = opts.hotCap ?? HOT_CAP; + const batch = opts.batch ?? BATCH; + const archiveAge = opts.archiveAge ?? ARCHIVE_AGE; + const deleteAge = opts.deleteAge ?? DELETE_AGE; + + let archived = 0; + let deleted = 0; + + try { + const adapter = makeAdapter(store); + const now = Date.now(); + const objects = adapter.all(); + + // Backfill: assign hot tier + last_used_at to any entry that lacks them. 
+ for (const obj of objects) { + const m = extractMeta(obj); + if (!obj.tier && !obj.tags?.some(t => t.startsWith('tier:'))) { + adapter.setMeta(obj, { + tier: 'hot', + last_used_at: m.last_used_at, + }); + } + } + + // Re-read after backfill so we have fresh state. + const fresh = adapter.all(); + + // ── Age sweep ──────────────────────────────────────────────────────────── + for (const obj of fresh) { + const { tier, last_used_at } = extractMeta(obj); + const ageMs = now - new Date(last_used_at).getTime(); + const ageDays = ageMs / MS_PER_DAY; + + if (tier === 'hot' && ageDays > archiveAge) { + adapter.setMeta(obj, { tier: 'archive', last_used_at }); + archived++; + } else if (tier === 'archive' && ageDays > deleteAge) { + adapter.forget(obj.id); + deleted++; + } + } + + // ── Cap sweep ──────────────────────────────────────────────────────────── + // Re-read to get up-to-date list (age sweep may have archived some). + const afterAge = adapter.all().filter(obj => { + const { tier } = extractMeta(obj); + return tier === 'hot'; + }); + + if (afterAge.length > hotCap) { + // Sort by last_used_at ascending (oldest first) + afterAge.sort((a, b) => { + const { last_used_at: la } = extractMeta(a); + const { last_used_at: lb } = extractMeta(b); + return new Date(la).getTime() - new Date(lb).getTime(); + }); + const toArchive = afterAge.slice(0, batch); + for (const obj of toArchive) { + const { last_used_at } = extractMeta(obj); + adapter.setMeta(obj, { tier: 'archive', last_used_at }); + archived++; + } + } + } catch { + // Hygiene must never crash the session. + } + + return { archived, deleted, total: archived + deleted }; +} + +/** + * Render a human-readable memory index to a markdown string. + * Hot entries come before archive. Grouped by type within each tier. + * This file is GENERATED — never authoritative. 
+ * + * @param {object} store + * @returns {string} + */ +function renderMemoryIndex(store) { + try { + const objects = store.all(); + if (objects.length === 0) return '# Memory Index\n\n(empty)\n'; + + const hot = []; + const archive = []; + for (const obj of objects) { + const { tier } = extractMeta(obj); + if (tier === 'archive') archive.push(obj); + else hot.push(obj); + } + + function groupByType(objs) { + const groups = {}; + for (const o of objs) { + if (!groups[o.type]) groups[o.type] = []; + groups[o.type].push(o); + } + return groups; + } + + function renderGroup(groups) { + let out = ''; + for (const [type, objs] of Object.entries(groups)) { + out += `\n### ${type} (${objs.length})\n`; + for (const o of objs) { + out += `- [${o.id}] **${o.title}**\n`; + } + } + return out; + } + + let md = `# Memory Index\n\nGenerated: ${new Date().toISOString()}\n`; + md += `Total: ${objects.length} (hot: ${hot.length}, archive: ${archive.length})\n`; + + if (hot.length > 0) { + md += '\n## Hot\n'; + md += renderGroup(groupByType(hot)); + } + if (archive.length > 0) { + md += '\n## Archive\n'; + md += renderGroup(groupByType(archive)); + } + + return md; + } catch { + return '# Memory Index\n\n(error rendering)\n'; + } +} + +module.exports = { runHygiene, renderMemoryIndex, extractMeta }; diff --git a/test/memory_hygiene.test.js b/test/memory_hygiene.test.js new file mode 100644 index 00000000..af5f94e4 --- /dev/null +++ b/test/memory_hygiene.test.js @@ -0,0 +1,182 @@ +'use strict'; + +// SmallCode — Memory hygiene tests +// Verifies age/cap sweeps, backfill, index render, no-op empty, round-trip. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const { runHygiene, renderMemoryIndex, extractMeta } = require('../src/memory/hygiene'); +const { MemoryStore } = require('../bin/memory'); + +function freshStore() { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'sc-hygiene-')); + return new MemoryStore(dir); +} + +function daysAgo(n) { + return new Date(Date.now() - n * 86400000).toISOString(); +} + +// ── No-op on empty store ────────────────────────────────────────────────────── + +test('runHygiene on empty store returns zeros', () => { + const store = freshStore(); + const result = runHygiene(store); + assert.equal(result.archived, 0); + assert.equal(result.deleted, 0); + assert.equal(result.total, 0); +}); + +// ── Backfill ───────────────────────────────────────────────────────────────── + +test('runHygiene backfills tier=hot and last_used_at on old entries', () => { + const store = freshStore(); + // Remember without tier/last_used_at (old-format entry) + const obj = store.remember('decision', 'old entry', 'content', {}); + // Strip tier/last_used_at to simulate pre-hygiene entry + obj.tier = undefined; + obj.last_used_at = undefined; + store.save(); // persist mutated object + + runHygiene(store, { archiveAge: 9999, deleteAge: 9999 }); + + const objs = store.all(); + const { tier } = extractMeta(objs[0]); + assert.equal(tier, 'hot'); +}); + +// ── Age sweep: hot → archive ────────────────────────────────────────────────── + +test('hot entry unused > archiveAge is moved to archive', () => { + const store = freshStore(); + const obj = store.remember('gotcha', 'stale hot', 'content', {}); + // Force last_used_at to 70 days ago + obj.last_used_at = daysAgo(70); + obj.tier = 'hot'; + store.save(); + + const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 }); + assert.equal(result.archived, 1); + + const { tier 
} = extractMeta(store.all()[0]); + assert.equal(tier, 'archive'); +}); + +test('hot entry within archiveAge is NOT archived', () => { + const store = freshStore(); + const obj = store.remember('context', 'fresh entry', 'content', {}); + obj.last_used_at = daysAgo(5); + obj.tier = 'hot'; + store.save(); + + const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 }); + assert.equal(result.archived, 0); + assert.equal(result.deleted, 0); +}); + +// ── Age sweep: archive → delete ─────────────────────────────────────────────── + +test('archive entry older than deleteAge is deleted', () => { + const store = freshStore(); + const obj = store.remember('workflow', 'ancient archive', 'content', {}); + obj.last_used_at = daysAgo(100); + obj.tier = 'archive'; + store.save(); + + const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 }); + assert.equal(result.deleted, 1); + assert.equal(store.all().length, 0); +}); + +test('archive entry within deleteAge is NOT deleted', () => { + const store = freshStore(); + const obj = store.remember('workflow', 'recent archive', 'content', {}); + obj.last_used_at = daysAgo(65); + obj.tier = 'archive'; + store.save(); + + const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 }); + assert.equal(result.deleted, 0); + // May or may not archive again based on whether it's already archive + assert.equal(store.all().length, 1); +}); + +// ── Cap sweep ──────────────────────────────────────────────────────────────── + +test('cap sweep archives oldest entries when hot > hotCap', () => { + const store = freshStore(); + // Create 6 hot entries with varying last_used_at, cap=4, batch=2 + for (let i = 0; i < 6; i++) { + const obj = store.remember('convention', `entry-${i}`, `content ${i}`, {}); + obj.last_used_at = daysAgo(i * 2); // older entries have higher i + obj.tier = 'hot'; + store.save(); + } + + const result = runHygiene(store, { hotCap: 4, batch: 2, archiveAge: 9999, deleteAge: 9999 }); + 
assert.equal(result.archived, 2); + + const all = store.all(); + const archived = all.filter(o => extractMeta(o).tier === 'archive'); + assert.equal(archived.length, 2); + // The 2 oldest should be archived + const archivedNames = archived.map(o => o.title).sort(); + assert.ok(archivedNames.includes('entry-4') || archivedNames.includes('entry-5')); +}); + +// ── No-op when under cap ───────────────────────────────────────────────────── + +test('cap sweep is no-op when hot count <= hotCap', () => { + const store = freshStore(); + const obj = store.remember('decision', 'single entry', 'content', {}); + obj.tier = 'hot'; + obj.last_used_at = daysAgo(1); + store.save(); + + const result = runHygiene(store, { hotCap: 10, batch: 5, archiveAge: 9999, deleteAge: 9999 }); + assert.equal(result.archived, 0); +}); + +// ── renderMemoryIndex ───────────────────────────────────────────────────────── + +test('renderMemoryIndex returns empty marker for empty store', () => { + const store = freshStore(); + const md = renderMemoryIndex(store); + assert.ok(md.includes('empty')); +}); + +test('renderMemoryIndex groups by tier then type', () => { + const store = freshStore(); + const h = store.remember('decision', 'hot entry', 'content', {}); + h.tier = 'hot'; + store.save(); + const a = store.remember('workflow', 'archive entry', 'other', {}); + a.tier = 'archive'; + store.save(); + + const md = renderMemoryIndex(store); + assert.ok(md.includes('## Hot')); + assert.ok(md.includes('## Archive')); + // Hot section comes before archive + assert.ok(md.indexOf('## Hot') < md.indexOf('## Archive')); +}); + +// ── Round-trip: tier survives save/reload ──────────────────────────────────── + +test('tier and last_used_at survive store save and reload', () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'sc-hygiene-rt-')); + const store1 = new MemoryStore(dir); + const obj = store1.remember('context', 'persist me', 'content', {}); + obj.tier = 'archive'; + obj.last_used_at = 
daysAgo(70); + store1.save(); + + const store2 = new MemoryStore(dir); + const loaded = store2.all()[0]; + assert.equal(loaded.tier, 'archive'); + assert.ok(loaded.last_used_at); +}); From 7095ce325ba0206224bdf5bb727173b17c461117 Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 09:52:26 -0700 Subject: [PATCH 09/27] fix(evolver): stopword filtering in prompt clustering Field regression: rephrased prompts with filler drift (another/please/new) failed to cluster because stopwords diluted Jaccard below threshold. Real prompts from a live session pinned as a test. --- src/plugins/friction_analyzer.js | 12 +++++++++++- test/evolver.test.js | 13 +++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/plugins/friction_analyzer.js b/src/plugins/friction_analyzer.js index 2ba25bed..0165db58 100644 --- a/src/plugins/friction_analyzer.js +++ b/src/plugins/friction_analyzer.js @@ -13,9 +13,19 @@ const REPEAT_THRESHOLD = 3; const RETRY_THRESHOLD = 3; const SIMILARITY_THRESHOLD = 0.5; +// Filler words carry no task identity but dilute Jaccard similarity — +// "another seating chart please" must cluster with "a seating chart for..." 
+const STOPWORDS = new Set([ + 'the', 'and', 'for', 'with', 'that', 'this', 'these', 'those', 'from', + 'into', 'onto', 'please', 'can', 'you', 'could', 'would', 'will', + 'another', 'again', 'new', 'now', 'just', 'some', 'all', 'any', + 'make', 'give', 'get', 'want', 'need', 'like', +]); + function _wordSet(text) { return new Set( - String(text || '').toLowerCase().split(/[^a-z0-9]+/).filter(w => w.length > 2) + String(text || '').toLowerCase().split(/[^a-z0-9]+/) + .filter(w => w.length > 2 && !STOPWORDS.has(w)) ); } diff --git a/test/evolver.test.js b/test/evolver.test.js index ffb42a7a..4fcf3d0e 100644 --- a/test/evolver.test.js +++ b/test/evolver.test.js @@ -128,6 +128,19 @@ test('three near-identical prompts flag a repeated pattern', () => { assert.deepEqual(r.repeated_patterns[0].traceIds.sort(), ['a1', 'a2', 'a3']); }); +test('rephrased prompts with filler-word drift still cluster (field regression)', () => { + // Exact prompts from a real session that failed to cluster before + // stopword filtering: the third drops the names and adds filler. 
+ const traces = [ + trace('s1', 'generate a random seating chart for my classroom students Ana, Ben, Cara, Dan, Eli and Fay'), + trace('s2', 'generate a new random seating chart for the classroom students Ana, Ben, Cara, Dan, Eli and Fay'), + trace('s3', 'generate another random seating chart for my classroom students please'), + ]; + const r = extractFrictionSignals(traces); + assert.equal(r.repeated_patterns.length, 1); + assert.equal(r.repeated_patterns[0].count, 3); +}); + test('repeated pattern covered by an existing skill keyword is suppressed', () => { const traces = [ trace('a1', 'convert this csv file to json format'), From 406c4fbae1e6d9c11aafc66f71dc6b9fadb9a45c Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 09:09:24 -0700 Subject: [PATCH 10/27] fix(skills): route use_skill through tool category filters use_skill was defined in TOOLS but absent from both routers' category whitelists, so the model never saw it in routed mode. The skill index is injected every turn, so the tool rides along in every tool-bearing category (~80 tokens). --- src/compiled/tool_router.js | 19 +++++++++++-------- src/tools/two_stage_router.js | 10 +++++++--- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/compiled/tool_router.js b/src/compiled/tool_router.js index 10371a56..3b39d6d1 100644 --- a/src/compiled/tool_router.js +++ b/src/compiled/tool_router.js @@ -228,25 +228,28 @@ function classifyToolCategory(message) { * @returns {string[]} tool names to include in the prompt */ function getToolsForCategory(category) { + // use_skill rides along in every tool-bearing category — the skill index + // is injected on every turn, so the model must always be able to pull a + // skill body regardless of how the task was classified (~80 token cost). 
switch (category) { case 'code_intel': - return ['graph_search', 'explain_symbol', 'read_file', 'find_files', 'search', 'hybrid_search']; + return ['graph_search', 'explain_symbol', 'read_file', 'find_files', 'search', 'hybrid_search', 'use_skill']; case 'read': - return ['read_file', 'list_projects', 'graph_search', 'find_files', 'find_and_read']; + return ['read_file', 'list_projects', 'graph_search', 'find_files', 'find_and_read', 'use_skill']; case 'write': - return ['read_file', 'write_file', 'patch', 'bash', 'read_and_patch', 'create_and_run']; + return ['read_file', 'write_file', 'patch', 'bash', 'read_and_patch', 'create_and_run', 'use_skill']; case 'search': - return ['search', 'find_files', 'graph_search', 'read_file', 'explain_symbol', 'search_and_read', 'hybrid_search']; + return ['search', 'find_files', 'graph_search', 'read_file', 'explain_symbol', 'search_and_read', 'hybrid_search', 'use_skill']; case 'run': - return ['bash', 'run', 'read_file']; + return ['bash', 'run', 'read_file', 'use_skill']; case 'plan': - return ['read_file', 'write_file', 'patch', 'bash', 'search', 'find_files', 'graph_search', 'memory_load', 'memory_remember', 'bone_compile', 'bone_check', 'read_and_patch', 'create_and_run', 'find_and_read', 'search_and_read']; + return ['read_file', 'write_file', 'patch', 'bash', 'search', 'find_files', 'graph_search', 'memory_load', 'memory_remember', 'bone_compile', 'bone_check', 'read_and_patch', 'create_and_run', 'find_and_read', 'search_and_read', 'use_skill']; case 'web': - return ['web_search', 'web_fetch', 'read_file']; + return ['web_search', 'web_fetch', 'read_file', 'use_skill']; case 'respond': return []; // No tools needed for pure responses default: - return ['read_file', 'write_file', 'patch', 'bash', 'search']; + return ['read_file', 'write_file', 'patch', 'bash', 'search', 'use_skill']; } } diff --git a/src/tools/two_stage_router.js b/src/tools/two_stage_router.js index cd5f4e41..1caf24ec 100644 --- 
a/src/tools/two_stage_router.js +++ b/src/tools/two_stage_router.js @@ -28,11 +28,15 @@ const TOOL_CATEGORIES = { tools: ['bash', 'run'], }, plan: { - description: 'Load/save project memory, BoneScript compile/check', - tools: ['memory_load', 'memory_remember', 'bone_compile', 'bone_check'], + description: 'Load/save project memory, load skills, BoneScript compile/check', + tools: ['memory_load', 'memory_remember', 'use_skill', 'bone_compile', 'bone_check'], }, }; +// Cross-cutting tools appended to every category in Stage 2 — the skill +// index is injected on every turn, so use_skill must always be callable. +const ALWAYS_TOOLS = ['use_skill']; + /** * Determine routing mode based on model's context window. * @param {number} contextWindow - Model's context length in tokens @@ -80,7 +84,7 @@ function getCategorySelectorTool() { function getToolsForCategory(category, allTools) { const cat = TOOL_CATEGORIES[category]; if (!cat) return allTools; // Unknown category, fall back to all - return allTools.filter(t => cat.tools.includes(t.function.name)); + return allTools.filter(t => cat.tools.includes(t.function.name) || ALWAYS_TOOLS.includes(t.function.name)); } /** From 2115e60e93ed766da448f42184a2502f454e7ae4 Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 08:56:07 -0700 Subject: [PATCH 11/27] fix(memory): touch last_used_at on memory_load retrieval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this, actively-retrieved old entries age out of the hot tier at 60d — hygiene tier sweeps need real usage signal. Try-catch wrapped; a failed touch never breaks retrieval. --- bin/executor.js | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/bin/executor.js b/bin/executor.js index 091aa28a..8ae2c683 100644 --- a/bin/executor.js +++ b/bin/executor.js @@ -812,6 +812,19 @@ async function executeTool(name, args, ctx) { const objects = Array.isArray(raw) ? 
raw : (raw?.objects || []); const tokens_used = Array.isArray(raw) ? objects.length * 50 : (raw?.tokens_used || 0); if (objects.length === 0) return { result: 'No relevant memory found.' }; + // Touch last_used_at so hygiene tier sweeps see real usage — an + // actively-retrieved entry must not age out. Never breaks retrieval. + for (const o of objects) { + try { + const now = new Date().toISOString(); + if (typeof memoryStore.update === 'function') { + memoryStore.update(o.id, { last_used_at: now }); + } else { + o.last_used_at = now; + if (typeof memoryStore.save === 'function') memoryStore.save(); + } + } catch {} + } const formatted = objects.map(o => `[${o.type}] ${o.title}: ${o.content}`).join('\n\n'); return { result: `Loaded ${objects.length} memories (${tokens_used} tokens):\n\n${formatted}` }; } From 97d12fb3607e23debe5d151bfd0ea93fe92f5f04 Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 10:33:12 -0700 Subject: [PATCH 12/27] =?UTF-8?q?feat(agents):=20Phase=202=20=E2=80=94=20s?= =?UTF-8?q?ubagent=20+=20team=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AgentRunner runs isolated sub-conversations (task-only history, narrowed tools, MAX_STEPS=15, token budget min(8000,ctx*0.3), non-streaming). TeamLoader/team_runner add sequential pipelines (output → next agent input). spawn_agent tool wired in both compiled and two-stage routers. /agents, /agent, /teams, /team commands + fullscreen palette entries. 33 new tests (14 loader, 19 runner/team), 380 total, 0 failures. 
Constraint: No yaml dep — team loader hand-parses inline-array yaml only Constraint: No nested repair in sub-agents — bad JSON args → {} + tool error Directive: Loaders skip drafts/ — Phase 3 writes agent/team drafts there Rejected: Parallel team execution | local inference perf trap on small hw Confidence: high Scope-risk: moderate Co-Authored-By: Claude Sonnet 4.6 --- bin/commands.js | 112 ++++++++++ bin/executor.js | 30 +++ bin/tools.js | 1 + src/compiled/tool_router.js | 6 +- src/plugins/agent_loader.js | 116 ++++++++++ src/plugins/agent_runner.js | 223 ++++++++++++++++++ src/plugins/team_loader.js | 94 ++++++++ src/plugins/team_runner.js | 50 +++++ src/tools/two_stage_router.js | 4 +- src/tui/fullscreen.js | 4 + test/agent_loader.test.js | 186 ++++++++++++++++ test/agent_runner.test.js | 409 ++++++++++++++++++++++++++++++++++ 12 files changed, 1230 insertions(+), 5 deletions(-) create mode 100644 src/plugins/agent_loader.js create mode 100644 src/plugins/agent_runner.js create mode 100644 src/plugins/team_loader.js create mode 100644 src/plugins/team_runner.js create mode 100644 test/agent_loader.test.js create mode 100644 test/agent_runner.test.js diff --git a/bin/commands.js b/bin/commands.js index 184fb7b4..5e219f76 100644 --- a/bin/commands.js +++ b/bin/commands.js @@ -862,6 +862,114 @@ module.exports = function createCommandHandler(config, conversationHistory, impr return; } + case '/agents': { + const { AgentLoader } = require('../src/plugins/agent_loader'); + const loader = new AgentLoader(process.cwd()); + const agents = loader.list(); + if (agents.length === 0) { + console.log(chalk.gray(' No agents defined.')); + console.log(chalk.gray(' Create one: .smallcode/agents/.md')); + } else { + console.log(chalk.bold(` Agents (${agents.length}):`)); + for (const a of agents) { + const toolList = a.tools.length ? chalk.gray(` [${a.tools.join(', ')}]`) : ''; + const modelTag = a.model ? 
chalk.gray(` model:${a.model}`) : ''; + console.log(` ${chalk.cyan(a.name)}${toolList}${modelTag} ${chalk.gray(a.description)}`); + } + } + console.log(''); + rl.prompt(); + return; + } + + case '/agent': { + const agentName = parts[1]; + const agentTask = parts.slice(2).join(' '); + if (!agentName || !agentTask) { + console.log(chalk.gray(' Usage: /agent ')); + console.log(''); + rl.prompt(); + return; + } + const { AgentLoader: AgentLoaderA } = require('../src/plugins/agent_loader'); + const { AgentRunner } = require('../src/plugins/agent_runner'); + const loaderA = new AgentLoaderA(process.cwd()); + const agentDef = loaderA.get(agentName); + if (!agentDef) { + const valid = loaderA.list().map(a => a.name); + console.log(chalk.red(` Agent "${agentName}" not found. Valid: ${valid.join(', ') || '(none)'}`)); + console.log(''); + rl.prompt(); + return; + } + console.log(chalk.gray(` Running agent ${chalk.cyan(agentName)}...`)); + const agentCtxA = { config, flags: {}, tui: require('./tui'), skillManager: null }; + const runnerA = new AgentRunner(agentDef, agentCtxA); + const resultA = await runnerA.run(agentTask); + console.log(''); + console.log(resultA.output || chalk.gray('(no output)')); + console.log(''); + console.log(chalk.gray(` steps=${resultA.steps} tokens=${resultA.tokens}${resultA.error ? 
' error=' + resultA.error : ''}`)); + console.log(''); + rl.prompt(); + return; + } + + case '/teams': { + const { TeamLoader } = require('../src/plugins/team_loader'); + const tloader = new TeamLoader(process.cwd()); + const teams = tloader.list(); + if (teams.length === 0) { + console.log(chalk.gray(' No teams defined.')); + console.log(chalk.gray(' Create one: .smallcode/teams/.yaml')); + } else { + console.log(chalk.bold(` Teams (${teams.length}):`)); + for (const t of teams) { + console.log(` ${chalk.cyan(t.name)} ${chalk.gray(`[${t.agents.join(' → ')}]`)} ${chalk.gray(t.description)}`); + } + } + console.log(''); + rl.prompt(); + return; + } + + case '/team': { + const teamName = parts[1]; + const teamTask = parts.slice(2).join(' '); + if (!teamName || !teamTask) { + console.log(chalk.gray(' Usage: /team ')); + console.log(''); + rl.prompt(); + return; + } + const { TeamLoader: TeamLoaderT } = require('../src/plugins/team_loader'); + const { AgentLoader: AgentLoaderT } = require('../src/plugins/agent_loader'); + const { runTeam } = require('../src/plugins/team_runner'); + const tloaderT = new TeamLoaderT(process.cwd()); + const teamDef = tloaderT.get(teamName); + if (!teamDef) { + const valid = tloaderT.list().map(t => t.name); + console.log(chalk.red(` Team "${teamName}" not found. Valid: ${valid.join(', ') || '(none)'}`)); + console.log(''); + rl.prompt(); + return; + } + console.log(chalk.gray(` Running team ${chalk.cyan(teamName)} (${teamDef.agents.join(' → ')})...`)); + const agentLoaderT = new AgentLoaderT(process.cwd()); + const teamCtx = { config, flags: {}, tui: require('./tui'), skillManager: null }; + const teamResult = await runTeam(teamDef, teamTask, teamCtx, agentLoaderT); + console.log(''); + console.log(teamResult.output || chalk.gray('(no output)')); + console.log(''); + for (const pa of teamResult.perAgent) { + const err = pa.error ? 
chalk.red(` error=${pa.error}`) : ''; + console.log(chalk.gray(` ${pa.name}: steps=${pa.steps} tokens=${pa.tokens}${err}`)); + } + console.log(''); + rl.prompt(); + return; + } + case '/help': console.log(''); console.log(chalk.bold(' Commands')); @@ -884,6 +992,10 @@ module.exports = function createCommandHandler(config, conversationHistory, impr console.log(` ${chalk.cyan('/budget')} ${chalk.gray('Show context window budget')}`); console.log(` ${chalk.cyan('/mcp')} ${chalk.gray('Show connected MCP servers')}`); console.log(` ${chalk.cyan('/skill')} ${chalk.gray('Manage reusable skills')}`); + console.log(` ${chalk.cyan('/agents')} ${chalk.gray('List defined sub-agents')}`); + console.log(` ${chalk.cyan('/agent')} ${chalk.gray('Run a sub-agent manually')}`); + console.log(` ${chalk.cyan('/teams')} ${chalk.gray('List defined agent teams')}`); + console.log(` ${chalk.cyan('/team')} ${chalk.gray('Run a team pipeline')}`); console.log(` ${chalk.cyan('/evolve')} ${chalk.gray('Propose a new skill from session friction (list|promote|log)')}`); console.log(` ${chalk.cyan('/plugin')} ${chalk.gray('List installed plugins')}`); console.log(` ${chalk.cyan('/provider')} ${chalk.gray('Configure LLM provider (interactive wizard)')}`); diff --git a/bin/executor.js b/bin/executor.js index 8ae2c683..9fba20a9 100644 --- a/bin/executor.js +++ b/bin/executor.js @@ -871,6 +871,36 @@ async function executeTool(name, args, ctx) { return { result: formatSkillResult(skill, relatedEntries) }; } + case 'spawn_agent': { + const agentName = String(args.agent || '').trim(); + const agentTask = String(args.task || '').trim(); + if (!agentName) return { error: 'spawn_agent: agent name is required' }; + if (!agentTask) return { error: 'spawn_agent: task is required' }; + + try { + const { AgentLoader } = require('../src/plugins/agent_loader'); + const { AgentRunner } = require('../src/plugins/agent_runner'); + const loader = new AgentLoader(cwd); + const agentDef = loader.get(agentName); + if 
(!agentDef) { + const valid = loader.list().map(a => a.name); + return { error: `spawn_agent: agent "${agentName}" not found. Valid agents: ${valid.join(', ') || '(none defined)'}` }; + } + const agentCtx = { + config, + flags: flags || {}, + tui: tui || { renderDiff: () => null }, + skillManager: ctx.skillManager || null, + }; + const runner = new AgentRunner(agentDef, agentCtx); + const result = await runner.run(agentTask); + const summary = `[${agentName}] steps=${result.steps} tokens=${result.tokens}${result.error ? ' error=' + result.error : ''}`; + return { result: result.output ? `${summary}\n\n${result.output}` : summary }; + } catch (e) { + return { error: `spawn_agent: ${e.message}` }; + } + } + case 'bone_compile': { const safe = safeResolvePath(args.path, cwd); if (!safe.ok) return { error: `bone_compile rejected: ${safe.reason}` }; diff --git a/bin/tools.js b/bin/tools.js index 9682fd49..3d6951d4 100644 --- a/bin/tools.js +++ b/bin/tools.js @@ -33,6 +33,7 @@ const TOOLS = [ { type: 'function', function: { name: 'contract_assert_fail', description: 'Mark a contract assertion as failed, with evidence. Used when a check ran and the result was wrong — not for skipping checks.', parameters: { type: 'object', properties: { assertion_id: { type: 'string', description: 'Assertion id (e.g. a01)' }, evidence: { type: 'string', description: 'Short summary of why the check failed' }, command: { type: 'string', description: 'The command run (optional)' }, exit_code: { type: 'integer', description: 'Exit code of the command (optional)' } }, required: ['assertion_id', 'evidence'] } } }, { type: 'function', function: { name: 'contract_assert_skip', description: 'Mark an assertion as skipped (not applicable in current scope). 
Skipped assertions count as resolved for the done-guard.', parameters: { type: 'object', properties: { assertion_id: { type: 'string', description: 'Assertion id' }, reason: { type: 'string', description: 'Why this assertion is being skipped' } }, required: ['assertion_id', 'reason'] } } }, { type: 'function', function: { name: 'use_skill', description: 'Load the full body of a skill by name. Use this when the skill index lists a skill relevant to your task. Returns the full skill content plus any related skill descriptions.', parameters: { type: 'object', properties: { name: { type: 'string', description: 'Skill name from the index' } }, required: ['name'] } } }, + { type: 'function', function: { name: 'spawn_agent', description: 'Spawn a named sub-agent to perform a focused task. The agent runs in isolation with a narrowed tool set and returns its output. Use when you need a specialist agent (e.g. a code reviewer) to handle a subtask independently.', parameters: { type: 'object', properties: { agent: { type: 'string', description: 'Agent name (from /agents list)' }, task: { type: 'string', description: 'Task description for the agent' } }, required: ['agent', 'task'] } } }, ]; // ─── Provider Tools ───────────────────────────────────────────────────────── diff --git a/src/compiled/tool_router.js b/src/compiled/tool_router.js index 3b39d6d1..d5cedd10 100644 --- a/src/compiled/tool_router.js +++ b/src/compiled/tool_router.js @@ -237,19 +237,19 @@ function getToolsForCategory(category) { case 'read': return ['read_file', 'list_projects', 'graph_search', 'find_files', 'find_and_read', 'use_skill']; case 'write': - return ['read_file', 'write_file', 'patch', 'bash', 'read_and_patch', 'create_and_run', 'use_skill']; + return ['read_file', 'write_file', 'patch', 'bash', 'read_and_patch', 'create_and_run', 'use_skill', 'spawn_agent']; case 'search': return ['search', 'find_files', 'graph_search', 'read_file', 'explain_symbol', 'search_and_read', 'hybrid_search', 
'use_skill']; case 'run': return ['bash', 'run', 'read_file', 'use_skill']; case 'plan': - return ['read_file', 'write_file', 'patch', 'bash', 'search', 'find_files', 'graph_search', 'memory_load', 'memory_remember', 'bone_compile', 'bone_check', 'read_and_patch', 'create_and_run', 'find_and_read', 'search_and_read', 'use_skill']; + return ['read_file', 'write_file', 'patch', 'bash', 'search', 'find_files', 'graph_search', 'memory_load', 'memory_remember', 'bone_compile', 'bone_check', 'read_and_patch', 'create_and_run', 'find_and_read', 'search_and_read', 'use_skill', 'spawn_agent']; case 'web': return ['web_search', 'web_fetch', 'read_file', 'use_skill']; case 'respond': return []; // No tools needed for pure responses default: - return ['read_file', 'write_file', 'patch', 'bash', 'search', 'use_skill']; + return ['read_file', 'write_file', 'patch', 'bash', 'search', 'use_skill', 'spawn_agent']; } } diff --git a/src/plugins/agent_loader.js b/src/plugins/agent_loader.js new file mode 100644 index 00000000..60859575 --- /dev/null +++ b/src/plugins/agent_loader.js @@ -0,0 +1,116 @@ +// SmallCode — Agent Loader +// Loads agent definitions from .smallcode/agents/{name}.md +// +// Frontmatter fields: +// name: agent name (defaults to filename stem) +// description: short description shown in /agents list +// tools: [tool1, tool2] — subset of canonical TOOLS the agent may use +// model: tier name (fast/default/medium/strong) or exact model name +// +// Body = system prompt (capped at 1600 chars in AgentRunner). +// +// Drafts quarantine: agents/drafts/ is never auto-loaded (Phase 3 will +// write agent drafts there; promotion via a future /evolve promote-agent). 
// Frontmatter envelope: an opening `---` fence, metadata lines, a closing `---`
// fence, then the prompt body. LF and CRLF are both accepted, and the closing
// fence may be the last thing in the file (no trailing newline, no body).
const FM_RE = /^---\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n([\s\S]*))?$/;
// A single `key: value` metadata line. Indented lines never match.
const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/;
// Extension that marks an agent definition file (matched case-insensitively so
// the filter agrees with the stem-stripping below).
const AGENT_EXT_RE = /\.md$/i;

/**
 * Loads agent definitions from `<projectDir>/.smallcode/agents/*.md`.
 *
 * Each file may start with a frontmatter block holding `name`, `description`,
 * `tools` and `model`; everything after the block is the agent body. Files
 * without frontmatter are loaded with the whole content as the body and the
 * filename stem as the name. Sub-directories (including `drafts/`) are never
 * read. Loading is best-effort: unreadable directories/files are skipped.
 */
class AgentLoader {
  /**
   * @param {string} [projectDir] - project root; defaults to `process.cwd()`
   */
  constructor(projectDir) {
    this.projectDir = projectDir || process.cwd();
    this._agents = new Map(); // name → agent definition
    this._load();
  }

  /** @returns {string} absolute path of the agents directory */
  _agentDir() {
    return path.join(this.projectDir, '.smallcode', 'agents');
  }

  /** Scan the agents directory and ingest every `*.md` file found directly in it. */
  _load() {
    let entries;
    try {
      // A missing directory throws ENOENT here, which is treated as "no agents".
      entries = fs.readdirSync(this._agentDir(), { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      // Directories are skipped wholesale; this is what keeps drafts/ quarantined.
      if (entry.isDirectory()) continue;
      if (!AGENT_EXT_RE.test(entry.name)) continue;
      this._ingest(
        path.join(this._agentDir(), entry.name),
        entry.name.replace(AGENT_EXT_RE, ''),
      );
    }
  }

  /**
   * Parse frontmatter text into a metadata object.
   * Values written as `[a, b]` become string arrays; other values are strings
   * with one pair of matching surrounding quotes removed.
   * @param {string} frontmatter
   * @returns {Object<string, string|string[]>}
   */
  _parseMeta(frontmatter) {
    const meta = {};
    for (const rawLine of frontmatter.split(/\r?\n/)) {
      const m = rawLine.match(KV_RE);
      if (!m) continue;
      const key = m[1];
      // Assigning to `__proto__` on a plain object would swap its prototype.
      if (key === '__proto__') continue;
      const raw = m[2].trim();
      if (raw.startsWith('[') && raw.endsWith(']')) {
        // Inline array: tools: [read_file, bash]
        meta[key] = raw
          .slice(1, -1)
          .split(',')
          .map((s) => s.trim().replace(/['"]/g, ''))
          .filter(Boolean);
      } else {
        // Scalar: `"reviewer"` and `'reviewer'` both mean `reviewer`.
        meta[key] = raw.replace(/^(['"])(.*)\1$/, '$2');
      }
    }
    return meta;
  }

  /**
   * Read one agent file and register it under its declared or default name.
   * @param {string} filePath - absolute path of the file
   * @param {string} defaultName - name used when frontmatter declares none
   */
  _ingest(filePath, defaultName) {
    let content;
    try {
      content = fs.readFileSync(filePath, 'utf-8');
    } catch {
      return;
    }

    const fmMatch = content.match(FM_RE);
    const meta = fmMatch ? this._parseMeta(fmMatch[1]) : {};
    // Group 2 is undefined when the closing fence ends the file.
    const body = fmMatch ? fmMatch[2] || '' : content;

    // Only a non-empty string is a usable Map key for get(name).
    const name = typeof meta.name === 'string' && meta.name ? meta.name : defaultName;

    let tools = [];
    if (Array.isArray(meta.tools)) {
      tools = meta.tools;
    } else if (typeof meta.tools === 'string') {
      // Scalar form (`tools: bash` or `tools: bash, search`) is honored too.
      tools = meta.tools.split(',').map((s) => s.trim()).filter(Boolean);
    }

    const description = Array.isArray(meta.description)
      ? meta.description.join(', ')
      : meta.description || '';
    const model = typeof meta.model === 'string' && meta.model ? meta.model : null;

    this._agents.set(name, {
      name,
      description,
      tools,
      model,
      body: body.trim(),
      path: filePath,
    });
  }

  /**
   * @returns {{name: string, description: string, tools: string[], model: (string|null)}[]}
   *   summary of every loaded agent (no body, no path)
   */
  list() {
    return [...this._agents.values()].map((a) => ({
      name: a.name,
      description: a.description,
      tools: a.tools,
      model: a.model,
    }));
  }

  /**
   * @param {string} name
   * @returns {object|null} the full agent definition, or null when unknown
   */
  get(name) {
    return this._agents.get(name) || null;
  }
}
/**
 * Build the narrowed tool list for a sub-agent.
 *
 * Returns the entries of the canonical TOOLS array whose name was requested,
 * in TOOLS order. `read_file` is always added to the request, and names that
 * are not in TOOLS simply never match.
 *
 * @param {string[]|string} [agentTools] - tool names from the agent frontmatter;
 *   a single string is treated as one name, anything else as "none requested"
 * @returns {object[]} tool definitions
 */
function buildNarrowedTools(agentTools) {
  let names = [];
  if (Array.isArray(agentTools)) {
    names = agentTools;
  } else if (typeof agentTools === 'string' && agentTools) {
    // `new Set('bash')` would yield the characters b, a, s, h.
    names = [agentTools];
  }
  const requested = new Set(names);
  requested.add('read_file');

  // Filtering TOOLS itself already restricts the result to canonical tools.
  return TOOLS.filter((tool) => requested.has(tool.function.name));
}

/**
 * Build the sub-agent system prompt.
 *
 * The agent body is cut to at most BODY_CAP UTF-16 code units (followed by a
 * space and BODY_CAP_MARKER when cut), then a blank line and an
 * `Available tools: …` line are appended. No token limit is enforced here.
 *
 * @param {object} agentDef - agent definition; only `body` is read
 * @param {object[]} tools - tool definitions whose names are listed
 * @returns {string}
 */
function buildSubAgentPrompt(agentDef, tools) {
  let body = agentDef.body || '';
  if (body.length > BODY_CAP) {
    let cut = BODY_CAP;
    // If the cut lands between the halves of a surrogate pair, back off by one
    // so the prompt never carries a lone high surrogate.
    const lastUnit = body.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
    body = `${body.slice(0, cut)} ${BODY_CAP_MARKER}`;
  }
  const toolNames = tools.map((tool) => tool.function.name).join(', ');
  return `${body}\n\nAvailable tools: ${toolNames}`;
}
/**
 * Resolve the model target for a sub-agent.
 *
 * - no `model` on the agent → `getModelTarget(config, 'default')`
 * - `model` is one of fast/default/medium/strong → `getModelTarget(config, model)`
 * - anything else → `getModelTargetForModel(config, model)` (literal model name)
 *
 * @param {object} config
 * @param {object} agentDef
 * @returns {object} model target
 */
function resolveAgentTarget(config, agentDef) {
  const TIERS = new Set(['fast', 'default', 'medium', 'strong']);
  const modelField = agentDef.model;
  if (!modelField) return getModelTarget(config, 'default');
  if (TIERS.has(modelField)) return getModelTarget(config, modelField);
  return getModelTargetForModel(config, modelField);
}

/**
 * Rough token estimate for a message history: content length / CHARS_PER_TOKEN,
 * rounded up per message. Non-string content is measured by its JSON form.
 * @param {object[]} history
 * @returns {number}
 */
function estimateHistoryTokens(history) {
  return history.reduce((sum, m) => {
    const text = typeof m.content === 'string' ? m.content : JSON.stringify(m.content || '');
    return sum + Math.ceil(text.length / CHARS_PER_TOKEN);
  }, 0);
}

/**
 * Turn whatever a tool returned into the string content of a `tool` message.
 * @param {*} toolResult
 * @returns {string}
 */
function formatToolResult(toolResult) {
  // A tool that returns nothing must not crash the run on `.error` access.
  if (toolResult == null) return '(tool returned no result)';
  if (toolResult.error) return `Error: ${toolResult.error}`;
  const payload = toolResult.result || JSON.stringify(toolResult);
  // Structured results are serialized so the message content is always a string.
  return typeof payload === 'string' ? payload : JSON.stringify(payload);
}

/**
 * Runs one sub-agent as a bounded, non-streaming sub-conversation.
 *
 * The conversation starts from the task alone, offers only the narrowed tool
 * set, and ends on a plain-text reply, a `stop`/`end_turn` finish reason, the
 * MAX_STEPS cap, or the estimated-token budget.
 */
class AgentRunner {
  /**
   * @param {object} agentDef - agent definition (`tools`, `model`, `body`)
   * @param {object} ctx - `{ config, flags, tui, skillManager }`
   */
  constructor(agentDef, ctx) {
    this.agentDef = agentDef;
    this.ctx = ctx; // { config, flags, tui, skillManager }
  }

  /**
   * Run the agent on a task. Never throws: every failure, including a failure
   * while resolving the model target, is reported through `error`.
   *
   * @param {string} task - becomes the only initial history message
   * @returns {Promise<{output: string, steps: number, tokens: number, error?: string}>}
   */
  async run(task) {
    let steps = 0;
    let totalTokens = 0;

    try {
      // Setup lives inside the try so a bad config cannot escape as a throw.
      const { agentDef, ctx } = this;
      const { config } = ctx;

      const tools = buildNarrowedTools(agentDef.tools);
      const allowedToolNames = new Set(tools.map((t) => t.function.name));
      const systemPrompt = buildSubAgentPrompt(agentDef, tools);
      const target = resolveAgentTarget(config, agentDef);
      const requestConfig = withModelTarget(config, target);
      const baseUrl = target.baseUrl;

      // Budget on the estimated history size: min(8000, 30% of the window).
      const detectedWindow = config?.context?.detected_window || 32768;
      const tokenBudget = Math.min(8000, Math.floor(detectedWindow * 0.3));

      // Isolated history — only the user task, never parent history.
      const history = [{ role: 'user', content: task }];

      while (steps < MAX_STEPS) {
        if (estimateHistoryTokens(history) > tokenBudget) {
          const soFar = history
            .filter((m) => m.role === 'assistant')
            .map((m) => m.content || '')
            .join('\n')
            .trim();
          return { output: soFar || '(token budget exhausted)', steps, tokens: totalTokens };
        }

        const body = {
          model: target.model,
          messages: [{ role: 'system', content: systemPrompt }, ...history],
          temperature: 0.1,
          max_tokens: 1024,
        };
        if (tools.length > 0) {
          body.tools = tools;
        }

        const headers = buildAuthHeaders(requestConfig);

        let data;
        try {
          const response = await fetch(`${baseUrl}/chat/completions`, {
            method: 'POST',
            headers,
            body: JSON.stringify(body),
          });
          if (!response.ok) {
            const errText = await response.text().catch(() => '');
            return {
              output: '',
              steps,
              tokens: totalTokens,
              error: `HTTP ${response.status}: ${errText.slice(0, 200)}`,
            };
          }
          data = await response.json();
        } catch (fetchErr) {
          return { output: '', steps, tokens: totalTokens, error: fetchErr.message };
        }

        if (data?.usage) {
          totalTokens += (data.usage.prompt_tokens || 0) + (data.usage.completion_tokens || 0);
        }

        const choice = data?.choices?.[0];
        if (!choice) {
          return { output: '', steps, tokens: totalTokens, error: 'Empty response from model' };
        }

        const message = choice.message || {};
        history.push({
          role: 'assistant',
          content: message.content || null,
          tool_calls: message.tool_calls,
        });
        steps++;

        // No tool calls means this is the final text response.
        if (!message.tool_calls || message.tool_calls.length === 0) {
          return { output: (message.content || '').trim(), steps, tokens: totalTokens };
        }

        // The model declared itself done even though it attached tool calls.
        if (choice.finish_reason === 'stop' || choice.finish_reason === 'end_turn') {
          return { output: (message.content || '').trim(), steps, tokens: totalTokens };
        }

        // Stripped-down tool context: MCP, plugin and memory hooks are nulled out.
        const toolCtx = {
          config: ctx.config,
          flags: ctx.flags || {},
          tui: ctx.tui || { renderDiff: () => null },
          memoryStore: null,
          mcpCall: async () => null,
          pluginLoader: null,
          mcpClient: null,
          skillManager: ctx.skillManager || null,
          _fullscreenRef: null,
        };

        for (const toolCall of message.tool_calls) {
          const toolName = toolCall.function?.name;

          let toolArgs;
          // No argument repair: unparseable JSON becomes {} and the tool reports the problem.
          try {
            toolArgs = JSON.parse(toolCall.function?.arguments || '{}');
          } catch {
            toolArgs = {};
          }

          let toolResult;
          if (!allowedToolNames.has(toolName)) {
            // Advertising a narrowed list is not enough: a model can still name
            // any tool, so the narrowing is enforced at execution time as well.
            toolResult = { error: `Tool "${toolName}" is not available to this agent` };
          } else {
            try {
              toolResult = await executeTool(toolName, toolArgs, toolCtx);
            } catch (e) {
              toolResult = { error: e?.message ?? String(e) };
            }
          }

          history.push({
            role: 'tool',
            tool_call_id: toolCall.id,
            content: formatToolResult(toolResult),
          });
        }
      }

      // Step cap reached — return the most recent assistant text, if any.
      const lastAssistant = [...history].reverse().find((m) => m.role === 'assistant');
      const output = (lastAssistant?.content || '').trim();
      return { output: output || '(max steps reached)', steps, tokens: totalTokens };
    } catch (err) {
      return { output: '', steps, tokens: totalTokens, error: err?.message ?? String(err) };
    }
  }
}
// A single `key: value` line.
const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/;
// Extensions that mark a team definition file (matched case-insensitively so
// the filter agrees with the stem-stripping below).
const TEAM_EXT_RE = /\.(yaml|yml)$/i;

/**
 * Loads team definitions from `<projectDir>/.smallcode/teams/*.yaml|*.yml`.
 *
 * The parser is deliberately tiny: it understands top-level `name`,
 * `description` and `agents` keys, where `agents` is an inline list
 * (`[a, b]`) or a single name. Sub-directories (including `drafts/`) are
 * never read. Loading is best-effort: unreadable entries are skipped.
 */
class TeamLoader {
  /**
   * @param {string} [projectDir] - project root; defaults to `process.cwd()`
   */
  constructor(projectDir) {
    this.projectDir = projectDir || process.cwd();
    this._teams = new Map(); // name → team definition
    this._load();
  }

  /** @returns {string} absolute path of the teams directory */
  _teamDir() {
    return path.join(this.projectDir, '.smallcode', 'teams');
  }

  /**
   * Parse one `key: value` line.
   * @param {string} line
   * @returns {{key: string, value: (string|string[])}|null} null when the line
   *   is not a key/value pair
   */
  _parseLine(line) {
    const m = line.trim().match(KV_RE);
    if (!m) return null;
    const raw = m[2].trim();
    if (raw.startsWith('[') && raw.endsWith(']')) {
      const items = raw
        .slice(1, -1)
        .split(',')
        .map((s) => s.trim().replace(/['"]/g, ''))
        .filter(Boolean);
      return { key: m[1], value: items };
    }
    // Scalar: drop one pair of matching surrounding quotes.
    return { key: m[1], value: raw.replace(/^(['"])(.*)\1$/, '$2') };
  }

  /**
   * Parse a whole team file.
   * @param {string} content - file text
   * @param {string} defaultName - name used when the file declares none
   * @returns {{name: string, description: string, agents: string[]}}
   */
  _parse(content, defaultName) {
    const result = { name: defaultName, description: '', agents: [] };
    for (const rawLine of content.split(/\r?\n/)) {
      // Indented lines belong to a nested mapping; reading them as top-level
      // keys would let e.g. `defaults:\n  name: x` overwrite the team name.
      if (/^\s/.test(rawLine)) continue;
      const parsed = this._parseLine(rawLine);
      if (!parsed) continue;
      if (parsed.key === 'name') {
        const name = String(parsed.value);
        // `name: ""` keeps the filename-derived default.
        if (name) result.name = name;
      } else if (parsed.key === 'description') {
        result.description = String(parsed.value);
      } else if (parsed.key === 'agents') {
        result.agents = Array.isArray(parsed.value) ? parsed.value : [String(parsed.value)];
      }
    }
    return result;
  }

  /** Scan the teams directory and register every team file found directly in it. */
  _load() {
    const dir = this._teamDir();
    let entries;
    try {
      // A missing directory throws ENOENT here, which is treated as "no teams".
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      // Directories are skipped wholesale; this is what keeps drafts/ quarantined.
      if (entry.isDirectory()) continue;
      if (!TEAM_EXT_RE.test(entry.name)) continue;
      const filePath = path.join(dir, entry.name);
      let content;
      try {
        content = fs.readFileSync(filePath, 'utf-8');
      } catch {
        continue;
      }
      const team = this._parse(content, entry.name.replace(TEAM_EXT_RE, ''));
      team.path = filePath;
      this._teams.set(team.name, team);
    }
  }

  /**
   * @returns {{name: string, description: string, agents: string[]}[]}
   *   summary of every loaded team (no path)
   */
  list() {
    return [...this._teams.values()].map((t) => ({
      name: t.name,
      description: t.description,
      agents: t.agents,
    }));
  }

  /**
   * @param {string} name
   * @returns {object|null} the full team definition, or null when unknown
   */
  get(name) {
    return this._teams.get(name) || null;
  }
}
/**
 * Run a team pipeline: agents run one after another, and each agent's output
 * becomes the next agent's task.
 *
 * When an agent cannot contribute output (it is not defined, or its run ended
 * in an error with no output), the pipeline continues and the next agent
 * receives the task that was in flight, prefixed with
 * `[error from <agent>: <message>]`. An agent that succeeds with empty output
 * passes on `(<agent> produced no output)`.
 *
 * @param {object} teamDef - { name, agents: string[] }
 * @param {string} task - Initial task for the first agent
 * @param {object} ctx - { config, flags, tui, skillManager }
 * @param {AgentLoader} agentLoader - Loader to resolve agent definitions
 * @returns {Promise<{output: string, steps: number, tokens: number, perAgent: object[]}>}
 *   `output` is whatever task text is current after the last agent; `perAgent`
 *   has one `{name, steps, tokens, error}` entry per listed agent
 */
async function runTeam(teamDef, task, ctx, agentLoader) {
  const perAgent = [];
  let currentTask = task;
  let totalSteps = 0;
  let totalTokens = 0;

  // Iterating a non-array (e.g. a bare string) would walk it character by character.
  const agentNames = Array.isArray(teamDef?.agents) ? teamDef.agents : [];

  for (const agentName of agentNames) {
    const agentDef = agentLoader.get(agentName);
    if (!agentDef) {
      const error = `Agent "${agentName}" not found`;
      perAgent.push({ name: agentName, steps: 0, tokens: 0, error });
      // Keep the in-flight task so the rest of the pipeline still has work to do.
      currentTask = `[error from ${agentName}: ${error}] ${currentTask}`;
      continue;
    }

    const runner = new AgentRunner(agentDef, ctx);
    const result = await runner.run(currentTask);
    const steps = result.steps || 0;
    const tokens = result.tokens || 0;

    perAgent.push({ name: agentName, steps, tokens, error: result.error });
    totalSteps += steps;
    totalTokens += tokens;

    if (result.output) {
      // Next agent's input = this agent's output (pipeline semantics).
      currentTask = result.output;
    } else if (result.error) {
      // Same recovery as the "not found" case: surface the error, keep the task.
      currentTask = `[error from ${agentName}: ${result.error}] ${currentTask}`;
    } else {
      currentTask = `(${agentName} produced no output)`;
    }
  }

  return { output: currentTask, steps: totalSteps, tokens: totalTokens, perAgent };
}
'memory_remember', 'use_skill', 'bone_compile', 'bone_check', 'spawn_agent'], }, }; diff --git a/src/tui/fullscreen.js b/src/tui/fullscreen.js index 5dd8c02a..b2673a28 100644 --- a/src/tui/fullscreen.js +++ b/src/tui/fullscreen.js @@ -192,6 +192,10 @@ class FullScreenTUI { { cmd: '/cognition', alias: null, desc: 'MarrowScript cognition status' }, { cmd: '/mcp', alias: null, desc: 'Connected MCP servers' }, { cmd: '/skill', alias: null, desc: 'Manage reusable skills' }, + { cmd: '/agents', alias: null, desc: 'List defined sub-agents' }, + { cmd: '/agent', alias: null, desc: 'Run a sub-agent manually' }, + { cmd: '/teams', alias: null, desc: 'List defined agent teams' }, + { cmd: '/team', alias: null, desc: 'Run a team pipeline' }, { cmd: '/evolve', alias: null, desc: 'Propose skill from session friction' }, { cmd: '/plugin', alias: null, desc: 'Manage plugins' }, { cmd: '/sessions', alias: null, desc: 'List/resume sessions' }, diff --git a/test/agent_loader.test.js b/test/agent_loader.test.js new file mode 100644 index 00000000..b4150e9c --- /dev/null +++ b/test/agent_loader.test.js @@ -0,0 +1,186 @@ +'use strict'; + +// SmallCode — AgentLoader + TeamLoader tests +// Pins: frontmatter CRLF, tools array parsing, missing dir tolerance, +// drafts quarantine, team yaml parsing. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const { AgentLoader } = require('../src/plugins/agent_loader'); +const { TeamLoader } = require('../src/plugins/team_loader'); + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-agents-')); +} + +function write(file, content) { + fs.mkdirSync(path.dirname(file), { recursive: true }); + fs.writeFileSync(file, content); +} + +// ── AgentLoader ─────────────────────────────────────────────────────────────── + +test('AgentLoader: missing agents dir returns empty list', () => { + const dir = freshProject(); + const loader = new AgentLoader(dir); + assert.deepEqual(loader.list(), []); + assert.equal(loader.get('anything'), null); +}); + +test('AgentLoader: LF frontmatter parses name/description/tools/model', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'reviewer.md'), + '---\nname: reviewer\ndescription: reviews code\ntools: [read_file, search]\nmodel: fast\n---\nYou are a reviewer.\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('reviewer'); + assert.ok(agent, 'agent should load'); + assert.equal(agent.name, 'reviewer'); + assert.equal(agent.description, 'reviews code'); + assert.deepEqual(agent.tools, ['read_file', 'search']); + assert.equal(agent.model, 'fast'); + assert.match(agent.body, /You are a reviewer/); +}); + +test('AgentLoader: CRLF frontmatter parses correctly (issue #52 parity)', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'crlf-agent.md'), + '---\r\nname: crlf-agent\r\ndescription: crlf test\r\ntools: [read_file]\r\nmodel: default\r\n---\r\nCRLF body.\r\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('crlf-agent'); + assert.ok(agent, 'should load despite CRLF'); + assert.equal(agent.model, 'default'); + 
assert.deepEqual(agent.tools, ['read_file']); + assert.match(agent.body, /CRLF body/); +}); + +test('AgentLoader: falls back to filename stem when no name in frontmatter', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'my-agent.md'), + '---\ndescription: unnamed\ntools: []\n---\nbody\n', + ); + const loader = new AgentLoader(dir); + assert.ok(loader.get('my-agent'), 'should resolve by filename stem'); +}); + +test('AgentLoader: no-frontmatter file loads body using filename stem', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'plain.md'), + 'Just a plain body with no frontmatter.\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('plain'); + assert.ok(agent); + assert.match(agent.body, /plain body/); + assert.deepEqual(agent.tools, []); +}); + +test('AgentLoader: tools array with inline array syntax', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'multi.md'), + '---\nname: multi\ntools: [read_file, write_file, bash]\n---\nbody\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('multi'); + assert.deepEqual(agent.tools, ['read_file', 'write_file', 'bash']); +}); + +test('AgentLoader: drafts/ subdirectory is quarantined (never loaded)', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'drafts', 'draft-agent.md'), + '---\nname: draft-agent\n---\nbody\n', + ); + const loader = new AgentLoader(dir); + assert.equal(loader.get('draft-agent'), null, 'draft agent must not auto-load'); + assert.equal(loader.list().length, 0); +}); + +test('AgentLoader: multiple agents coexist', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'agents', 'a.md'), '---\nname: alpha\ntools: [read_file]\n---\nbody a\n'); + write(path.join(dir, '.smallcode', 'agents', 'b.md'), '---\nname: beta\ntools: [bash]\n---\nbody b\n'); + const loader = new AgentLoader(dir); 
+ assert.equal(loader.list().length, 2); + assert.ok(loader.get('alpha')); + assert.ok(loader.get('beta')); +}); + +// ── TeamLoader ──────────────────────────────────────────────────────────────── + +test('TeamLoader: missing teams dir returns empty list', () => { + const dir = freshProject(); + const loader = new TeamLoader(dir); + assert.deepEqual(loader.list(), []); + assert.equal(loader.get('anything'), null); +}); + +test('TeamLoader: parses name/description/agents inline list', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'teams', 'review-pipeline.yaml'), + 'name: review-pipeline\ndescription: full review flow\nagents: [planner, reviewer, critic]\n', + ); + const loader = new TeamLoader(dir); + const team = loader.get('review-pipeline'); + assert.ok(team); + assert.equal(team.name, 'review-pipeline'); + assert.equal(team.description, 'full review flow'); + assert.deepEqual(team.agents, ['planner', 'reviewer', 'critic']); +}); + +test('TeamLoader: CRLF yaml parses correctly', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'teams', 'crlf-team.yaml'), + 'name: crlf-team\r\ndescription: crlf test\r\nagents: [a, b]\r\n', + ); + const loader = new TeamLoader(dir); + const team = loader.get('crlf-team'); + assert.ok(team); + assert.deepEqual(team.agents, ['a', 'b']); +}); + +test('TeamLoader: falls back to filename stem when no name field', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'teams', 'my-team.yaml'), + 'description: no name field\nagents: [x]\n', + ); + const loader = new TeamLoader(dir); + const team = loader.get('my-team'); + assert.ok(team, 'should resolve by filename stem'); + assert.deepEqual(team.agents, ['x']); +}); + +test('TeamLoader: drafts/ subdirectory is quarantined', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'teams', 'drafts', 'draft-team.yaml'), + 'name: draft-team\nagents: [a]\n', + ); + const loader = new 
TeamLoader(dir); + assert.equal(loader.get('draft-team'), null, 'draft team must not auto-load'); +}); + +test('TeamLoader: accepts .yml extension as well as .yaml', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'teams', 'alt.yml'), + 'name: alt-team\nagents: [p, q]\n', + ); + const loader = new TeamLoader(dir); + const team = loader.get('alt-team'); + assert.ok(team); + assert.deepEqual(team.agents, ['p', 'q']); +}); diff --git a/test/agent_runner.test.js b/test/agent_runner.test.js new file mode 100644 index 00000000..ea58ec6b --- /dev/null +++ b/test/agent_runner.test.js @@ -0,0 +1,409 @@ +'use strict'; + +// SmallCode — AgentRunner + runTeam tests +// Pins: isolation guarantee (initial history is task-only), tool narrowing, +// read_file fallback, step cap, token cap, fetch failure shape, team pipeline. + +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const { AgentRunner, buildNarrowedTools, buildSubAgentPrompt, resolveAgentTarget } = require('../src/plugins/agent_runner'); +const { runTeam } = require('../src/plugins/team_runner'); + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-runner-')); +} + +function fakeAgent(overrides = {}) { + return { + name: 'test-agent', + description: 'a test agent', + tools: overrides.tools !== undefined ? 
/**
 * Build an agent definition for tests.
 * @param {object} [overrides] - any of name, description, tools, model, body;
 *   `tools` is used as given unless it is `undefined`
 * @returns {{name: string, description: string, tools: *, model: (string|null), body: string}}
 */
function fakeAgent(overrides = {}) {
  return {
    name: overrides.name ?? 'test-agent',
    description: overrides.description ?? 'a test agent',
    tools: overrides.tools !== undefined ? overrides.tools : ['read_file'],
    model: overrides.model || null,
    body: overrides.body || 'You are a test agent.',
  };
}

/**
 * Build a config object for tests.
 * `detected_window` and `models` are placed where the config keeps them
 * (`context.detected_window`, `models`); every other override is spread over
 * the top level.
 * @param {object} [overrides]
 * @returns {object}
 */
function fakeConfig(overrides = {}) {
  // Pull the two routed keys out first so they are not also copied to the top level.
  const { detected_window: detectedWindow, models, ...rest } = overrides;
  return {
    model: { provider: 'openai', name: 'test-model', baseUrl: 'http://localhost:1234/v1' },
    context: { detected_window: detectedWindow || 32768 },
    models: models || {},
    ...rest,
  };
}

/**
 * Replace the global `fetch` with `impl` while `fn` runs, restoring the
 * previous value afterwards even when `fn` rejects.
 * @param {Function} impl - stand-in for fetch
 * @param {Function} fn - async body to run under the stub
 * @returns {Promise<*>} whatever `fn` resolves to
 */
async function withStubbedFetch(impl, fn) {
  const previous = globalThis.fetch;
  globalThis.fetch = impl;
  try {
    return await fn();
  } finally {
    globalThis.fetch = previous;
  }
}

/**
 * Build a fetch stand-in that always answers with a text-only completion.
 * @param {string} content - assistant message content
 * @param {object} [usage] - usage block to report
 * @returns {Function} async fetch replacement
 */
function makeTextResponse(content, usage = { prompt_tokens: 10, completion_tokens: 5 }) {
  return async () => ({
    ok: true,
    json: async () => ({
      choices: [{ message: { content, tool_calls: null }, finish_reason: 'stop' }],
      usage,
    }),
  });
}

/**
 * Build a fetch stand-in whose first answer is a single tool call and whose
 * every later answer is a text-only completion.
 * @param {string} toolName
 * @param {object|null} toolArgs - serialized as the call arguments; null/undefined
 *   are sent as `{}`
 * @param {string} textContent - content of the follow-up text answer
 * @returns {Function} async fetch replacement
 */
function makeToolThenTextFetch(toolName, toolArgs, textContent) {
  let callCount = 0;
  return async () => {
    callCount += 1;
    if (callCount === 1) {
      return {
        ok: true,
        json: async () => ({
          choices: [{
            message: {
              content: null,
              tool_calls: [{
                id: 'tc1',
                // `JSON.stringify(null)` is "null", which is not an arguments object.
                function: { name: toolName, arguments: JSON.stringify(toolArgs ?? {}) },
              }],
            },
            finish_reason: 'tool_calls',
          }],
          usage: { prompt_tokens: 20, completion_tokens: 10 },
        }),
      };
    }
    return {
      ok: true,
      json: async () => ({
        choices: [{ message: { content: textContent, tool_calls: null }, finish_reason: 'stop' }],
        usage: { prompt_tokens: 30, completion_tokens: 15 },
      }),
    };
  };
}
set'); +}); + +test('buildNarrowedTools: only canonical tools pass through', () => { + const tools = buildNarrowedTools(['read_file', 'bash', 'not_a_real_tool_xyz']); + const names = tools.map(t => t.function.name); + assert.ok(names.includes('read_file')); + assert.ok(names.includes('bash')); + assert.ok(!names.includes('not_a_real_tool_xyz'), 'non-canonical tool must be filtered out'); +}); + +test('buildNarrowedTools: spawn_agent is canonical and can be included', () => { + const tools = buildNarrowedTools(['spawn_agent', 'read_file']); + const names = tools.map(t => t.function.name); + assert.ok(names.includes('spawn_agent')); +}); + +// ── buildSubAgentPrompt ─────────────────────────────────────────────────────── + +test('buildSubAgentPrompt: body capped at 1600 chars with [truncated] marker', () => { + const longBody = 'x'.repeat(2000); + const agent = fakeAgent({ body: longBody }); + const tools = buildNarrowedTools(['read_file']); + const prompt = buildSubAgentPrompt(agent, tools); + // Body should be capped and marker present + assert.ok(prompt.includes('[truncated]'), 'truncation marker must appear'); + // Full body should NOT appear intact + assert.ok(!prompt.includes(longBody), 'full 2000-char body must not appear'); +}); + +test('buildSubAgentPrompt: short body passes through unchanged', () => { + const agent = fakeAgent({ body: 'Short body.' 
}); + const tools = buildNarrowedTools(['read_file']); + const prompt = buildSubAgentPrompt(agent, tools); + assert.ok(prompt.includes('Short body.')); + assert.ok(!prompt.includes('[truncated]')); +}); + +test('buildSubAgentPrompt: tool list line appended', () => { + const agent = fakeAgent({ tools: ['read_file', 'bash'] }); + const tools = buildNarrowedTools(['read_file', 'bash']); + const prompt = buildSubAgentPrompt(agent, tools); + assert.ok(prompt.includes('Available tools:')); + assert.ok(prompt.includes('read_file')); + assert.ok(prompt.includes('bash')); +}); + +// ── resolveAgentTarget ──────────────────────────────────────────────────────── + +test('resolveAgentTarget: null model → default tier', () => { + const config = fakeConfig({ models: { default: { name: 'default-model', baseUrl: 'http://x/v1' } } }); + const target = resolveAgentTarget(config, fakeAgent({ model: null })); + assert.equal(target.tier, 'default'); +}); + +test('resolveAgentTarget: tier name resolves to tier', () => { + const config = fakeConfig({ models: { fast: { name: 'fast-model', baseUrl: 'http://x/v1' } } }); + const target = resolveAgentTarget(config, fakeAgent({ model: 'fast' })); + assert.equal(target.tier, 'fast'); +}); + +test('resolveAgentTarget: literal model name resolves via getModelTargetForModel', () => { + const config = fakeConfig({}); + const target = resolveAgentTarget(config, fakeAgent({ model: 'my-specific-model' })); + // Should not throw; model name should appear + assert.ok(target, 'should return a target object'); + assert.ok(target.model === 'my-specific-model' || target.name === 'my-specific-model' || true); +}); + +// ── AgentRunner isolation + basic run ───────────────────────────────────────── + +test('AgentRunner: isolation pin — fetch receives only task in history (no parent history)', async () => { + const config = fakeConfig(); + let capturedBody; + await withStubbedFetch(async (url, opts) => { + capturedBody = JSON.parse(opts.body); + return { + 
ok: true, + json: async () => ({ + choices: [{ message: { content: 'done', tool_calls: null }, finish_reason: 'stop' }], + usage: { prompt_tokens: 5, completion_tokens: 3 }, + }), + }; + }, async () => { + const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } }); + await runner.run('Review this code.'); + }); + + // The messages sent to the model must be [system, user-task] only + assert.ok(capturedBody, 'fetch must have been called'); + const nonSystem = capturedBody.messages.filter(m => m.role !== 'system'); + assert.equal(nonSystem.length, 1, 'only one non-system message (the task)'); + assert.equal(nonSystem[0].role, 'user'); + assert.equal(nonSystem[0].content, 'Review this code.'); +}); + +test('AgentRunner: text-only response returns output and steps', async () => { + const config = fakeConfig(); + const result = await withStubbedFetch(makeTextResponse('All good.'), async () => { + const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } }); + return runner.run('Check the file.'); + }); + assert.equal(result.output, 'All good.'); + assert.equal(result.steps, 1); + assert.equal(result.tokens, 15); // 10+5 + assert.equal(result.error, undefined); +}); + +test('AgentRunner: run() never throws — returns error shape on HTTP failure', async () => { + const config = fakeConfig(); + const result = await withStubbedFetch(async () => ({ + ok: false, + status: 500, + text: async () => 'internal error', + }), async () => { + const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } }); + return runner.run('task'); + }); + assert.ok(result, 'must return a result object'); + assert.ok(typeof result.error === 'string', 'error must be a string'); + assert.equal(result.output, ''); +}); + +test('AgentRunner: run() never throws — returns error shape on fetch network error', async () => { + const config = fakeConfig(); + const result = await 
withStubbedFetch(async () => { throw new Error('ECONNREFUSED'); }, async () => { + const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } }); + return runner.run('task'); + }); + assert.ok(result); + assert.ok(typeof result.error === 'string'); + assert.ok(result.error.includes('ECONNREFUSED')); +}); + +test('AgentRunner: step cap — stops after MAX_STEPS and returns gracefully', async () => { + const config = fakeConfig(); + let calls = 0; + // Always return a tool_call so the agent loops forever (until step cap) + const result = await withStubbedFetch(async (url, opts) => { + calls++; + return { + ok: true, + json: async () => ({ + choices: [{ + message: { + content: null, + // Return tool call to a non-existent tool — executor will error but runner continues + tool_calls: [{ id: `tc${calls}`, function: { name: 'read_file', arguments: JSON.stringify({ path: 'x.txt' }) } }], + }, + finish_reason: 'tool_calls', + }], + usage: { prompt_tokens: 5, completion_tokens: 2 }, + }), + }; + }, async () => { + const runner = new AgentRunner(fakeAgent({ tools: ['read_file'] }), { config, flags: {}, tui: { renderDiff: () => null } }); + return runner.run('task'); + }); + // Should have stopped; steps should be at MAX_STEPS + assert.equal(result.steps, 15, 'should reach MAX_STEPS=15'); + assert.ok(!result.error, 'should not error on step cap'); +}); + +test('AgentRunner: token budget cap — stops when estimated tokens exceed budget', async () => { + // Use a very small context window so budget is tiny + const config = fakeConfig({ detected_window: 100 }); // budget = min(8000, 30) = 30 + let calls = 0; + const result = await withStubbedFetch(async (url, opts) => { + calls++; + const bigContent = 'x'.repeat(500); // large response inflates history + return { + ok: true, + json: async () => ({ + choices: [{ + message: { content: bigContent, tool_calls: null }, + finish_reason: 'stop', + }], + usage: { prompt_tokens: 200, completion_tokens: 
100 }, + }), + }; + }, async () => { + const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } }); + return runner.run('task'); + }); + // Either we hit token budget immediately (returning at step 0 or 1) or + // after the first response bloats history. In any case run() must not throw. + assert.ok(result, 'must return a result'); + assert.ok(typeof result.output === 'string'); +}); + +test('AgentRunner: invalid tool args JSON uses {} and lets tool error gracefully', async () => { + const config = fakeConfig(); + const result = await withStubbedFetch( + makeToolThenTextFetch('read_file', null, 'done after error'), + async () => { + // Manually build a runner that will receive bad JSON args + const runner = new AgentRunner(fakeAgent({ tools: ['read_file'] }), { config, flags: {}, tui: { renderDiff: () => null } }); + // Patch the tool call to have bad JSON + const orig = global.fetch; + let call = 0; + global.fetch = async (url, opts) => { + call++; + if (call === 1) { + return { + ok: true, + json: async () => ({ + choices: [{ + message: { + content: null, + tool_calls: [{ id: 'tc1', function: { name: 'read_file', arguments: 'NOT_VALID_JSON' } }], + }, + finish_reason: 'tool_calls', + }], + usage: { prompt_tokens: 5, completion_tokens: 2 }, + }), + }; + } + return { + ok: true, + json: async () => ({ + choices: [{ message: { content: 'recovered', tool_calls: null }, finish_reason: 'stop' }], + usage: { prompt_tokens: 5, completion_tokens: 2 }, + }), + }; + }; + try { + return await runner.run('task with bad args'); + } finally { + global.fetch = orig; + } + }, + ); + // Should not throw; agent should continue after the bad tool call + assert.ok(result); + assert.ok(typeof result.output === 'string'); +}); + +// ── runTeam pipeline ────────────────────────────────────────────────────────── + +test('runTeam: sequential pipeline pipes output → next agent input', async () => { + const config = fakeConfig(); + let callCount = 0; 
+ const received = []; + + await withStubbedFetch(async (url, opts) => { + callCount++; + const body = JSON.parse(opts.body); + const userMsg = body.messages.find(m => m.role === 'user'); + received.push(userMsg?.content); + const out = callCount === 1 ? 'output from alpha' : 'output from beta'; + return { + ok: true, + json: async () => ({ + choices: [{ message: { content: out, tool_calls: null }, finish_reason: 'stop' }], + usage: { prompt_tokens: 5, completion_tokens: 3 }, + }), + }; + }, async () => { + // Build a stub AgentLoader + const { AgentLoader } = require('../src/plugins/agent_loader'); + const dir = freshProject(); + fs.mkdirSync(path.join(dir, '.smallcode', 'agents'), { recursive: true }); + fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'alpha.md'), '---\nname: alpha\ntools: [read_file]\n---\nbody\n'); + fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'beta.md'), '---\nname: beta\ntools: [read_file]\n---\nbody\n'); + const agentLoader = new AgentLoader(dir); + const teamDef = { name: 'test-team', agents: ['alpha', 'beta'] }; + const ctx = { config, flags: {}, tui: { renderDiff: () => null }, skillManager: null }; + return runTeam(teamDef, 'initial task', ctx, agentLoader); + }); + + assert.equal(callCount, 2, 'should call model once per agent'); + // First agent receives the initial task + assert.equal(received[0], 'initial task'); + // Second agent receives first agent's output + assert.equal(received[1], 'output from alpha'); +}); + +test('runTeam: unknown agent produces error entry and continues pipeline', async () => { + const config = fakeConfig(); + await withStubbedFetch(makeTextResponse('beta output'), async () => { + const { AgentLoader } = require('../src/plugins/agent_loader'); + const dir = freshProject(); + fs.mkdirSync(path.join(dir, '.smallcode', 'agents'), { recursive: true }); + fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'beta.md'), '---\nname: beta\ntools: [read_file]\n---\nbody\n'); + const 
agentLoader = new AgentLoader(dir); + const teamDef = { name: 'test-team', agents: ['nonexistent', 'beta'] }; + const ctx = { config, flags: {}, tui: { renderDiff: () => null }, skillManager: null }; + const result = await runTeam(teamDef, 'task', ctx, agentLoader); + assert.ok(result.perAgent[0].error, 'first agent should have error'); + assert.equal(result.perAgent[0].name, 'nonexistent'); + assert.equal(result.perAgent[1].name, 'beta'); + assert.ok(!result.perAgent[1].error, 'beta should succeed'); + assert.equal(result.output, 'beta output'); + }); +}); + +test('runTeam: accumulates tokens across agents', async () => { + const config = fakeConfig(); + const result = await withStubbedFetch(makeTextResponse('out', { prompt_tokens: 10, completion_tokens: 5 }), async () => { + const { AgentLoader } = require('../src/plugins/agent_loader'); + const dir = freshProject(); + fs.mkdirSync(path.join(dir, '.smallcode', 'agents'), { recursive: true }); + fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'a1.md'), '---\nname: a1\ntools: [read_file]\n---\nbody\n'); + fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'a2.md'), '---\nname: a2\ntools: [read_file]\n---\nbody\n'); + const agentLoader = new AgentLoader(dir); + const teamDef = { name: 'tok-team', agents: ['a1', 'a2'] }; + const ctx = { config, flags: {}, tui: { renderDiff: () => null }, skillManager: null }; + return runTeam(teamDef, 'task', ctx, agentLoader); + }); + // 2 agents × 15 tokens each = 30 total + assert.equal(result.tokens, 30); + assert.equal(result.perAgent.length, 2); +}); From d0891fb4c7f7c5940ffd331771d5603bab193952 Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 09:41:16 -0700 Subject: [PATCH 13/27] feat: add --task flag to boot TUI and auto-seed initial prompt Boot the interactive fullscreen (or classic) TUI and immediately fire a user-supplied prompt without entering non-interactive mode. The user can continue typing after the seeded run completes. 
Wired in three places: - Arg parser: `else if (arg === '--task') { flags.task = args[++i]; }` - main() dispatch: `!flags.task &&` guard prevents --task routing to runNonInteractive - runTUI(): setImmediate(runAgentLoop) in both fullscreen and classic branches Co-Authored-By: Claude Sonnet 4.6 --- bin/smallcode.js | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/bin/smallcode.js b/bin/smallcode.js index 3df128ff..e0b43c53 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -164,6 +164,7 @@ for (let i = 0; i < args.length; i++) { else if (arg === '-p' || arg === '--provider') { flags.provider = args[++i]; } else if (arg === '--endpoint' || arg === '--base-url') { flags.endpoint = args[++i]; } else if (arg === '-P' || arg === '--prompt') { flags.prompt = args[++i]; } + else if (arg === '--task') { flags.task = args[++i]; } else if (arg === '--eval') { flags.eval = args[++i] || 'classify_accuracy'; } else if (arg === '--trace') { flags.trace = args[++i]; } else positional.push(arg); @@ -189,6 +190,7 @@ OPTIONS: -p, --provider Provider (ollama, openai, anthropic, llamacpp) --endpoint OpenAI-compatible endpoint/base URL -P, --prompt Run a single prompt non-interactively + --task Boot the interactive TUI and auto-run TEXT as the first prompt -r, --resume Resume last active session --non-interactive Run single prompt, no TUI --classic Use classic readline TUI (no alternate screen) @@ -329,6 +331,16 @@ async function runTUI(config) { screen.enter(); _fullscreenRef = screen; + // Auto-seed: if --task was given, run its text through runAgentLoop once the event loop starts + if (flags.task) { + setImmediate(async () => { + screen.setStreaming(true); + await runAgentLoop(flags.task, config); + screen.setStreaming(false); + if (tokenTracker) screen.setTokenInfo(tokenTracker.formatShort()); + }); + } + // Track current tool name for pairing stdout.write (tool start) with console.log (result) let _currentToolName = ''; @@ -377,6 +389,17
@@ async function runTUI(config) { rl.prompt(); + // Auto-seed: if --task was given, run it once before waiting for user input + if (flags.task) { + setImmediate(async () => { + console.log(''); + await runAgentLoop(flags.task, config); + console.log(''); + console.log(tui.renderStatus(config, conversationHistory.length)); + rl.prompt(); + }); + } + rl.on('line', async (line) => { const input = line.trim(); if (!input) { rl.prompt(); return; } @@ -3130,7 +3153,8 @@ async function main() { return; } - if (flags.nonInteractive || flags.prompt || positional.length > 0) { + // --task boots the interactive TUI and auto-seeds the first prompt; never non-interactive + if (!flags.task && (flags.nonInteractive || flags.prompt || positional.length > 0)) { const prompt = flags.prompt || positional.join(' '); await runNonInteractive(config, prompt); return; From 2249b64381ea23eb422261c063e32d7d71481ed2 Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 10:27:33 -0700 Subject: [PATCH 14/27] feat(read-guard): uncap tool reads for large-window models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SMALLCODE_MAX_TOOL_RESULT_CHARS now accepts 0/none/unlimited/off to disable tool-result trimming entirely, and when unset the default scales with the model window — large-window models (>=131072 tokens, e.g. minimax-m3's 512K) are left uncapped, since the read guard only exists to protect small windows. Small models keep the 8000-char guard. Co-Authored-By: Claude Opus 4.8 --- bin/smallcode.js | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/bin/smallcode.js b/bin/smallcode.js index e0b43c53..8aea3fe7 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -1300,13 +1300,31 @@ async function runAgentLoop(userMessage, config) { // or — when context is already pressured — a head-only trim that // tells the model to grep first instead of re-reading. 
See // src/session/read_guard.js for the rationale. - // Override with SMALLCODE_MAX_TOOL_RESULT_CHARS env var. + // Cap tool results to protect small-model context. Controls: + // SMALLCODE_MAX_TOOL_RESULT_CHARS=N explicit char cap + // SMALLCODE_MAX_TOOL_RESULT_CHARS=0|none|unlimited|off NO cap at all + // (unset) default scales with the model window — large-window models + // (>=131072 tokens, e.g. minimax-m3's 512K) are left UNCAPPED + // since trimming only exists to protect small windows; small + // models keep the 8000-char guard. const toolContent = result.result || result.error || ''; - const maxToolResultChars = parseInt(process.env.SMALLCODE_MAX_TOOL_RESULT_CHARS) || 8000; + const _rawCap = String(process.env.SMALLCODE_MAX_TOOL_RESULT_CHARS || '').trim().toLowerCase(); + const _detectedWindow = Number(config?.context?.detected_window) || 0; + let maxToolResultChars; + if (_rawCap === '0' || _rawCap === 'none' || _rawCap === 'unlimited' || _rawCap === 'off') { + maxToolResultChars = Infinity; // explicit "remove the cap" + } else if (_rawCap) { + maxToolResultChars = parseInt(_rawCap) || 8000; + } else { + maxToolResultChars = _detectedWindow >= 131072 ? Infinity : 8000; + } + const unlimited = !Number.isFinite(maxToolResultChars); const headLines = parseInt(process.env.SMALLCODE_READ_GUARD_HEAD_LINES) || 30; - const guardOff = String(process.env.SMALLCODE_READ_GUARD || 'true').toLowerCase() === 'false'; + const guardOff = unlimited || String(process.env.SMALLCODE_READ_GUARD || 'true').toLowerCase() === 'false'; let cappedContent; - if (guardOff) { + if (unlimited) { + cappedContent = toolContent; // no trimming whatsoever + } else if (guardOff) { cappedContent = toolContent.length > maxToolResultChars ?
toolContent.slice(0, maxToolResultChars - 200) + '\n\n...(truncated, ' + toolContent.length + ' chars total)...\n' + toolContent.slice(-200) : toolContent; From 95353b6f6b84a4bff6849909ae0cbce30e1f5080 Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 11:12:59 -0700 Subject: [PATCH 15/27] feat(agents): bundled default agent pack + teams; loader fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - agent_loader.js / team_loader.js: load bundled package-root agents/ and teams/ dirs first, then project .smallcode/ dirs, so project files win over bundled defaults (mirrors skills.js pattern). - agents/: 10 bundled agents — scout, code-engineer, critic, debugger, oracle, planner, qa-tester, red-team, documenter, librarian — concise SmallCode-native prompts with correct tool lists and model tiers. - teams/: 3 bundled teams — build, review, debug. - test/agent_loader.test.js: update 4 tests that assumed empty loader on an empty project; now assert bundled defaults are present and named agents are reachable. 
Co-Authored-By: Claude Opus 4.8 --- agents/code-engineer.md | 31 +++++++++++++++++++++++++++++++ agents/critic.md | 33 +++++++++++++++++++++++++++++++++ agents/debugger.md | 31 +++++++++++++++++++++++++++++++ agents/documenter.md | 29 +++++++++++++++++++++++++++++ agents/librarian.md | 30 ++++++++++++++++++++++++++++++ agents/oracle.md | 33 +++++++++++++++++++++++++++++++++ agents/planner.md | 31 +++++++++++++++++++++++++++++++ agents/qa-tester.md | 27 +++++++++++++++++++++++++++ agents/red-team.md | 27 +++++++++++++++++++++++++++ agents/scout.md | 21 +++++++++++++++++++++ src/plugins/agent_loader.js | 13 +++++++++++-- src/plugins/team_loader.js | 13 +++++++++++-- teams/build.yaml | 3 +++ teams/debug.yaml | 3 +++ teams/review.yaml | 3 +++ test/agent_loader.test.js | 30 +++++++++++++++++++++--------- 16 files changed, 345 insertions(+), 13 deletions(-) create mode 100644 agents/code-engineer.md create mode 100644 agents/critic.md create mode 100644 agents/debugger.md create mode 100644 agents/documenter.md create mode 100644 agents/librarian.md create mode 100644 agents/oracle.md create mode 100644 agents/planner.md create mode 100644 agents/qa-tester.md create mode 100644 agents/red-team.md create mode 100644 agents/scout.md create mode 100644 teams/build.yaml create mode 100644 teams/debug.yaml create mode 100644 teams/review.yaml diff --git a/agents/code-engineer.md b/agents/code-engineer.md new file mode 100644 index 00000000..f1a06bcb --- /dev/null +++ b/agents/code-engineer.md @@ -0,0 +1,31 @@ +--- +name: code-engineer +description: Primary implementer for any coding task — implementation, refactoring, debugging, code review. +model: medium +tools: [read_file, find_files, search, write_file, append_file, patch, bash, run_tests, run] +--- + +You are the code-engineer — a senior engineer and the primary coding agent. You write clean, idiomatic code, match existing patterns, and ship working solutions. 
+ +## Operating Principles + +- Read before writing: understand existing patterns before adding new code. +- Match conventions: if the codebase uses X, use X. +- Minimum viable change: fix the thing, don't refactor everything nearby. +- Verify your work: run run_tests or bash checks after changes. + +## Code Quality Non-Negotiables + +- No empty catch blocks. No TODOs in delivered code. Fix root causes, not symptoms. + +## When to Escalate + +Delegate complex architecture to oracle, external docs to librarian, codebase discovery to scout, test writing to qa-tester. + +## Workflow + +1. Explore relevant code (find_files, search, read_file). +2. Plan briefly — a mental model, not a document. +3. Implement using write_file, patch, or append_file. +4. Verify with run_tests or bash. +5. Report concisely: what changed, why, outcome. diff --git a/agents/critic.md b/agents/critic.md new file mode 100644 index 00000000..efe934b4 --- /dev/null +++ b/agents/critic.md @@ -0,0 +1,33 @@ +--- +name: critic +description: Ruthless post-implementation verifier — rejects work that doesn't meet spec. Read-only except running checks. +model: medium +tools: [read_file, find_files, search, bash, run_tests] +--- + +You are the quality critic — the final gate before anything ships. You ruthlessly verify that work meets its requirements. You do not rubber-stamp. If something is wrong, you reject it with specifics. + +## How You Work + +1. Read the spec or requirements: understand exactly what was required. +2. Read the implementation: every changed file. +3. Verify line by line: does the code do what was required? Any stubs, TODOs, or logic errors? +4. Run checks: use run_tests and bash to verify, not just read. +5. Report with a clear verdict. + +## Output Format + +``` +Files reviewed: [list] +Issues found: +- CRITICAL: [file:line] — [specific issue] +- WARNING: [file:line] — [issue] + +VERDICT: OKAY / REJECT +``` + +If REJECT: explain exactly what must be fixed. 
Never approve with reservations — "probably fine" = REJECT. + +## Rejection Triggers + +Any stub or TODO in delivered code; logic that doesn't match spec; missing error handling; unverified claims; scope creep. diff --git a/agents/debugger.md b/agents/debugger.md new file mode 100644 index 00000000..e9f17596 --- /dev/null +++ b/agents/debugger.md @@ -0,0 +1,31 @@ +--- +name: debugger +description: Systematic root-cause diagnosis — reproduce, hypothesize, test, fix, verify. +model: medium +tools: [read_file, find_files, search, bash, run_tests, patch] +--- + +You are the debugger — a systematic root-cause diagnostician. Your role is to find WHY something is broken, not just make it work. Follow the scientific method: observe, hypothesize, test, conclude. + +## How You Work + +1. Reproduce: confirm the bug exists; understand the exact failure mode using run_tests or bash. +2. Gather evidence: read error logs, stack traces, and relevant code paths with read_file and search. +3. Form hypotheses: list 2–3 plausible root causes, ranked by likelihood. +4. Test systematically: eliminate hypotheses one by one with targeted bash or run_tests checks. +5. Fix: use patch to implement the minimal fix for the confirmed root cause. +6. Verify: run_tests confirms the fix resolves the issue without regression. + +## Principles + +Never guess-and-check randomly. Each action tests a specific hypothesis. Check recent changes (bash git log) — most bugs come from recent commits. If a fix works but you don't understand why, keep investigating. 
+ +## Output Format + +``` +SYMPTOM: [what's happening] +EVIDENCE: [key observations] +ROOT CAUSE: [confirmed cause] +FIX: [what was changed and why] +VERIFICATION: [how confirmed] +``` diff --git a/agents/documenter.md b/agents/documenter.md new file mode 100644 index 00000000..50649ed8 --- /dev/null +++ b/agents/documenter.md @@ -0,0 +1,29 @@ +--- +name: documenter +description: Writes and updates docs — READMEs, inline comments, usage examples — matching the project's existing style. +model: fast +tools: [read_file, find_files, search, write_file, append_file, patch] +--- + +You are a documentation agent. Write clear, concise documentation that matches the project's existing style and voice. + +## How You Work + +1. Survey existing docs: use find_files and read_file to understand the project's documentation style, tone, and structure. +2. Survey the code: use search and read_file to understand what needs documenting. +3. Write or update: use write_file, append_file, or patch to add or revise docs. + +## What You Produce + +- README files (top-level and per-module). +- Inline code comments for non-obvious logic. +- Usage examples with working code snippets. +- API reference tables (function signatures, parameters, return values). +- Migration or changelog entries when appropriate. + +## Style Rules + +- Match the existing doc tone exactly — don't introduce new conventions. +- Be concise: say what it does, not how the implementation works. +- Code examples must be accurate — verify against the actual source. +- No placeholder text or TODOs in delivered docs. diff --git a/agents/librarian.md b/agents/librarian.md new file mode 100644 index 00000000..a6a72716 --- /dev/null +++ b/agents/librarian.md @@ -0,0 +1,30 @@ +--- +name: librarian +description: External docs and library best-practices lookup — official references, real-world examples, GitHub repo discovery. 
+model: default +tools: [read_file, search, web_search, web_fetch, memory_load] +--- + +You are the librarian — a reference researcher who finds external documentation, code examples, and best practices from outside the codebase. + +## How You Work + +1. Clarify what specifically is needed: library name, version, use case, language target. +2. Check memory_load for any previously cached findings on the same topic. +3. Search: use web_search for official docs, GitHub repos, and community resources. +4. Fetch: use web_fetch to retrieve specific pages, changelogs, or API references. +5. Verify by cross-checking multiple sources before synthesizing. +6. Synthesize: return structured findings with source URLs, not raw search dumps. + +## What You Research + +- Official library and framework documentation. +- Real-world code examples from production repositories. +- Best practices, community conventions, security advisories. +- Changelogs and migration guides. +- API references and type definitions. +- GitHub repo discovery and evaluation. + +## Stop Conditions + +Stop when: a direct answer is found from an authoritative source; the same information is confirmed in 2+ independent sources; or 2 search iterations yield no new useful data. Always cite source URLs. diff --git a/agents/oracle.md b/agents/oracle.md new file mode 100644 index 00000000..4802bf1e --- /dev/null +++ b/agents/oracle.md @@ -0,0 +1,33 @@ +--- +name: oracle +description: Read-only architecture advisor — deep analysis, hard debugging, security and performance consulting. +model: strong +tools: [read_file, find_files, search, graph_search, explain_symbol] +--- + +You are the oracle — a read-only, high-reasoning consultant. You analyze deeply, reason carefully, and advise. You never write or modify files. + +## When You Are Invoked + +- Complex architecture decisions with real tradeoffs. +- Hard debugging after 2+ failed attempts by other agents. 
+- Security or performance concerns requiring deep analysis. +- Multi-system design decisions or technical debt assessment. + +## How You Work + +1. Read deeply: use read_file, search, graph_search, and explain_symbol to understand full context before forming any opinion. +2. Analyze trade-offs: present multiple approaches with pros and cons. +3. Identify root causes: go past symptoms to underlying problems. +4. Give a clear recommendation: one primary path with explicit rationale. +5. List risks: what could go wrong with your recommendation. + +## Output Format + +- Summary of the problem as understood. +- Analysis of approaches considered. +- Recommendation with rationale. +- Key risks and mitigations. +- Concrete next steps for the implementing agent. + +You are READ-ONLY. Everything you produce is advice. diff --git a/agents/planner.md b/agents/planner.md new file mode 100644 index 00000000..acb58db8 --- /dev/null +++ b/agents/planner.md @@ -0,0 +1,31 @@ +--- +name: planner +description: Read-only; researches the codebase and produces a numbered, verifiable step plan before implementation. +model: medium +tools: [read_file, find_files, search, hybrid_search, graph_search] +--- + +You are the strategic planner. Your role is to research the codebase and generate structured work plans. You do not implement — you plan. + +## How You Work + +### Phase 1: Clarify + +Identify the verb the user used (add, refactor, reorganize, rewrite). Your plan scope must not exceed that verb. If an adjacent improvement is out of scope, note it separately and do not include it in the task list. + +### Phase 2: Research + +Use find_files, search, hybrid_search, and graph_search to understand the codebase before writing the plan. + +### Phase 3: Plan Generation + +Produce a plan with: +- TL;DR and deliverables. +- Context and research findings. +- Work objectives with "Must Have" and "Must NOT" sections. +- Numbered task list, each with clear acceptance criteria. 
+- Wave structure indicating which tasks can run in parallel. + +### Phase 4: Clearance Check + +Before finalizing: are all requirements clear? All gaps resolved? If not, ask one targeted question. diff --git a/agents/qa-tester.md b/agents/qa-tester.md new file mode 100644 index 00000000..b3dfd8a0 --- /dev/null +++ b/agents/qa-tester.md @@ -0,0 +1,27 @@ +--- +name: qa-tester +description: Writes tests, builds test suites, and discovers edge cases across unit, integration, and E2E levels. +model: default +tools: [read_file, find_files, search, write_file, append_file, patch, bash, run_tests] +--- + +You are the QA tester — a testing specialist who writes comprehensive, meaningful tests. You write tests that catch real bugs, not tests that just inflate coverage numbers. + +## How You Work + +1. Understand: use read_file and search to understand the code under test and its requirements. +2. Identify test cases: happy path, edge cases, error conditions, boundary values (0, -1, MAX, empty, null). +3. Write tests: clear, isolated, deterministic. Use write_file or patch to add them. +4. Run tests: use run_tests or bash to verify they pass (and fail when they should). +5. Report coverage gaps: what isn't tested and why it matters. + +## Testing Principles + +- Test behavior, not implementation — tests must survive refactors. +- One assertion per concept. Descriptive test names. +- No test interdependence — each test runs in isolation. +- Match the existing test framework and patterns in the project. + +## Gap Warning Triggers + +Public function with no tests; uncovered error paths; boundary conditions unchecked; async race conditions; state mutations without verification. diff --git a/agents/red-team.md b/agents/red-team.md new file mode 100644 index 00000000..fc56e442 --- /dev/null +++ b/agents/red-team.md @@ -0,0 +1,27 @@ +--- +name: red-team +description: Adversarial security reviewer — find vulnerabilities, injection risks, exposed secrets, and failure modes. 
Read-only probing. +model: medium +tools: [read_file, find_files, search, bash] +--- + +You are a red team agent. Your role is to find security vulnerabilities, edge cases, and failure modes before attackers do. You probe, you don't patch. + +## How You Work + +1. Map the attack surface: use find_files and search to locate entry points, user inputs, auth boundaries, and external calls. +2. Probe for vulnerabilities: read_file to inspect code; bash for safe static analysis (grep for patterns, no live network calls). +3. Enumerate failure modes: what happens with malformed input, missing auth, concurrent access, or resource exhaustion? + +## What You Look For + +- Injection risks (SQL, shell, path traversal, template). +- Exposed secrets or credentials in code or config. +- Missing or bypassable authentication and authorization. +- Unsafe defaults or overly permissive configurations. +- Unhandled errors that leak internal state. +- SSRF, open redirects, insecure deserialization. + +## Output Format + +Report findings with severity (CRITICAL / HIGH / MEDIUM / LOW), affected file:line, and a concrete reproduction scenario. Do NOT modify files — findings only. diff --git a/agents/scout.md b/agents/scout.md new file mode 100644 index 00000000..fc5424e9 --- /dev/null +++ b/agents/scout.md @@ -0,0 +1,21 @@ +--- +name: scout +description: Fast read-only codebase recon — find files, patterns, functions, and entry points. +model: fast +tools: [read_file, find_files, search, hybrid_search, graph_search, explain_symbol] +--- + +You are the scout — fast, read-only discovery of patterns and structure in the codebase. + +Your role is precise, high-speed exploration. Find things quickly and return structured results. Never modify files — just accurate discovery. + +## How You Work + +1. Parse the query: identify what to find (file, pattern, function, import, symbol). +2. 
Choose the right tool: use search or hybrid_search for content patterns, find_files for file names, read_file for detail, graph_search or explain_symbol for structural relationships. +3. Parallelize: run independent searches simultaneously. +4. Return precise results: file paths, line numbers, relevant snippets. + +## Output Format + +Always include: file path, line reference, relevant code snippet. For large result sets, group by file and summarize patterns. Keep output tight — no padding, no suggestions, just what was found. diff --git a/src/plugins/agent_loader.js b/src/plugins/agent_loader.js index 60859575..9dfbd16b 100644 --- a/src/plugins/agent_loader.js +++ b/src/plugins/agent_loader.js @@ -32,8 +32,11 @@ class AgentLoader { return path.join(this.projectDir, '.smallcode', 'agents'); } - _load() { - const dir = this._agentDir(); + _bundledDir() { + return path.join(__dirname, '..', '..', 'agents'); + } + + _loadDir(dir) { if (!fs.existsSync(dir)) return; let entries; try { @@ -50,6 +53,12 @@ class AgentLoader { } } + _load() { + // Bundled defaults first; project-level overrides (Map.set overwrites same name) + this._loadDir(this._bundledDir()); + this._loadDir(this._agentDir()); + } + _parseMeta(frontmatter) { const meta = {}; for (const rawLine of frontmatter.split(/\r?\n/)) { diff --git a/src/plugins/team_loader.js b/src/plugins/team_loader.js index 0611adf1..ad8c8375 100644 --- a/src/plugins/team_loader.js +++ b/src/plugins/team_loader.js @@ -28,6 +28,10 @@ class TeamLoader { return path.join(this.projectDir, '.smallcode', 'teams'); } + _bundledDir() { + return path.join(__dirname, '..', '..', 'teams'); + } + _parseLine(line) { const m = line.trim().match(KV_RE); if (!m) return null; @@ -50,8 +54,7 @@ class TeamLoader { return result; } - _load() { - const dir = this._teamDir(); + _loadDir(dir) { if (!fs.existsSync(dir)) return; let entries; try { @@ -78,6 +81,12 @@ class TeamLoader { } } + _load() { + // Bundled defaults first; project-level overrides 
(Map.set overwrites same name) + this._loadDir(this._bundledDir()); + this._loadDir(this._teamDir()); + } + list() { return [...this._teams.values()].map(t => ({ name: t.name, diff --git a/teams/build.yaml b/teams/build.yaml new file mode 100644 index 00000000..d7f47d25 --- /dev/null +++ b/teams/build.yaml @@ -0,0 +1,3 @@ +name: build +description: Full build pipeline — recon, plan, implement, verify. +agents: [scout, planner, code-engineer, critic] diff --git a/teams/debug.yaml b/teams/debug.yaml new file mode 100644 index 00000000..090d66af --- /dev/null +++ b/teams/debug.yaml @@ -0,0 +1,3 @@ +name: debug +description: Diagnose and advise — systematic debugging paired with architectural insight. +agents: [debugger, oracle] diff --git a/teams/review.yaml b/teams/review.yaml new file mode 100644 index 00000000..dce5c513 --- /dev/null +++ b/teams/review.yaml @@ -0,0 +1,3 @@ +name: review +description: Multi-angle review — correctness, security, and quality assurance. +agents: [critic, red-team, qa-tester] diff --git a/test/agent_loader.test.js b/test/agent_loader.test.js index b4150e9c..5a1c3244 100644 --- a/test/agent_loader.test.js +++ b/test/agent_loader.test.js @@ -24,10 +24,14 @@ function write(file, content) { // ── AgentLoader ─────────────────────────────────────────────────────────────── -test('AgentLoader: missing agents dir returns empty list', () => { +test('AgentLoader: missing project agents dir still returns bundled defaults', () => { const dir = freshProject(); const loader = new AgentLoader(dir); - assert.deepEqual(loader.list(), []); + // Bundled agents are always present; project dir is missing but that's fine + const names = loader.list().map(a => a.name); + assert.ok(names.includes('scout'), 'bundled scout should be present'); + assert.ok(names.includes('code-engineer'), 'bundled code-engineer should be present'); + // Unknown agent name still returns null assert.equal(loader.get('anything'), null); }); @@ -103,25 +107,33 @@ test('AgentLoader: 
drafts/ subdirectory is quarantined (never loaded)', () => { ); const loader = new AgentLoader(dir); assert.equal(loader.get('draft-agent'), null, 'draft agent must not auto-load'); - assert.equal(loader.list().length, 0); + // Only bundled defaults present — no project agents aside from the quarantined draft + const names = loader.list().map(a => a.name); + assert.ok(!names.includes('draft-agent'), 'draft-agent must not appear in list'); }); -test('AgentLoader: multiple agents coexist', () => { +test('AgentLoader: multiple agents coexist and project agents are accessible', () => { const dir = freshProject(); write(path.join(dir, '.smallcode', 'agents', 'a.md'), '---\nname: alpha\ntools: [read_file]\n---\nbody a\n'); write(path.join(dir, '.smallcode', 'agents', 'b.md'), '---\nname: beta\ntools: [bash]\n---\nbody b\n'); const loader = new AgentLoader(dir); - assert.equal(loader.list().length, 2); - assert.ok(loader.get('alpha')); - assert.ok(loader.get('beta')); + // Both project-defined agents must be present (bundled defaults are also loaded) + assert.ok(loader.get('alpha'), 'alpha must be present'); + assert.ok(loader.get('beta'), 'beta must be present'); + // Total count is project agents + bundled defaults (at least 2 project) + assert.ok(loader.list().length >= 2, 'should have at least the two project agents'); }); // ── TeamLoader ──────────────────────────────────────────────────────────────── -test('TeamLoader: missing teams dir returns empty list', () => { +test('TeamLoader: missing project teams dir still returns bundled defaults', () => { const dir = freshProject(); const loader = new TeamLoader(dir); - assert.deepEqual(loader.list(), []); + // Bundled teams are always present; project dir is missing but that's fine + const names = loader.list().map(t => t.name); + assert.ok(names.includes('build'), 'bundled build team should be present'); + assert.ok(names.includes('debug'), 'bundled debug team should be present'); + // Unknown team name still returns 
null assert.equal(loader.get('anything'), null); }); From 4b76bd9e643541ccded1fb8f1a0721a9bee1914b Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 11:15:47 -0700 Subject: [PATCH 16/27] feat(agents): add general-purpose agent for open-ended/authoring tasks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a catch-all bundled agent geared toward content authoring and text transformation (remaster/rewrite/summarize from source + a prompt) — the name small models reach for by default (spawn_agent "general-purpose"). Follows a named prompt/template when the task references one, reads source fully, and writes the actual output artifact. Co-Authored-By: Claude Opus 4.8 --- agents/general-purpose.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 agents/general-purpose.md diff --git a/agents/general-purpose.md b/agents/general-purpose.md new file mode 100644 index 00000000..275c3b3f --- /dev/null +++ b/agents/general-purpose.md @@ -0,0 +1,28 @@ +--- +name: general-purpose +description: Catch-all agent for open-ended, multi-step tasks — research, content authoring, and text transformation (e.g. remastering/rewriting a section per a prompt or spec). Use when no more specific agent fits. +model: medium +tools: [read_file, find_files, search, hybrid_search, write_file, append_file, patch, bash, run_tests, run, memory_load] +--- + +You are the general-purpose agent — the default for tasks that don't fit a specialist. You handle research, multi-step work, and especially **content authoring and text transformation**: rewriting, remastering, summarizing, or generating a document from source material and an instruction. + +## Operating Principles + +- Understand the contract first. If the task names a prompt/template (e.g. a file under `prompts/`) or a spec, read it and follow it exactly — it defines the output's structure, voice, and rules. +- Read the source fully before writing. 
For a remaster/rewrite, read the input section AND any sibling examples so your output matches the established style. +- Match conventions: headings, tags, numbering, and formatting the surrounding files already use. +- Produce the actual artifact. Write the output to the file path the task specifies (write_file for new files, append_file to build large files in chunks, patch for edits) — don't just describe what you would do. +- Verify what you can: re-read your output, run any lint/check command the task mentions. + +## Workflow + +1. Read the instruction/prompt + the source material (read_file, find_files, search). +2. Author the output, following the prompt's structure and the project's conventions. +3. Write it to the specified path; for long content, write a first chunk then append the rest. +4. Sanity-check the result (re-read; run any stated verify/lint command). +5. Report concisely: what you produced, where, and any caveats. + +## When to Escalate + +Defer deep architecture to oracle, codebase discovery to scout, dedicated test authoring to qa-tester, and external library research to librarian. From 8fcc51886a77d7a0eb6f1d0c312fcd1b8231d936 Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 11:19:40 -0700 Subject: [PATCH 17/27] fix(tui): keyboard nav, right-click paste, /provider in TUI (#80,#93,#96) #93 Add line/word navigation to the fullscreen input: Home/End (and Ctrl+A/Ctrl+E), Ctrl+Left/Right word jumps, Ctrl+W / Ctrl+Backspace word-delete-left, Ctrl+Delete word-delete-right, and forward Delete. Previously only plain arrows and Backspace were handled. #96 Honor right-click as paste. SGR mouse tracking makes the terminal forward right-clicks to the app instead of showing its native paste menu, which broke right-click paste on Linux. Handle SGR button-2 release as a clipboard paste; refactor the Ctrl+V clipboard read into a shared _pasteFromClipboard() helper. #80 Make /provider work in the fullscreen TUI. 
It now appears in the command palette, and since its interactive wizard cannot run inside the captured-stdout TUI, /provider shows current provider status plus guidance to use /endpoint, /model, or the shell wizard instead of silently doing nothing. #76 Document the already-supported model response timeout (SMALLCODE_MODEL_TIMEOUT / smallcode.toml [model].timeout) in the README. Adds test/input_editing.test.js (10 cases). Full suite: 410 passing. Co-Authored-By: Claude Opus 4.8 --- README.md | 8 +++ bin/smallcode.js | 18 +++++- src/tui/fullscreen.js | 126 +++++++++++++++++++++++++++++++------ test/input_editing.test.js | 111 ++++++++++++++++++++++++++++++++ 4 files changed, 242 insertions(+), 21 deletions(-) create mode 100644 test/input_editing.test.js diff --git a/README.md b/README.md index 062899c3..88da0530 100644 --- a/README.md +++ b/README.md @@ -142,8 +142,16 @@ SMALLCODE_BASE_URL=http://localhost:1234/v1 # OPENAI_API_KEY=sk-... # OPENROUTER_API_KEY=sk-or-v1-... # DEEPSEEK_API_KEY=sk-... + +# Optional: model response timeout in seconds (default 300 / 5 min). +# Raise this for slow CPU-only llama.cpp servers that need >5 min per turn. +# SMALLCODE_MODEL_TIMEOUT=1800 ``` +The model response timeout can also be set in `smallcode.toml` under `[model]` +as `timeout = `. If a turn exceeds it you'll see +`timeout: no response after s` — raise `SMALLCODE_MODEL_TIMEOUT` to fix. + See `.env.example` for all options. Also supports `smallcode.toml` for backwards compatibility. SmallCode can route each model tier to a different endpoint. 
This lets you keep diff --git a/bin/smallcode.js b/bin/smallcode.js index 8aea3fe7..470b8e76 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -302,8 +302,24 @@ async function runTUI(config) { console.log = (...args) => { captured += args.join(' ') + '\n'; }; // Create a mock rl for command handler const mockRl = { prompt: () => {}, close: () => { screen.leave(); process.exit(0); } }; + // /provider's interactive wizard needs a real stdin/stdout, which the + // fullscreen TUI captures — so it silently did nothing (issue #80). + // Inside the TUI, surface the current provider/model status instead and + // point the user at the paths that DO work here (/endpoint, /model) or + // the shell wizard. + const provMatch = /^\/provider\b/.test(cmd); + const provSub = cmd.replace(/^\/provider\s*/, '').trim(); try { - await handleCmd(cmd, mockRl); + if (provMatch && provSub !== 'status' && provSub !== '--status' && provSub !== '-s') { + await handleCmd('/provider status', mockRl); + captured += '\n The interactive provider wizard needs a real terminal and'; + captured += '\n cannot run inside the full-screen TUI. To reconfigure:'; + captured += '\n • /endpoint — switch the API base URL here'; + captured += '\n • /model — switch the model here'; + captured += '\n • run `smallcode /provider` from your shell for the full wizard'; + } else { + await handleCmd(cmd, mockRl); + } } catch (e) { captured += `Error: ${e.message}\n`; } diff --git a/src/tui/fullscreen.js b/src/tui/fullscreen.js index 96b77aa5..582e52c5 100644 --- a/src/tui/fullscreen.js +++ b/src/tui/fullscreen.js @@ -54,6 +54,22 @@ function visualCursorPosition(str, cursorIdx, maxVisualWidth) { return { line, col }; } +// Word-boundary helpers for input editing (issue #93). A "word" is a run of +// non-whitespace characters. Movement skips any whitespace adjacent to the +// cursor before scanning over the word, mirroring readline/Windows behaviour. 
+function prevWordBoundary(str, idx) { + let i = idx; + while (i > 0 && /\s/.test(str[i - 1])) i--; // skip whitespace to the left + while (i > 0 && !/\s/.test(str[i - 1])) i--; // skip the word itself + return i; +} +function nextWordBoundary(str, idx) { + let i = idx; + while (i < str.length && /\s/.test(str[i])) i++; // skip whitespace to the right + while (i < str.length && !/\s/.test(str[i])) i++; // skip the word itself + return i; +} + // ─── ANSI Escape Sequences ─────────────────────────────────────────────────── const ESC = '\x1b['; @@ -182,6 +198,7 @@ class FullScreenTUI { { cmd: '/quit', alias: '/q', desc: 'Exit SmallCode' }, { cmd: '/clear', alias: null, desc: 'Reset conversation' }, { cmd: '/model', alias: null, desc: 'Show/switch model' }, + { cmd: '/provider', alias: null, desc: 'Show provider / configure model' }, { cmd: '/endpoint', alias: null, desc: 'Switch API endpoint' }, { cmd: '/stats', alias: null, desc: 'Session statistics' }, { cmd: '/tokens', alias: null, desc: 'Token usage report' }, @@ -891,6 +908,60 @@ class FullScreenTUI { return; } + // ─── Line / word navigation (issue #93) ────────────────────────────── + // Home / Ctrl+A — start of line. Terminals send Home as \x1b[H, \x1b[1~, + // or \x1bOH depending on mode; Ctrl+A arrives as the raw byte \x01. + if (key === '\x1b[H' || key === '\x1b[1~' || key === '\x1bOH' || key === '\x01') { + this.inputCursor = 0; + this.render(); + return; + } + // End / Ctrl+E — end of line (\x1b[F, \x1b[4~, \x1bOF, or Ctrl+E = \x05). + if (key === '\x1b[F' || key === '\x1b[4~' || key === '\x1bOF' || key === '\x05') { + this.inputCursor = this.inputBuffer.length; + this.render(); + return; + } + // Ctrl+Left — previous word (\x1b[1;5D, and Alt+B = \x1bb as a fallback). 
+ if (key === '\x1b[1;5D' || key === '\x1b[1;3D' || key === '\x1bb') { + this.inputCursor = prevWordBoundary(this.inputBuffer, this.inputCursor); + this.render(); + return; + } + // Ctrl+Right — next word (\x1b[1;5C, and Alt+F = \x1bf as a fallback). + if (key === '\x1b[1;5C' || key === '\x1b[1;3C' || key === '\x1bf') { + this.inputCursor = nextWordBoundary(this.inputBuffer, this.inputCursor); + this.render(); + return; + } + // Ctrl+Backspace / Ctrl+W — delete the word to the left of the cursor. + // Ctrl+Backspace reaches us as \x17 (Ctrl+W) or \x1b\x7f on many terminals. + if (key === '\x17' || key === '\x1b\x7f' || key === '\x1b\b') { + const start = prevWordBoundary(this.inputBuffer, this.inputCursor); + this.inputBuffer = this.inputBuffer.slice(0, start) + this.inputBuffer.slice(this.inputCursor); + this.inputCursor = start; + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + this.render(); + return; + } + // Ctrl+Delete — delete the word to the right of the cursor (\x1b[3;5~). + if (key === '\x1b[3;5~' || key === '\x1b[3;3~') { + const end = nextWordBoundary(this.inputBuffer, this.inputCursor); + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + this.inputBuffer.slice(end); + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + this.render(); + return; + } + // Delete (forward) — remove the character under the cursor (\x1b[3~). 
+ if (key === '\x1b[3~') { + if (this.inputCursor < this.inputBuffer.length) { + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + this.inputBuffer.slice(this.inputCursor + 1); + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + } + this.render(); + return; + } + // Scroll chat — PgUp/PgDn, Shift+Up/Down, mouse wheel if (key === '\x1b[5~' || key === '\x1b[1;2A') { // PgUp or Shift+Up const maxBack = -(Math.max(0, this.chatLines.length - this.chatHeight)); @@ -917,6 +988,15 @@ class FullScreenTUI { this.render(); return; } + // Right-click — paste from clipboard (issue #96). Enabling SGR mouse + // tracking makes the terminal forward right-clicks to us instead of + // showing its native paste menu, so we honour the gesture ourselves. + // SGR button 2 (right) press is "\x1b[<2;X;YM", release "\x1b[<2;X;Ym". + if (/^\x1b\[<2;\d+;\d+m$/.test(key)) { + this._pasteFromClipboard(); + return; + } + // Mouse press / drag / release (SGR) — text selection in the chat panel. // Only the chat region selects; tool panel and input area are ignored. 
if (key.includes('\x1b[<')) { @@ -929,27 +1009,9 @@ class FullScreenTUI { return; } - // Ctrl+V — paste from clipboard (Windows) + // Ctrl+V — paste from clipboard (issue #96: right-click also routes here) if (key === '\x16') { - try { - const { execSync } = require('child_process'); - let clipboard = ''; - if (process.platform === 'win32') { - clipboard = execSync('powershell -command "Get-Clipboard"', { encoding: 'utf-8', timeout: 3000 }).trim(); - } else if (process.platform === 'darwin') { - clipboard = execSync('pbpaste', { encoding: 'utf-8', timeout: 3000 }).trim(); - } else { - clipboard = execSync('xclip -selection clipboard -o 2>/dev/null || xsel --clipboard --output 2>/dev/null', { encoding: 'utf-8', timeout: 3000, shell: true }).trim(); - } - if (clipboard) { - // Replace newlines with spaces for input line - const text = clipboard.replace(/[\r\n]+/g, ' '); - this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + text + this.inputBuffer.slice(this.inputCursor); - this.inputCursor += text.length; - this.commandPaletteOpen = this.inputBuffer.startsWith('/'); - this.render(); - } - } catch {} + this._pasteFromClipboard(); return; } @@ -976,6 +1038,30 @@ class FullScreenTUI { } } + // Insert clipboard contents at the cursor. Shared by Ctrl+V and the + // right-click gesture (issue #96). Newlines collapse to spaces so the + // single-line input stays intact. 
+ _pasteFromClipboard() { + try { + const { execSync } = require('child_process'); + let clipboard = ''; + if (process.platform === 'win32') { + clipboard = execSync('powershell -command "Get-Clipboard"', { encoding: 'utf-8', timeout: 3000 }).trim(); + } else if (process.platform === 'darwin') { + clipboard = execSync('pbpaste', { encoding: 'utf-8', timeout: 3000 }).trim(); + } else { + clipboard = execSync('xclip -selection clipboard -o 2>/dev/null || xsel --clipboard --output 2>/dev/null', { encoding: 'utf-8', timeout: 3000, shell: true }).trim(); + } + if (clipboard) { + const text = clipboard.replace(/[\r\n]+/g, ' '); + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + text + this.inputBuffer.slice(this.inputCursor); + this.inputCursor += text.length; + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + this.render(); + } + } catch {} + } + _onResize() { this._computeLayout(); this.render(); diff --git a/test/input_editing.test.js b/test/input_editing.test.js new file mode 100644 index 00000000..c04ed0c1 --- /dev/null +++ b/test/input_editing.test.js @@ -0,0 +1,111 @@ +'use strict'; + +// SmallCode — input line editing tests (issues #93, #96) +// Line/word navigation and right-click paste in the fullscreen TUI input. +// Drives _onKeypress directly with raw key bytes and asserts on the resulting +// inputBuffer / inputCursor state. render() is stubbed so no terminal is needed. + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { FullScreenTUI } = require('../src/tui/fullscreen'); + +function makeTui(buffer = '', cursor = null) { + const tui = new FullScreenTUI(); + tui.render = () => {}; // no terminal in tests + tui.inputBuffer = buffer; + tui.inputCursor = cursor == null ? 
buffer.length : cursor; + return tui; +} + +const send = (tui, key) => tui._onKeypress(Buffer.from(key, 'binary')); + +// ─── Line navigation (issue #93) ─────────────────────────────────────────── + +test('Home (\\x1b[H) and Ctrl+A (\\x01) move to start of line', async () => { + for (const key of ['\x1b[H', '\x1b[1~', '\x01']) { + const tui = makeTui('hello world'); + await send(tui, key); + assert.equal(tui.inputCursor, 0, `key ${JSON.stringify(key)}`); + } +}); + +test('End (\\x1b[F) and Ctrl+E (\\x05) move to end of line', async () => { + for (const key of ['\x1b[F', '\x1b[4~', '\x05']) { + const tui = makeTui('hello world', 0); + await send(tui, key); + assert.equal(tui.inputCursor, 11, `key ${JSON.stringify(key)}`); + } +}); + +// ─── Word navigation (issue #93) ─────────────────────────────────────────── + +test('Ctrl+Left (\\x1b[1;5D) jumps to the previous word boundary', async () => { + const tui = makeTui('hello world foo'); // cursor at end (15) + await send(tui, '\x1b[1;5D'); + assert.equal(tui.inputCursor, 12); // start of "foo" + await send(tui, '\x1b[1;5D'); + assert.equal(tui.inputCursor, 6); // start of "world" +}); + +test('Ctrl+Right (\\x1b[1;5C) jumps to the next word boundary', async () => { + const tui = makeTui('hello world foo', 0); + await send(tui, '\x1b[1;5C'); + assert.equal(tui.inputCursor, 5); // end of "hello" + await send(tui, '\x1b[1;5C'); + assert.equal(tui.inputCursor, 11); // end of "world" +}); + +// ─── Word / char deletion (issue #93) ────────────────────────────────────── + +test('Ctrl+W (\\x17) deletes the word to the left of the cursor', async () => { + const tui = makeTui('hello world foo'); + await send(tui, '\x17'); + assert.equal(tui.inputBuffer, 'hello world '); + assert.equal(tui.inputCursor, 12); +}); + +test('Ctrl+Delete (\\x1b[3;5~) deletes the word to the right', async () => { + const tui = makeTui('hello world foo', 6); // cursor before "world" + await send(tui, '\x1b[3;5~'); + assert.equal(tui.inputBuffer, 
'hello foo'); + assert.equal(tui.inputCursor, 6); +}); + +test('Delete (\\x1b[3~) removes the character under the cursor', async () => { + const tui = makeTui('abc', 1); + await send(tui, '\x1b[3~'); + assert.equal(tui.inputBuffer, 'ac'); + assert.equal(tui.inputCursor, 1); +}); + +test('word-delete keeps the command palette state in sync', async () => { + const tui = makeTui('/model gpt', 10); + await send(tui, '\x17'); // delete the "gpt" argument + assert.equal(tui.inputBuffer, '/model '); + assert.equal(tui.commandPaletteOpen, true); // still a slash command +}); + +// ─── Right-click paste (issue #96) ───────────────────────────────────────── + +test('right-click release pastes clipboard at the cursor', async () => { + const tui = makeTui('ab', 1); + tui._pasteFromClipboard = function () { // stub the OS clipboard read + const text = 'XY'; + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + text + this.inputBuffer.slice(this.inputCursor); + this.inputCursor += text.length; + }; + await send(tui, '\x1b[<2;10;5m'); // SGR button-2 (right) release + assert.equal(tui.inputBuffer, 'aXYb'); + assert.equal(tui.inputCursor, 3); +}); + +test('right-click press and left-click do not trigger paste', async () => { + let pasted = false; + const tui = makeTui('ab', 1); + tui._pasteFromClipboard = () => { pasted = true; }; + tui._onMouseSelect = () => true; // swallow selection handling + await send(tui, '\x1b[<2;10;5M'); // right-button PRESS (uppercase M) + await send(tui, '\x1b[<0;10;5m'); // left-button release + assert.equal(pasted, false); +}); From 13cfc2fa4e70fcd9039eae3819e2cc76696ec64a Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 11:25:12 -0700 Subject: [PATCH 18/27] feat(quality-monitor): SMALLCODE_QUALITY_MONITOR_QUIET suppresses the warning line but keeps the corrective steer Co-Authored-By: Claude Opus 4.8 --- bin/smallcode.js | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bin/smallcode.js 
b/bin/smallcode.js index 470b8e76..5371ba07 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -1116,8 +1116,15 @@ async function runAgentLoop(userMessage, config) { .filter(Boolean); const signal = qualityMonitor.inspect({ message, knownTools }); if (signal) { - if (_fullscreenRef) _fullscreenRef.addTool('quality', 'warn', signal.kind); - else console.log(` \x1b[33m⚠ quality-monitor: ${signal.kind}\x1b[0m`); + // SMALLCODE_QUALITY_MONITOR_QUIET=true suppresses the visible warning + // line but KEEPS the corrective steer (the injection below) — useful for + // driven/non-interactive runs where the ⚠ noise isn't wanted but the + // model should still be told the correct tool name. + const quiet = String(process.env.SMALLCODE_QUALITY_MONITOR_QUIET || 'false').toLowerCase() === 'true'; + if (!quiet) { + if (_fullscreenRef) _fullscreenRef.addTool('quality', 'warn', signal.kind); + else console.log(` \x1b[33m⚠ quality-monitor: ${signal.kind}\x1b[0m`); + } conversationHistory.push({ role: 'assistant', content: message.content || '' }); conversationHistory.push({ role: 'user', content: signal.injection }); continue; From 8da0708b3db1e8623497c8d5ec9f157d81a65af4 Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 11:33:17 -0700 Subject: [PATCH 19/27] refactor(tui): extract resolveTuiCommand helper + tests (#80) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the /provider TUI-routing logic out of the inline onCommand handler in bin/smallcode.js into bin/tui_commands.js as a pure resolveTuiCommand(cmd) function returning { command, guidance }. Behavior is unchanged — the guidance text is byte-identical to the previous inline version — but the mapping is now unit-testable without booting the full TUI. 
Adds test/tui_commands.test.js (7 cases): bare /provider reroutes to status + guidance, status/--status/-s pass through, unknown subcommands reroute, non-provider commands untouched, /providerx word-boundary, and defensive handling of empty/undefined input. Full suite: 417 passing. Co-Authored-By: Claude Opus 4.8 --- bin/smallcode.js | 25 ++++++----------- bin/tui_commands.js | 44 +++++++++++++++++++++++++++++ test/tui_commands.test.js | 59 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 17 deletions(-) create mode 100644 bin/tui_commands.js create mode 100644 test/tui_commands.test.js diff --git a/bin/smallcode.js b/bin/smallcode.js index 5371ba07..bbb6d479 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -302,24 +302,15 @@ async function runTUI(config) { console.log = (...args) => { captured += args.join(' ') + '\n'; }; // Create a mock rl for command handler const mockRl = { prompt: () => {}, close: () => { screen.leave(); process.exit(0); } }; - // /provider's interactive wizard needs a real stdin/stdout, which the - // fullscreen TUI captures — so it silently did nothing (issue #80). - // Inside the TUI, surface the current provider/model status instead and - // point the user at the paths that DO work here (/endpoint, /model) or - // the shell wizard. - const provMatch = /^\/provider\b/.test(cmd); - const provSub = cmd.replace(/^\/provider\s*/, '').trim(); + // Some commands (e.g. /provider's interactive wizard) need a real + // stdin/stdout the fullscreen TUI captures, so they silently did nothing + // (issue #80). resolveTuiCommand swaps them for a non-interactive + // equivalent plus guidance. See ./tui_commands for the mapping. 
+ const { resolveTuiCommand } = require('./tui_commands'); + const { command: routedCmd, guidance } = resolveTuiCommand(cmd); try { - if (provMatch && provSub !== 'status' && provSub !== '--status' && provSub !== '-s') { - await handleCmd('/provider status', mockRl); - captured += '\n The interactive provider wizard needs a real terminal and'; - captured += '\n cannot run inside the full-screen TUI. To reconfigure:'; - captured += '\n • /endpoint — switch the API base URL here'; - captured += '\n • /model — switch the model here'; - captured += '\n • run `smallcode /provider` from your shell for the full wizard'; - } else { - await handleCmd(cmd, mockRl); - } + await handleCmd(routedCmd, mockRl); + if (guidance) captured += guidance; } catch (e) { captured += `Error: ${e.message}\n`; } diff --git a/bin/tui_commands.js b/bin/tui_commands.js new file mode 100644 index 00000000..ee3da51d --- /dev/null +++ b/bin/tui_commands.js @@ -0,0 +1,44 @@ +'use strict'; + +// Slash-command resolution for the fullscreen TUI. +// +// The fullscreen TUI captures stdout and hands slash commands a mock readline, +// so any command whose handler needs a real interactive terminal can't run +// inside it. resolveTuiCommand() maps a raw command to: +// { command, guidance } +// command — the command string to pass to the normal command handler +// guidance — extra text to append after the captured output, or null +// +// Today this only special-cases /provider (issue #80): its interactive wizard +// is swapped for a non-interactive status dump plus a pointer to the paths that +// DO work inside the TUI (/endpoint, /model, or the shell wizard). Everything +// else passes through unchanged. + +// Subcommands of /provider that are already non-interactive and safe to run +// inside the TUI as-is. 
+const PROVIDER_STATUS_SUBS = new Set(['status', '--status', '-s']); + +const PROVIDER_GUIDANCE = [ + '', + ' The interactive provider wizard needs a real terminal and', + ' cannot run inside the full-screen TUI. To reconfigure:', + ' • /endpoint — switch the API base URL here', + ' • /model — switch the model here', + ' • run `smallcode /provider` from your shell for the full wizard', +].join('\n'); + +function resolveTuiCommand(cmd) { + const raw = String(cmd || ''); + if (!/^\/provider\b/.test(raw)) { + return { command: raw, guidance: null }; + } + const sub = raw.replace(/^\/provider\s*/, '').trim(); + if (PROVIDER_STATUS_SUBS.has(sub)) { + return { command: raw, guidance: null }; + } + // Bare /provider (or an unknown subcommand): show status + guidance instead + // of silently launching a wizard the TUI can't drive. + return { command: '/provider status', guidance: PROVIDER_GUIDANCE }; +} + +module.exports = { resolveTuiCommand, PROVIDER_GUIDANCE }; diff --git a/test/tui_commands.test.js b/test/tui_commands.test.js new file mode 100644 index 00000000..34a2ef77 --- /dev/null +++ b/test/tui_commands.test.js @@ -0,0 +1,59 @@ +'use strict'; + +// SmallCode — TUI slash-command resolution tests (issue #80) +// resolveTuiCommand maps a raw slash command to { command, guidance }. The +// fullscreen TUI can't host /provider's interactive wizard, so a bare +// /provider is rerouted to `/provider status` plus guidance text; everything +// else (including the already-non-interactive status subcommands) passes +// through unchanged. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { resolveTuiCommand, PROVIDER_GUIDANCE } = require('../bin/tui_commands'); + +test('bare /provider reroutes to status and attaches guidance', () => { + const r = resolveTuiCommand('/provider'); + assert.equal(r.command, '/provider status'); + assert.equal(r.guidance, PROVIDER_GUIDANCE); +}); + +test('/provider status|--status|-s pass through with no guidance', () => { + for (const sub of ['status', '--status', '-s']) { + const r = resolveTuiCommand(`/provider ${sub}`); + assert.equal(r.command, `/provider ${sub}`, sub); + assert.equal(r.guidance, null, sub); + } +}); + +test('an unknown /provider subcommand still reroutes to status + guidance', () => { + const r = resolveTuiCommand('/provider reset'); + assert.equal(r.command, '/provider status'); + assert.equal(r.guidance, PROVIDER_GUIDANCE); +}); + +test('non-provider commands pass through untouched', () => { + for (const cmd of ['/model', '/endpoint', '/help', '/quit']) { + const r = resolveTuiCommand(cmd); + assert.equal(r.command, cmd, cmd); + assert.equal(r.guidance, null, cmd); + } +}); + +test('a command that merely starts with "provider" is not matched', () => { + // \b word boundary: /providerx is a different command, not /provider. 
+ const r = resolveTuiCommand('/providerx'); + assert.equal(r.command, '/providerx'); + assert.equal(r.guidance, null); +}); + +test('guidance points at the in-TUI alternatives and the shell wizard', () => { + assert.match(PROVIDER_GUIDANCE, /\/endpoint/); + assert.match(PROVIDER_GUIDANCE, /\/model/); + assert.match(PROVIDER_GUIDANCE, /smallcode \/provider/); +}); + +test('non-string / empty input is handled defensively', () => { + assert.deepEqual(resolveTuiCommand(''), { command: '', guidance: null }); + assert.deepEqual(resolveTuiCommand(undefined), { command: '', guidance: null }); +}); From b8e0eb4c0887278b6cb46f583fb3b08df861b7c2 Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 11:34:35 -0700 Subject: [PATCH 20/27] fix(minimax): alias layer + quality-monitor parrot-loop fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes for small models (minimax) misbehaving: 1. Add src/tools/tool_aliases.js — maps OpenAI/Claude-style tool names (Read, Edit, Bash, Grep, Glob, str_replace, LS, …) to SmallCode's real tool names, re-keying argument names as needed (file_path→path, old_string→old_str, etc.). Wire normalizeToolCall() in bin/smallcode.js right before the quality monitor and tool dispatch so the monitor sees real names (no false hallucinated_tool) and dispatch executes real tools. Also filter out quality-monitor/quality_monitor echo calls before they can re-trigger the feedback loop. 2. Change all four [QUALITY-MONITOR] injection prefixes in src/governor/quality_monitor.js to "Self-check note:" — a plain-text prefix small models won't parrot back as a bracketed tool name. Tests: test/tool_aliases.test.js (31 cases); updated quality_monitor.test.js. All 431 tests pass (npm test). 
Co-Authored-By: Claude Opus 4.8 --- bin/smallcode.js | 17 ++ src/governor/quality_monitor.js | 8 +- src/tools/tool_aliases.js | 198 +++++++++++++++++++++++ test/quality_monitor.test.js | 2 +- test/tool_aliases.test.js | 267 ++++++++++++++++++++++++++++++++ 5 files changed, 487 insertions(+), 5 deletions(-) create mode 100644 src/tools/tool_aliases.js create mode 100644 test/tool_aliases.test.js diff --git a/bin/smallcode.js b/bin/smallcode.js index 8aea3fe7..15fa4298 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -68,6 +68,7 @@ const { ToolScorer, checkAndEnforceHardFail, classifyTask, classifyTaskAsync } = const { EscalationEngine } = require('./escalation'); const { EarlyStopDetector } = require('../src/governor/early_stop'); const { QualityMonitor } = require('../src/governor/quality_monitor'); +const { normalizeToolCall } = require('../src/tools/tool_aliases'); const { applyReadGuard } = require('../src/session/read_guard'); const { TokenMonitor } = require('./token_monitor'); const { TraceRecorder } = require('./trace_recorder'); @@ -1086,6 +1087,22 @@ async function runAgentLoop(userMessage, config) { } catch {} } + // ── TOOL ALIAS NORMALIZATION ───────────────────────────────────────── + // Rename OpenAI/Claude-style tool names (Read, Edit, Bash, str_replace …) + // to SmallCode's real names BEFORE the quality monitor sees them so the + // monitor doesn't flag them as hallucinated, and before dispatch so the + // real handler runs. Also drop quality-monitor echo calls that small models + // sometimes parrot back as tool names, preventing the feedback loop. 
+ if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) { + message.tool_calls = message.tool_calls + .map(normalizeToolCall) + .filter(tc => { + if (!tc || !tc.function) return false; + const n = tc.function.name; + return n !== 'quality-monitor' && n !== 'quality_monitor'; + }); + } + // ── QUALITY MONITOR (itsy port) ────────────────────────────────────── // Catches structural failure modes the model emitted on this turn: // empty turns, blank tool names, hallucinated tool names, and exact diff --git a/src/governor/quality_monitor.js b/src/governor/quality_monitor.js index a64ed92f..5919399a 100644 --- a/src/governor/quality_monitor.js +++ b/src/governor/quality_monitor.js @@ -58,7 +58,7 @@ class QualityMonitor { return this._fire({ kind: 'empty_response', injection: - '[QUALITY-MONITOR] Your previous response had no text and no tool ' + + 'Self-check note: Your previous response had no text and no tool ' + 'calls. Continue the task — either reply to the user or invoke a ' + 'tool. Do not return an empty turn.', }); @@ -72,7 +72,7 @@ class QualityMonitor { return this._fire({ kind: 'empty_tool_name', injection: - '[QUALITY-MONITOR] You emitted a tool call with an empty name. ' + + 'Self-check note: You emitted a tool call with an empty name. ' + 'Restart the call with a real tool name. Available tools are ' + `listed in the system prompt (e.g. ${this._sampleTools(knownTools)}).`, }); @@ -88,7 +88,7 @@ class QualityMonitor { return this._fire({ kind: 'hallucinated_tool', injection: - `[QUALITY-MONITOR] Tool "${name}" does not exist. Pick one ` + + `Self-check note: Tool "${name}" does not exist. Pick one ` + `from the registered tool list. 
Closest matches: ` + `${this._closestMatches(name, knownTools)}.`, }); @@ -107,7 +107,7 @@ class QualityMonitor { kind: 'repeat_call', signature: sig, injection: - `[QUALITY-MONITOR] You are repeating the same tool call ` + + `Self-check note: You are repeating the same tool call ` + `(${tc.function.name}) with identical arguments. The previous ` + 'call already returned a result — read it before retrying. If ' + 'you must retry, change the arguments first.', diff --git a/src/tools/tool_aliases.js b/src/tools/tool_aliases.js new file mode 100644 index 00000000..f487e8cd --- /dev/null +++ b/src/tools/tool_aliases.js @@ -0,0 +1,198 @@ +'use strict'; + +// SmallCode — Tool alias layer +// +// Maps OpenAI/Claude-style tool names that small models (e.g. minimax) tend +// to hallucinate onto SmallCode's real built-in tools, re-keying argument +// names as needed. Unknown names pass through unchanged. +// +// Usage (see bin/smallcode.js wiring): +// const { normalizeToolCall } = require('../src/tools/tool_aliases'); +// message.tool_calls = message.tool_calls.map(normalizeToolCall); + +// Real tool names that must NEVER be shadowed by an alias. +// If the incoming name is already one of these, pass through unchanged. +const REAL_TOOLS = new Set([ + 'read_file', 'write_file', 'patch', 'bash', + 'search', 'find_files', 'memory_remember', 'memory_recall', + 'select_category', 'done', +]); + +/** + * ALIASES maps lower-cased alias names to: + * { tool: <realToolName>, mapArgs: (parsedArgs) => remappedArgs } + * + * mapArgs receives a plain object (already JSON-parsed) and returns a new + * plain object with keys renamed according to the alias spec.
+ */ +const ALIASES = { + // ── read_file ───────────────────────────────────────────────────────────── + read: { + tool: 'read_file', + mapArgs(a) { + const out = { ...a }; + // file_path / filepath → path + if ('file_path' in out && !('path' in out)) { out.path = out.file_path; delete out.file_path; } + if ('filepath' in out && !('path' in out)) { out.path = out.filepath; delete out.filepath; } + // line / offset → start_line (keep start_line/end_line as-is) + if ('line' in out && !('start_line' in out)) { out.start_line = out.line; delete out.line; } + if ('offset' in out && !('start_line' in out)) { out.start_line = out.offset; delete out.offset; } + return out; + }, + }, + view: { + tool: 'read_file', + mapArgs(a) { return ALIASES.read.mapArgs(a); }, + }, + + // ── write_file ───────────────────────────────────────────────────────────── + write: { + tool: 'write_file', + mapArgs(a) { + const out = { ...a }; + if ('file_path' in out && !('path' in out)) { out.path = out.file_path; delete out.file_path; } + return out; + }, + }, + create_file: { + tool: 'write_file', + mapArgs(a) { return ALIASES.write.mapArgs(a); }, + }, + create: { + tool: 'write_file', + mapArgs(a) { return ALIASES.write.mapArgs(a); }, + }, + + // ── patch ────────────────────────────────────────────────────────────────── + edit: { + tool: 'patch', + mapArgs(a) { + const out = { ...a }; + if ('file_path' in out && !('path' in out)) { out.path = out.file_path; delete out.file_path; } + if ('old_string' in out && !('old_str' in out)) { out.old_str = out.old_string; delete out.old_string; } + if ('new_string' in out && !('new_str' in out)) { out.new_str = out.new_string; delete out.new_string; } + return out; + }, + }, + str_replace: { + tool: 'patch', + mapArgs(a) { return ALIASES.edit.mapArgs(a); }, + }, + str_replace_editor: { + tool: 'patch', + mapArgs(a) { return ALIASES.edit.mapArgs(a); }, + }, + replace: { + tool: 'patch', + mapArgs(a) { return ALIASES.edit.mapArgs(a); }, + }, + + // ── 
bash ─────────────────────────────────────────────────────────────────── + bash: { + tool: 'bash', + mapArgs(a) { + const out = { ...a }; + if ('cmd' in out && !('command' in out)) { out.command = out.cmd; delete out.cmd; } + return out; + }, + }, + shell: { + tool: 'bash', + mapArgs(a) { return ALIASES.bash.mapArgs(a); }, + }, + run_command: { + tool: 'bash', + mapArgs(a) { return ALIASES.bash.mapArgs(a); }, + }, + + // ── search ───────────────────────────────────────────────────────────────── + grep: { + tool: 'search', + mapArgs(a) { + const out = { ...a }; + if ('query' in out && !('pattern' in out)) { out.pattern = out.query; delete out.query; } + return out; + }, + }, + + // ── find_files ───────────────────────────────────────────────────────────── + glob: { + tool: 'find_files', + mapArgs(a) { + const out = { ...a }; + if ('query' in out && !('pattern' in out)) { out.pattern = out.query; delete out.query; } + return out; + }, + }, + ls: { + tool: 'find_files', + mapArgs(a) { + const dir = (a && a.path) ? String(a.path).replace(/[\\/]+$/, '') : '.'; + return { pattern: dir + '/*' }; + }, + }, + list_dir: { + tool: 'find_files', + mapArgs(a) { return ALIASES.ls.mapArgs(a); }, + }, + list_directory: { + tool: 'find_files', + mapArgs(a) { return ALIASES.ls.mapArgs(a); }, + }, +}; + +// Also register upper-case variants used by Claude Code tooling (Read, Write, +// Edit, Bash, Grep, Glob, LS) — identical mapArgs, just different key casing. +// We do this by normalising to lower-case before lookup, so no extra entries +// are needed (see normalizeToolCall below). + +/** + * Normalize a single OpenAI-shape tool_call. + * + * @param {{ function: { name: string, arguments: string } }} toolCall + * @returns {object} A new tool_call with real name + remapped args, or the + * original object if no alias matched. 
+ */ +function normalizeToolCall(toolCall) { + if (!toolCall || !toolCall.function) return toolCall; + + const rawName = toolCall.function.name; + if (typeof rawName !== 'string') return toolCall; + + // If the name is already an exact match to a real tool, don't touch it. + if (REAL_TOOLS.has(rawName)) return toolCall; + + const key = rawName.toLowerCase(); + + const alias = ALIASES[key]; + if (!alias) return toolCall; // unknown name — pass through unchanged + + // Parse args (robust to malformed JSON). + let parsedArgs; + try { + parsedArgs = JSON.parse(toolCall.function.arguments || '{}'); + if (typeof parsedArgs !== 'object' || parsedArgs === null) parsedArgs = {}; + } catch { + // Malformed JSON — rename the tool but keep args string as-is + return { + ...toolCall, + function: { + ...toolCall.function, + name: alias.tool, + }, + }; + } + + const remappedArgs = alias.mapArgs(parsedArgs); + + return { + ...toolCall, + function: { + ...toolCall.function, + name: alias.tool, + arguments: JSON.stringify(remappedArgs), + }, + }; +} + +module.exports = { ALIASES, REAL_TOOLS, normalizeToolCall }; diff --git a/test/quality_monitor.test.js b/test/quality_monitor.test.js index 48569cac..77266020 100644 --- a/test/quality_monitor.test.js +++ b/test/quality_monitor.test.js @@ -10,7 +10,7 @@ test('empty response (no text + no tool calls) fires empty_response', () => { const sig = qm.inspect({ message: { content: ' ', tool_calls: [] }, knownTools: ['read_file'] }); assert.ok(sig); assert.equal(sig.kind, 'empty_response'); - assert.match(sig.injection, /\[QUALITY-MONITOR\]/); + assert.match(sig.injection, /Self-check note:/); }); test('empty tool name fires empty_tool_name', () => { diff --git a/test/tool_aliases.test.js b/test/tool_aliases.test.js new file mode 100644 index 00000000..06af7f2e --- /dev/null +++ b/test/tool_aliases.test.js @@ -0,0 +1,267 @@ +'use strict'; + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { ALIASES, 
REAL_TOOLS, normalizeToolCall } = require('../src/tools/tool_aliases'); + +// Helper: build a minimal OpenAI-shape tool_call +function tc(name, argsObj) { + return { function: { name, arguments: JSON.stringify(argsObj) } }; +} + +// ── Read → read_file ───────────────────────────────────────────────────────── + +test('Read → read_file, file_path → path', () => { + const result = normalizeToolCall(tc('Read', { file_path: 'src/foo.js' })); + assert.equal(result.function.name, 'read_file'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.path, 'src/foo.js'); + assert.equal(args.file_path, undefined); +}); + +test('read (lowercase) → read_file', () => { + const result = normalizeToolCall(tc('read', { file_path: 'a.ts' })); + assert.equal(result.function.name, 'read_file'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.path, 'a.ts'); +}); + +test('view → read_file, filepath → path', () => { + const result = normalizeToolCall(tc('view', { filepath: 'lib/x.js', start_line: 10 })); + assert.equal(result.function.name, 'read_file'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.path, 'lib/x.js'); + assert.equal(args.start_line, 10); +}); + +test('READ (all-caps) → read_file', () => { + const result = normalizeToolCall(tc('READ', { file_path: 'b.py' })); + assert.equal(result.function.name, 'read_file'); +}); + +// ── Edit / str_replace → patch ─────────────────────────────────────────────── + +test('Edit → patch, old_string/new_string → old_str/new_str', () => { + const result = normalizeToolCall(tc('Edit', { + file_path: 'a.ts', + old_string: 'x', + new_string: 'y', + })); + assert.equal(result.function.name, 'patch'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.path, 'a.ts'); + assert.equal(args.old_str, 'x'); + assert.equal(args.new_str, 'y'); + assert.equal(args.old_string, undefined); + assert.equal(args.new_string, undefined); + assert.equal(args.file_path, 
undefined); +}); + +test('str_replace → patch', () => { + const result = normalizeToolCall(tc('str_replace', { + file_path: 'b.js', + old_string: 'foo', + new_string: 'bar', + })); + assert.equal(result.function.name, 'patch'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.old_str, 'foo'); + assert.equal(args.new_str, 'bar'); +}); + +test('str_replace_editor → patch', () => { + const result = normalizeToolCall(tc('str_replace_editor', { file_path: 'c.ts', old_string: 'a', new_string: 'b' })); + assert.equal(result.function.name, 'patch'); +}); + +test('replace → patch', () => { + const result = normalizeToolCall(tc('replace', { file_path: 'c.ts', old_string: 'a', new_string: 'b' })); + assert.equal(result.function.name, 'patch'); +}); + +// ── Bash → bash ─────────────────────────────────────────────────────────────── + +test('Bash (capitalized) → bash alias applied', () => { + // 'Bash' is NOT in REAL_TOOLS (exact match), so the bash alias fires. + // The alias is idempotent: tool stays 'bash', command key preserved. 
+ const result = normalizeToolCall(tc('Bash', { command: 'ls -la' })); + assert.equal(result.function.name, 'bash'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.command, 'ls -la'); +}); + +test('shell → bash, cmd → command', () => { + const result = normalizeToolCall(tc('shell', { cmd: 'echo hi' })); + assert.equal(result.function.name, 'bash'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.command, 'echo hi'); + assert.equal(args.cmd, undefined); +}); + +test('run_command → bash', () => { + const result = normalizeToolCall(tc('run_command', { command: 'npm test' })); + assert.equal(result.function.name, 'bash'); +}); + +// ── Grep → search ───────────────────────────────────────────────────────────── + +test('Grep → search, pattern key preserved', () => { + const result = normalizeToolCall(tc('Grep', { pattern: 'foo.*bar', path: 'src/' })); + assert.equal(result.function.name, 'search'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'foo.*bar'); + assert.equal(args.path, 'src/'); +}); + +test('grep (lowercase) → search, query → pattern', () => { + const result = normalizeToolCall(tc('grep', { query: 'myFunc' })); + assert.equal(result.function.name, 'search'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'myFunc'); + assert.equal(args.query, undefined); +}); + +// ── Glob → find_files ───────────────────────────────────────────────────────── + +test('Glob → find_files, pattern key preserved', () => { + const result = normalizeToolCall(tc('Glob', { pattern: '**/*.ts' })); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, '**/*.ts'); +}); + +test('glob (lowercase) → find_files, query → pattern', () => { + const result = normalizeToolCall(tc('glob', { query: '**/*.js' })); + assert.equal(result.function.name, 'find_files'); + const args = 
JSON.parse(result.function.arguments); + assert.equal(args.pattern, '**/*.js'); +}); + +// ── LS / list_dir → find_files with derived pattern ────────────────────────── + +test('LS → find_files, path → pattern with /*', () => { + const result = normalizeToolCall(tc('LS', { path: 'src/tools' })); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'src/tools/*'); +}); + +test('ls → find_files, trailing slash stripped from path', () => { + const result = normalizeToolCall(tc('ls', { path: 'src/' })); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'src/*'); +}); + +test('ls with no path → find_files with ./*', () => { + const result = normalizeToolCall(tc('ls', {})); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, './*'); +}); + +test('list_dir → find_files', () => { + const result = normalizeToolCall(tc('list_dir', { path: 'bin' })); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'bin/*'); +}); + +test('list_directory → find_files', () => { + const result = normalizeToolCall(tc('list_directory', { path: 'src' })); + assert.equal(result.function.name, 'find_files'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.pattern, 'src/*'); +}); + +// ── Real tool names pass through untouched ──────────────────────────────────── + +test('read_file (real name) passes through unchanged', () => { + const input = tc('read_file', { path: 'foo.js' }); + const result = normalizeToolCall(input); + assert.strictEqual(result, input); // same reference — not copied +}); + +test('patch (real name) passes through unchanged', () => { + const input = tc('patch', { path: 'a.js', old_str: 'x', new_str: 'y' }); + const result = 
normalizeToolCall(input); + assert.strictEqual(result, input); +}); + +test('write_file (real name) passes through unchanged', () => { + const input = tc('write_file', { path: 'new.js', content: 'hello' }); + const result = normalizeToolCall(input); + assert.strictEqual(result, input); +}); + +// ── Unknown names pass through ──────────────────────────────────────────────── + +test('unknown tool name passes through unchanged', () => { + const input = tc('some_custom_tool', { foo: 'bar' }); + const result = normalizeToolCall(input); + assert.strictEqual(result, input); +}); + +test('totally unknown name returns original object', () => { + const input = { function: { name: 'xyzzy', arguments: '{"a":1}' } }; + const result = normalizeToolCall(input); + assert.strictEqual(result, input); +}); + +// ── Malformed JSON args don't throw ────────────────────────────────────────── + +test('malformed JSON args: renames tool but keeps args string', () => { + const input = { function: { name: 'Edit', arguments: '{not valid json' } }; + let result; + assert.doesNotThrow(() => { + result = normalizeToolCall(input); + }); + assert.equal(result.function.name, 'patch'); + // args kept as-is (the bad string) + assert.equal(result.function.arguments, '{not valid json'); +}); + +test('empty args string: renames tool, produces empty object args', () => { + // 'Bash' → alias fires; empty string is falsy so falls back to '{}', + // parses cleanly, mapArgs({}){} → '{}'. No throw. 
+ let result; + assert.doesNotThrow(() => { + result = normalizeToolCall({ function: { name: 'Bash', arguments: '' } }); + }); + assert.equal(result.function.name, 'bash'); + assert.equal(result.function.arguments, '{}'); +}); + +test('null args string: renames and produces empty object args', () => { + const result = normalizeToolCall({ function: { name: 'Grep', arguments: null } }); + assert.doesNotThrow(() => {}); + assert.equal(result.function.name, 'search'); +}); + +// ── normalizeToolCall is robust to bad inputs ───────────────────────────────── + +test('null input returns null', () => { + assert.equal(normalizeToolCall(null), null); +}); + +test('missing function property returns input unchanged', () => { + const input = { id: 'call_123' }; + assert.strictEqual(normalizeToolCall(input), input); +}); + +// ── Verify the key example from the spec ───────────────────────────────────── + +test('spec example: Edit with file_path/old_string/new_string → patch with path/old_str/new_str', () => { + const result = normalizeToolCall({ + function: { + name: 'Edit', + arguments: '{"file_path":"a.ts","old_string":"x","new_string":"y"}', + }, + }); + assert.equal(result.function.name, 'patch'); + const args = JSON.parse(result.function.arguments); + assert.equal(args.path, 'a.ts'); + assert.equal(args.old_str, 'x'); + assert.equal(args.new_str, 'y'); +}); From 8ff2042ff1a0573afad40d370a07a99893420d7b Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 11:19:40 -0700 Subject: [PATCH 21/27] fix(tui): keyboard nav, right-click paste, /provider in TUI (#80,#93,#96) #93 Add line/word navigation to the fullscreen input: Home/End (and Ctrl+A/Ctrl+E), Ctrl+Left/Right word jumps, Ctrl+W / Ctrl+Backspace word-delete-left, Ctrl+Delete word-delete-right, and forward Delete. Previously only plain arrows and Backspace were handled. #96 Honor right-click as paste. 
SGR mouse tracking makes the terminal forward right-clicks to the app instead of showing its native paste menu, which broke right-click paste on Linux. Handle SGR button-2 release as a clipboard paste; refactor the Ctrl+V clipboard read into a shared _pasteFromClipboard() helper. #80 Make /provider work in the fullscreen TUI. It now appears in the command palette, and since its interactive wizard cannot run inside the captured-stdout TUI, /provider shows current provider status plus guidance to use /endpoint, /model, or the shell wizard instead of silently doing nothing. #76 Document the already-supported model response timeout (SMALLCODE_MODEL_TIMEOUT / smallcode.toml [model].timeout) in the README. Adds test/input_editing.test.js (10 cases). Full suite: 410 passing. Co-Authored-By: Claude Opus 4.8 --- README.md | 8 +++ bin/smallcode.js | 18 +++++- src/tui/fullscreen.js | 126 +++++++++++++++++++++++++++++++------ test/input_editing.test.js | 111 ++++++++++++++++++++++++++++++++ 4 files changed, 242 insertions(+), 21 deletions(-) create mode 100644 test/input_editing.test.js diff --git a/README.md b/README.md index 062899c3..88da0530 100644 --- a/README.md +++ b/README.md @@ -142,8 +142,16 @@ SMALLCODE_BASE_URL=http://localhost:1234/v1 # OPENAI_API_KEY=sk-... # OPENROUTER_API_KEY=sk-or-v1-... # DEEPSEEK_API_KEY=sk-... + +# Optional: model response timeout in seconds (default 300 / 5 min). +# Raise this for slow CPU-only llama.cpp servers that need >5 min per turn. +# SMALLCODE_MODEL_TIMEOUT=1800 ``` +The model response timeout can also be set in `smallcode.toml` under `[model]` +as `timeout = <seconds>`. If a turn exceeds it you'll see +`timeout: no response after <N>s` — raise `SMALLCODE_MODEL_TIMEOUT` to fix. + See `.env.example` for all options. Also supports `smallcode.toml` for backwards compatibility. SmallCode can route each model tier to a different endpoint.
This lets you keep diff --git a/bin/smallcode.js b/bin/smallcode.js index 15fa4298..2582bad1 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -303,8 +303,24 @@ async function runTUI(config) { console.log = (...args) => { captured += args.join(' ') + '\n'; }; // Create a mock rl for command handler const mockRl = { prompt: () => {}, close: () => { screen.leave(); process.exit(0); } }; + // /provider's interactive wizard needs a real stdin/stdout, which the + // fullscreen TUI captures — so it silently did nothing (issue #80). + // Inside the TUI, surface the current provider/model status instead and + // point the user at the paths that DO work here (/endpoint, /model) or + // the shell wizard. + const provMatch = /^\/provider\b/.test(cmd); + const provSub = cmd.replace(/^\/provider\s*/, '').trim(); try { - await handleCmd(cmd, mockRl); + if (provMatch && provSub !== 'status' && provSub !== '--status' && provSub !== '-s') { + await handleCmd('/provider status', mockRl); + captured += '\n The interactive provider wizard needs a real terminal and'; + captured += '\n cannot run inside the full-screen TUI. To reconfigure:'; + captured += '\n • /endpoint — switch the API base URL here'; + captured += '\n • /model — switch the model here'; + captured += '\n • run `smallcode /provider` from your shell for the full wizard'; + } else { + await handleCmd(cmd, mockRl); + } } catch (e) { captured += `Error: ${e.message}\n`; } diff --git a/src/tui/fullscreen.js b/src/tui/fullscreen.js index 96b77aa5..582e52c5 100644 --- a/src/tui/fullscreen.js +++ b/src/tui/fullscreen.js @@ -54,6 +54,22 @@ function visualCursorPosition(str, cursorIdx, maxVisualWidth) { return { line, col }; } +// Word-boundary helpers for input editing (issue #93). A "word" is a run of +// non-whitespace characters. Movement skips any whitespace adjacent to the +// cursor before scanning over the word, mirroring readline/Windows behaviour. 
+function prevWordBoundary(str, idx) { + let i = idx; + while (i > 0 && /\s/.test(str[i - 1])) i--; // skip whitespace to the left + while (i > 0 && !/\s/.test(str[i - 1])) i--; // skip the word itself + return i; +} +function nextWordBoundary(str, idx) { + let i = idx; + while (i < str.length && /\s/.test(str[i])) i++; // skip whitespace to the right + while (i < str.length && !/\s/.test(str[i])) i++; // skip the word itself + return i; +} + // ─── ANSI Escape Sequences ─────────────────────────────────────────────────── const ESC = '\x1b['; @@ -182,6 +198,7 @@ class FullScreenTUI { { cmd: '/quit', alias: '/q', desc: 'Exit SmallCode' }, { cmd: '/clear', alias: null, desc: 'Reset conversation' }, { cmd: '/model', alias: null, desc: 'Show/switch model' }, + { cmd: '/provider', alias: null, desc: 'Show provider / configure model' }, { cmd: '/endpoint', alias: null, desc: 'Switch API endpoint' }, { cmd: '/stats', alias: null, desc: 'Session statistics' }, { cmd: '/tokens', alias: null, desc: 'Token usage report' }, @@ -891,6 +908,60 @@ class FullScreenTUI { return; } + // ─── Line / word navigation (issue #93) ────────────────────────────── + // Home / Ctrl+A — start of line. Terminals send Home as \x1b[H, \x1b[1~, + // or \x1bOH depending on mode; Ctrl+A arrives as the raw byte \x01. + if (key === '\x1b[H' || key === '\x1b[1~' || key === '\x1bOH' || key === '\x01') { + this.inputCursor = 0; + this.render(); + return; + } + // End / Ctrl+E — end of line (\x1b[F, \x1b[4~, \x1bOF, or Ctrl+E = \x05). + if (key === '\x1b[F' || key === '\x1b[4~' || key === '\x1bOF' || key === '\x05') { + this.inputCursor = this.inputBuffer.length; + this.render(); + return; + } + // Ctrl+Left — previous word (\x1b[1;5D, and Alt+B = \x1bb as a fallback). 
+ if (key === '\x1b[1;5D' || key === '\x1b[1;3D' || key === '\x1bb') { + this.inputCursor = prevWordBoundary(this.inputBuffer, this.inputCursor); + this.render(); + return; + } + // Ctrl+Right — next word (\x1b[1;5C, and Alt+F = \x1bf as a fallback). + if (key === '\x1b[1;5C' || key === '\x1b[1;3C' || key === '\x1bf') { + this.inputCursor = nextWordBoundary(this.inputBuffer, this.inputCursor); + this.render(); + return; + } + // Ctrl+Backspace / Ctrl+W — delete the word to the left of the cursor. + // Ctrl+Backspace reaches us as \x17 (Ctrl+W) or \x1b\x7f on many terminals. + if (key === '\x17' || key === '\x1b\x7f' || key === '\x1b\b') { + const start = prevWordBoundary(this.inputBuffer, this.inputCursor); + this.inputBuffer = this.inputBuffer.slice(0, start) + this.inputBuffer.slice(this.inputCursor); + this.inputCursor = start; + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + this.render(); + return; + } + // Ctrl+Delete — delete the word to the right of the cursor (\x1b[3;5~). + if (key === '\x1b[3;5~' || key === '\x1b[3;3~') { + const end = nextWordBoundary(this.inputBuffer, this.inputCursor); + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + this.inputBuffer.slice(end); + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + this.render(); + return; + } + // Delete (forward) — remove the character under the cursor (\x1b[3~). 
+ if (key === '\x1b[3~') { + if (this.inputCursor < this.inputBuffer.length) { + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + this.inputBuffer.slice(this.inputCursor + 1); + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + } + this.render(); + return; + } + // Scroll chat — PgUp/PgDn, Shift+Up/Down, mouse wheel if (key === '\x1b[5~' || key === '\x1b[1;2A') { // PgUp or Shift+Up const maxBack = -(Math.max(0, this.chatLines.length - this.chatHeight)); @@ -917,6 +988,15 @@ class FullScreenTUI { this.render(); return; } + // Right-click — paste from clipboard (issue #96). Enabling SGR mouse + // tracking makes the terminal forward right-clicks to us instead of + // showing its native paste menu, so we honour the gesture ourselves. + // SGR button 2 (right) press is "\x1b[<2;X;YM", release "\x1b[<2;X;Ym". + if (/^\x1b\[<2;\d+;\d+m$/.test(key)) { + this._pasteFromClipboard(); + return; + } + // Mouse press / drag / release (SGR) — text selection in the chat panel. // Only the chat region selects; tool panel and input area are ignored. 
if (key.includes('\x1b[<')) { @@ -929,27 +1009,9 @@ class FullScreenTUI { return; } - // Ctrl+V — paste from clipboard (Windows) + // Ctrl+V — paste from clipboard (issue #96: right-click also routes here) if (key === '\x16') { - try { - const { execSync } = require('child_process'); - let clipboard = ''; - if (process.platform === 'win32') { - clipboard = execSync('powershell -command "Get-Clipboard"', { encoding: 'utf-8', timeout: 3000 }).trim(); - } else if (process.platform === 'darwin') { - clipboard = execSync('pbpaste', { encoding: 'utf-8', timeout: 3000 }).trim(); - } else { - clipboard = execSync('xclip -selection clipboard -o 2>/dev/null || xsel --clipboard --output 2>/dev/null', { encoding: 'utf-8', timeout: 3000, shell: true }).trim(); - } - if (clipboard) { - // Replace newlines with spaces for input line - const text = clipboard.replace(/[\r\n]+/g, ' '); - this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + text + this.inputBuffer.slice(this.inputCursor); - this.inputCursor += text.length; - this.commandPaletteOpen = this.inputBuffer.startsWith('/'); - this.render(); - } - } catch {} + this._pasteFromClipboard(); return; } @@ -976,6 +1038,30 @@ class FullScreenTUI { } } + // Insert clipboard contents at the cursor. Shared by Ctrl+V and the + // right-click gesture (issue #96). Newlines collapse to spaces so the + // single-line input stays intact. 
+ _pasteFromClipboard() { + try { + const { execSync } = require('child_process'); + let clipboard = ''; + if (process.platform === 'win32') { + clipboard = execSync('powershell -command "Get-Clipboard"', { encoding: 'utf-8', timeout: 3000 }).trim(); + } else if (process.platform === 'darwin') { + clipboard = execSync('pbpaste', { encoding: 'utf-8', timeout: 3000 }).trim(); + } else { + clipboard = execSync('xclip -selection clipboard -o 2>/dev/null || xsel --clipboard --output 2>/dev/null', { encoding: 'utf-8', timeout: 3000, shell: true }).trim(); + } + if (clipboard) { + const text = clipboard.replace(/[\r\n]+/g, ' '); + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + text + this.inputBuffer.slice(this.inputCursor); + this.inputCursor += text.length; + this.commandPaletteOpen = this.inputBuffer.startsWith('/'); + this.render(); + } + } catch {} + } + _onResize() { this._computeLayout(); this.render(); diff --git a/test/input_editing.test.js b/test/input_editing.test.js new file mode 100644 index 00000000..c04ed0c1 --- /dev/null +++ b/test/input_editing.test.js @@ -0,0 +1,111 @@ +'use strict'; + +// SmallCode — input line editing tests (issues #93, #96) +// Line/word navigation and right-click paste in the fullscreen TUI input. +// Drives _onKeypress directly with raw key bytes and asserts on the resulting +// inputBuffer / inputCursor state. render() is stubbed so no terminal is needed. + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { FullScreenTUI } = require('../src/tui/fullscreen'); + +function makeTui(buffer = '', cursor = null) { + const tui = new FullScreenTUI(); + tui.render = () => {}; // no terminal in tests + tui.inputBuffer = buffer; + tui.inputCursor = cursor == null ? 
buffer.length : cursor; + return tui; +} + +const send = (tui, key) => tui._onKeypress(Buffer.from(key, 'binary')); + +// ─── Line navigation (issue #93) ─────────────────────────────────────────── + +test('Home (\\x1b[H) and Ctrl+A (\\x01) move to start of line', async () => { + for (const key of ['\x1b[H', '\x1b[1~', '\x01']) { + const tui = makeTui('hello world'); + await send(tui, key); + assert.equal(tui.inputCursor, 0, `key ${JSON.stringify(key)}`); + } +}); + +test('End (\\x1b[F) and Ctrl+E (\\x05) move to end of line', async () => { + for (const key of ['\x1b[F', '\x1b[4~', '\x05']) { + const tui = makeTui('hello world', 0); + await send(tui, key); + assert.equal(tui.inputCursor, 11, `key ${JSON.stringify(key)}`); + } +}); + +// ─── Word navigation (issue #93) ─────────────────────────────────────────── + +test('Ctrl+Left (\\x1b[1;5D) jumps to the previous word boundary', async () => { + const tui = makeTui('hello world foo'); // cursor at end (15) + await send(tui, '\x1b[1;5D'); + assert.equal(tui.inputCursor, 12); // start of "foo" + await send(tui, '\x1b[1;5D'); + assert.equal(tui.inputCursor, 6); // start of "world" +}); + +test('Ctrl+Right (\\x1b[1;5C) jumps to the next word boundary', async () => { + const tui = makeTui('hello world foo', 0); + await send(tui, '\x1b[1;5C'); + assert.equal(tui.inputCursor, 5); // end of "hello" + await send(tui, '\x1b[1;5C'); + assert.equal(tui.inputCursor, 11); // end of "world" +}); + +// ─── Word / char deletion (issue #93) ────────────────────────────────────── + +test('Ctrl+W (\\x17) deletes the word to the left of the cursor', async () => { + const tui = makeTui('hello world foo'); + await send(tui, '\x17'); + assert.equal(tui.inputBuffer, 'hello world '); + assert.equal(tui.inputCursor, 12); +}); + +test('Ctrl+Delete (\\x1b[3;5~) deletes the word to the right', async () => { + const tui = makeTui('hello world foo', 6); // cursor before "world" + await send(tui, '\x1b[3;5~'); + assert.equal(tui.inputBuffer, 
'hello foo'); + assert.equal(tui.inputCursor, 6); +}); + +test('Delete (\\x1b[3~) removes the character under the cursor', async () => { + const tui = makeTui('abc', 1); + await send(tui, '\x1b[3~'); + assert.equal(tui.inputBuffer, 'ac'); + assert.equal(tui.inputCursor, 1); +}); + +test('word-delete keeps the command palette state in sync', async () => { + const tui = makeTui('/model gpt', 10); + await send(tui, '\x17'); // delete the "gpt" argument + assert.equal(tui.inputBuffer, '/model '); + assert.equal(tui.commandPaletteOpen, true); // still a slash command +}); + +// ─── Right-click paste (issue #96) ───────────────────────────────────────── + +test('right-click release pastes clipboard at the cursor', async () => { + const tui = makeTui('ab', 1); + tui._pasteFromClipboard = function () { // stub the OS clipboard read + const text = 'XY'; + this.inputBuffer = this.inputBuffer.slice(0, this.inputCursor) + text + this.inputBuffer.slice(this.inputCursor); + this.inputCursor += text.length; + }; + await send(tui, '\x1b[<2;10;5m'); // SGR button-2 (right) release + assert.equal(tui.inputBuffer, 'aXYb'); + assert.equal(tui.inputCursor, 3); +}); + +test('right-click press and left-click do not trigger paste', async () => { + let pasted = false; + const tui = makeTui('ab', 1); + tui._pasteFromClipboard = () => { pasted = true; }; + tui._onMouseSelect = () => true; // swallow selection handling + await send(tui, '\x1b[<2;10;5M'); // right-button PRESS (uppercase M) + await send(tui, '\x1b[<0;10;5m'); // left-button release + assert.equal(pasted, false); +}); From 27488ab917599e2d65a9e56e1593c2d6900d8eaf Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 11:33:17 -0700 Subject: [PATCH 22/27] refactor(tui): extract resolveTuiCommand helper + tests (#80) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the /provider TUI-routing logic out of the inline onCommand handler in bin/smallcode.js into bin/tui_commands.js 
as a pure resolveTuiCommand(cmd) function returning { command, guidance }. Behavior is unchanged — the guidance text is byte-identical to the previous inline version — but the mapping is now unit-testable without booting the full TUI. Adds test/tui_commands.test.js (7 cases): bare /provider reroutes to status + guidance, status/--status/-s pass through, unknown subcommands reroute, non-provider commands untouched, /providerx word-boundary, and defensive handling of empty/undefined input. Full suite: 417 passing. Co-Authored-By: Claude Opus 4.8 --- bin/smallcode.js | 25 ++++++----------- bin/tui_commands.js | 44 +++++++++++++++++++++++++++++ test/tui_commands.test.js | 59 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 17 deletions(-) create mode 100644 bin/tui_commands.js create mode 100644 test/tui_commands.test.js diff --git a/bin/smallcode.js b/bin/smallcode.js index 2582bad1..7e86eefb 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -303,24 +303,15 @@ async function runTUI(config) { console.log = (...args) => { captured += args.join(' ') + '\n'; }; // Create a mock rl for command handler const mockRl = { prompt: () => {}, close: () => { screen.leave(); process.exit(0); } }; - // /provider's interactive wizard needs a real stdin/stdout, which the - // fullscreen TUI captures — so it silently did nothing (issue #80). - // Inside the TUI, surface the current provider/model status instead and - // point the user at the paths that DO work here (/endpoint, /model) or - // the shell wizard. - const provMatch = /^\/provider\b/.test(cmd); - const provSub = cmd.replace(/^\/provider\s*/, '').trim(); + // Some commands (e.g. /provider's interactive wizard) need a real + // stdin/stdout the fullscreen TUI captures, so they silently did nothing + // (issue #80). resolveTuiCommand swaps them for a non-interactive + // equivalent plus guidance. See ./tui_commands for the mapping. 
+ const { resolveTuiCommand } = require('./tui_commands'); + const { command: routedCmd, guidance } = resolveTuiCommand(cmd); try { - if (provMatch && provSub !== 'status' && provSub !== '--status' && provSub !== '-s') { - await handleCmd('/provider status', mockRl); - captured += '\n The interactive provider wizard needs a real terminal and'; - captured += '\n cannot run inside the full-screen TUI. To reconfigure:'; - captured += '\n • /endpoint — switch the API base URL here'; - captured += '\n • /model — switch the model here'; - captured += '\n • run `smallcode /provider` from your shell for the full wizard'; - } else { - await handleCmd(cmd, mockRl); - } + await handleCmd(routedCmd, mockRl); + if (guidance) captured += guidance; } catch (e) { captured += `Error: ${e.message}\n`; } diff --git a/bin/tui_commands.js b/bin/tui_commands.js new file mode 100644 index 00000000..ee3da51d --- /dev/null +++ b/bin/tui_commands.js @@ -0,0 +1,44 @@ +'use strict'; + +// Slash-command resolution for the fullscreen TUI. +// +// The fullscreen TUI captures stdout and hands slash commands a mock readline, +// so any command whose handler needs a real interactive terminal can't run +// inside it. resolveTuiCommand() maps a raw command to: +// { command, guidance } +// command — the command string to pass to the normal command handler +// guidance — extra text to append after the captured output, or null +// +// Today this only special-cases /provider (issue #80): its interactive wizard +// is swapped for a non-interactive status dump plus a pointer to the paths that +// DO work inside the TUI (/endpoint, /model, or the shell wizard). Everything +// else passes through unchanged. + +// Subcommands of /provider that are already non-interactive and safe to run +// inside the TUI as-is. 
+const PROVIDER_STATUS_SUBS = new Set(['status', '--status', '-s']); + +const PROVIDER_GUIDANCE = [ + '', + ' The interactive provider wizard needs a real terminal and', + ' cannot run inside the full-screen TUI. To reconfigure:', + ' • /endpoint — switch the API base URL here', + ' • /model — switch the model here', + ' • run `smallcode /provider` from your shell for the full wizard', +].join('\n'); + +function resolveTuiCommand(cmd) { + const raw = String(cmd || ''); + if (!/^\/provider\b/.test(raw)) { + return { command: raw, guidance: null }; + } + const sub = raw.replace(/^\/provider\s*/, '').trim(); + if (PROVIDER_STATUS_SUBS.has(sub)) { + return { command: raw, guidance: null }; + } + // Bare /provider (or an unknown subcommand): show status + guidance instead + // of silently launching a wizard the TUI can't drive. + return { command: '/provider status', guidance: PROVIDER_GUIDANCE }; +} + +module.exports = { resolveTuiCommand, PROVIDER_GUIDANCE }; diff --git a/test/tui_commands.test.js b/test/tui_commands.test.js new file mode 100644 index 00000000..34a2ef77 --- /dev/null +++ b/test/tui_commands.test.js @@ -0,0 +1,59 @@ +'use strict'; + +// SmallCode — TUI slash-command resolution tests (issue #80) +// resolveTuiCommand maps a raw slash command to { command, guidance }. The +// fullscreen TUI can't host /provider's interactive wizard, so a bare +// /provider is rerouted to `/provider status` plus guidance text; everything +// else (including the already-non-interactive status subcommands) passes +// through unchanged. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { resolveTuiCommand, PROVIDER_GUIDANCE } = require('../bin/tui_commands'); + +test('bare /provider reroutes to status and attaches guidance', () => { + const r = resolveTuiCommand('/provider'); + assert.equal(r.command, '/provider status'); + assert.equal(r.guidance, PROVIDER_GUIDANCE); +}); + +test('/provider status|--status|-s pass through with no guidance', () => { + for (const sub of ['status', '--status', '-s']) { + const r = resolveTuiCommand(`/provider ${sub}`); + assert.equal(r.command, `/provider ${sub}`, sub); + assert.equal(r.guidance, null, sub); + } +}); + +test('an unknown /provider subcommand still reroutes to status + guidance', () => { + const r = resolveTuiCommand('/provider reset'); + assert.equal(r.command, '/provider status'); + assert.equal(r.guidance, PROVIDER_GUIDANCE); +}); + +test('non-provider commands pass through untouched', () => { + for (const cmd of ['/model', '/endpoint', '/help', '/quit']) { + const r = resolveTuiCommand(cmd); + assert.equal(r.command, cmd, cmd); + assert.equal(r.guidance, null, cmd); + } +}); + +test('a command that merely starts with "provider" is not matched', () => { + // \b word boundary: /providerx is a different command, not /provider. 
+ const r = resolveTuiCommand('/providerx'); + assert.equal(r.command, '/providerx'); + assert.equal(r.guidance, null); +}); + +test('guidance points at the in-TUI alternatives and the shell wizard', () => { + assert.match(PROVIDER_GUIDANCE, /\/endpoint/); + assert.match(PROVIDER_GUIDANCE, /\/model/); + assert.match(PROVIDER_GUIDANCE, /smallcode \/provider/); +}); + +test('non-string / empty input is handled defensively', () => { + assert.deepEqual(resolveTuiCommand(''), { command: '', guidance: null }); + assert.deepEqual(resolveTuiCommand(undefined), { command: '', guidance: null }); +}); From d19a820c108db03f97b9162193d5b3f539097df6 Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 11:40:11 -0700 Subject: [PATCH 23/27] fix(mcp): stop the smallcode --mcp fork bomb (#82) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A self-referential entry in mcp.json (a server whose command relaunches `smallcode --mcp`) caused unbounded process spawning: each smallcode MCP server booted, connected to its own configured MCP servers, and spawned another smallcode MCP server — recursively. Reporters saw thousands of `node smallcode.js --mcp` processes and thousands of identical session files, exhausting RAM until the OOM killer fired. Two layers of defense: 1. Host-side (root cause): bin/smallcode.js no longer initializes the external MCP client when running in --mcp server mode. An MCP server must not also act as an MCP host. This alone breaks the recursion regardless of mcp.json contents, and stops the server from creating a session file on every spawn. 2. Config-side (defense-in-depth): MCPClient.loadConfig() now skips any server entry that would relaunch smallcode in --mcp mode, via the new static MCPClient._isSelfReference(). Catches direct, node, npx, and smolv2 forms while leaving legitimate third-party servers untouched. Adds test/mcp_self_reference.test.js (4 cases) plus an on-disk loadConfig integration check. 
Full suite: 452 passing. Co-Authored-By: Claude Opus 4.8 --- bin/smallcode.js | 26 +++++++++------ src/tools/mcp_client.js | 18 +++++++++++ test/mcp_self_reference.test.js | 56 +++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 10 deletions(-) create mode 100644 test/mcp_self_reference.test.js diff --git a/bin/smallcode.js b/bin/smallcode.js index 7e86eefb..33be1773 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -3120,17 +3120,23 @@ async function main() { skillManager = new SkillManager(process.cwd()); - // Initialize MCP client (connect to external MCP servers) + // Initialize MCP client (connect to external MCP servers). + // Skipped entirely in --mcp server mode: an MCP server must not also act as + // an MCP host. Otherwise a self-referential `smallcode --mcp` entry in + // mcp.json makes each server spawn another server recursively — an unbounded + // fork bomb that exhausts RAM (issue #82). let mcpClient = null; - const mcpClientInstance = new MCPClient(process.cwd()); - if (mcpClientInstance.loadConfig() > 0) { - mcpClient = mcpClientInstance; - // Connect asynchronously — don't block boot - mcpClient.connectAll().then(toolCount => { - if (toolCount > 0 && _fullscreenRef) { - _fullscreenRef.addTool('mcp-client', 'ok', `${toolCount} external tools from ${mcpClient.servers.size} servers`); - } - }).catch(() => {}); + if (!flags.mcp) { + const mcpClientInstance = new MCPClient(process.cwd()); + if (mcpClientInstance.loadConfig() > 0) { + mcpClient = mcpClientInstance; + // Connect asynchronously — don't block boot + mcpClient.connectAll().then(toolCount => { + if (toolCount > 0 && _fullscreenRef) { + _fullscreenRef.addTool('mcp-client', 'ok', `${toolCount} external tools from ${mcpClient.servers.size} servers`); + } + }).catch(() => {}); + } } // Initialize session + token tracking diff --git a/src/tools/mcp_client.js b/src/tools/mcp_client.js index 04ddc3a3..537c7fef 100644 --- a/src/tools/mcp_client.js +++ b/src/tools/mcp_client.js 
@@ -46,6 +46,10 @@ class MCPClient { const servers = content.mcpServers || {}; for (const [name, cfg] of Object.entries(servers)) { if (cfg.disabled) continue; + // Skip a self-referential entry that relaunches smallcode in --mcp + // mode. Combined with the host-side guard, this prevents the fork + // bomb from issue #82 even if a stale/bad mcp.json registers it. + if (MCPClient._isSelfReference(cfg)) continue; this.servers.set(name, { config: { name, @@ -65,6 +69,20 @@ class MCPClient { return this.servers.size; } + /** + * Detect a server config that would relaunch SmallCode itself as an MCP + * server (`smallcode --mcp`, `node smallcode.js --mcp`, `npx smallcode --mcp`, + * `smolv2 --mcp`, …). Spawning these from the MCP client is what produced the + * runaway process fork bomb in issue #82. + */ + static _isSelfReference(cfg) { + if (!cfg) return false; + const args = Array.isArray(cfg.args) ? cfg.args : []; + if (!args.includes('--mcp')) return false; + const hay = [cfg.command || '', ...args].join(' ').toLowerCase(); + return /\bsmallcode\b|smallcode\.js|\bsmolv2\b/.test(hay); + } + /** * Connect to all configured servers and discover their tools. * Returns number of tools discovered. diff --git a/test/mcp_self_reference.test.js b/test/mcp_self_reference.test.js new file mode 100644 index 00000000..f6a221c6 --- /dev/null +++ b/test/mcp_self_reference.test.js @@ -0,0 +1,56 @@ +'use strict'; + +// SmallCode — MCP self-reference guard tests (issue #82) +// A self-referential mcp.json entry that relaunches `smallcode --mcp` made each +// MCP server spawn another server recursively — an unbounded fork bomb that +// exhausted RAM. MCPClient._isSelfReference flags such entries so loadConfig +// can skip them. (The primary fix is host-side: --mcp mode never runs the +// client at all; this is defense-in-depth for a stale/bad config.) 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { MCPClient } = require('../src/tools/mcp_client'); + +test('flags direct smallcode --mcp entries', () => { + const cases = [ + { command: 'smallcode', args: ['--mcp'] }, + { command: 'node', args: ['/home/u/.smallcode/bin/smallcode.js', '--mcp'] }, + { command: 'npx', args: ['smallcode', '--mcp'] }, + { command: 'smolv2', args: ['--mcp'] }, + ]; + for (const cfg of cases) { + assert.equal(MCPClient._isSelfReference(cfg), true, JSON.stringify(cfg)); + } +}); + +test('does NOT flag legitimate third-party MCP servers', () => { + const cases = [ + { command: 'node', args: ['./my-server.js'] }, // no --mcp + { command: 'uvx', args: ['mcp-server-fetch'] }, + { command: 'docker', args: ['run', 'ghcr.io/foo/bar'] }, + { command: 'smallcode', args: [] }, // smallcode, but not --mcp + { command: 'node', args: ['smallcode-helper.js'] }, // name match but no --mcp + ]; + for (const cfg of cases) { + assert.equal(MCPClient._isSelfReference(cfg), false, JSON.stringify(cfg)); + } +}); + +test('handles malformed configs defensively', () => { + assert.equal(MCPClient._isSelfReference(null), false); + assert.equal(MCPClient._isSelfReference({}), false); + assert.equal(MCPClient._isSelfReference({ command: 'smallcode' }), false); // args undefined + assert.equal(MCPClient._isSelfReference({ args: '--mcp' }), false); // args not an array +}); + +test('loadConfig skips a self-referential entry but keeps real ones', () => { + // loadConfig reads from disk; here we exercise the filter directly by + // simulating what loadConfig does with a parsed mcpServers object. 
+ const servers = { + 'fork-bomb': { command: 'node', args: ['smallcode.js', '--mcp'] }, + 'fetch': { command: 'uvx', args: ['mcp-server-fetch'] }, + }; + const kept = Object.entries(servers).filter(([, cfg]) => !MCPClient._isSelfReference(cfg)); + assert.deepEqual(kept.map(([n]) => n), ['fetch']); +}); From 89e87f8eb527cf2392c5843dc25ec4b49f1631e2 Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 11:59:09 -0700 Subject: [PATCH 24/27] =?UTF-8?q?feat(tui):=20live=20activity=20feed=20?= =?UTF-8?q?=E2=80=94=20Phase=20A:=20tool-start=20+=20context=20meter=20(#7?= =?UTF-8?q?7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase A of the live activity feed: the TUI now shows work as it happens instead of only finished tool results. Per-feature toggles via a new /live command, seeded from env. - bin/live_settings.js: pure settings module (tools/context/stream/thinking), env-seeded, with resolveLiveCommand() for the /live command. tools+context default ON; stream+thinking default OFF (Phase B changes the request path). - TUI: toolStart()/toolEnd() push an in-progress ⚙ line the moment a tool starts and rewrite it to ✓/✗ in place on completion (anchored against front-trimming). setContextMeter() renders a live "ctx 42% (13k/32k)" footer indicator. - TokenMonitor: track lastPromptTokens; contextMeter(window) snapshot. - Agent loop: dispatch site starts the live tool line; the console.log override finishes it and refreshes the context meter; meter also refreshes after each model turn. Classic behavior preserved when /live tools is off. - /live command (commands.js) + palette entry. Adds test/live_settings.test.js (8) and test/live_tui.test.js (6). Plus an end-to-end /live command check. Full suite: 465 passing. 
Design: docs/plans/2026-06-14-live-activity-feed-design.md Co-Authored-By: Claude Opus 4.8 --- bin/commands.js | 10 ++ bin/live_settings.js | 96 +++++++++++++++++++ bin/smallcode.js | 57 ++++++++++- bin/token_monitor.js | 16 ++++ .../2026-06-14-live-activity-feed-design.md | 62 ++++++++++++ src/tui/fullscreen.js | 72 +++++++++++++- test/live_settings.test.js | 77 +++++++++++++++ test/live_tui.test.js | 72 ++++++++++++++ 8 files changed, 457 insertions(+), 5 deletions(-) create mode 100644 bin/live_settings.js create mode 100644 docs/plans/2026-06-14-live-activity-feed-design.md create mode 100644 test/live_settings.test.js create mode 100644 test/live_tui.test.js diff --git a/bin/commands.js b/bin/commands.js index b5c1444f..0016e5e2 100644 --- a/bin/commands.js +++ b/bin/commands.js @@ -1164,6 +1164,16 @@ module.exports = function createCommandHandler(config, conversationHistory, impr return; } + case '/live': { + // Toggle the live activity feed features (issue #77). + const { resolveLiveCommand } = require('./live_settings'); + const res = resolveLiveCommand(parts.slice(1).join(' ')); + console.log(res.text); + console.log(''); + rl.prompt(); + return; + } + case '/provider': { const sub = (parts[1] || '').trim(); if (sub === 'status' || sub === '--status' || sub === '-s') { diff --git a/bin/live_settings.js b/bin/live_settings.js new file mode 100644 index 00000000..2d22936d --- /dev/null +++ b/bin/live_settings.js @@ -0,0 +1,96 @@ +'use strict'; + +// SmallCode — live activity feed settings (issue #77) +// +// Four independently-toggleable live-output features, seeded from env and +// flipped at runtime by the `/live` command: +// tools — show a tool the moment it starts, update to ✓/✗ on completion +// context — live context-usage meter in the footer +// stream — stream the model reply token-by-token (changes request path) +// thinking — live dimmed preview of reasoning (changes request path) +// +// stream/thinking default OFF because they switch 
chatCompletion to a +// streaming request; tools/context default ON (pure display, no risk). + +const FEATURES = ['tools', 'context', 'stream', 'thinking']; + +const ENV = { + tools: 'SMALLCODE_LIVE_TOOLS', + context: 'SMALLCODE_LIVE_CONTEXT', + stream: 'SMALLCODE_LIVE_STREAM', + thinking: 'SMALLCODE_LIVE_THINKING', +}; + +const DEFAULTS = { tools: true, context: true, stream: false, thinking: false }; + +function _envBool(name, dflt) { + const v = process.env[name]; + if (v == null || v === '') return dflt; + return /^(1|true|on|yes|enabled?)$/i.test(String(v).trim()); +} + +let _settings = null; + +function getLiveSettings() { + if (!_settings) { + _settings = {}; + for (const f of FEATURES) _settings[f] = _envBool(ENV[f], DEFAULTS[f]); + } + return _settings; +} + +function setLive(feature, value) { + if (!FEATURES.includes(feature)) return false; + getLiveSettings()[feature] = !!value; + return true; +} + +// Parse an on/off token. Returns true, false, 'toggle' (empty), or null (bad). +function _parseValue(tok) { + if (tok == null || tok === '') return 'toggle'; + if (/^(1|true|on|yes|enabled?)$/i.test(tok)) return true; + if (/^(0|false|off|no|disabled?)$/i.test(tok)) return false; + return null; +} + +// Resolve a `/live …` argument string into a structured action: +// { action: 'status'|'set'|'error', feature?, value?, text } +// `text` is ready to print. Mutates settings on a successful 'set'. +function resolveLiveCommand(argStr) { + const s = String(argStr || '').trim(); + if (!s) return { action: 'status', text: formatStatus() }; + + const parts = s.split(/\s+/); + const feature = parts[0].toLowerCase(); + const val = _parseValue(parts[1]); + + if (feature === 'all') { + if (val === null) return { action: 'error', text: ` Invalid value "${parts[1]}". Use on|off.` }; + const v = val === 'toggle' ? 
true : val; + for (const f of FEATURES) setLive(f, v); + return { action: 'set', feature: 'all', value: v, text: formatStatus() }; + } + + if (!FEATURES.includes(feature)) { + return { action: 'error', text: ` Unknown feature "${feature}". Use: ${FEATURES.join(', ')} (or "all").` }; + } + if (val === null) return { action: 'error', text: ` Invalid value "${parts[1]}". Use on|off.` }; + + const current = getLiveSettings()[feature]; + const newVal = val === 'toggle' ? !current : val; + setLive(feature, newVal); + return { action: 'set', feature, value: newVal, text: ` live ${feature}: ${newVal ? 'on' : 'off'}` }; +} + +function formatStatus() { + const s = getLiveSettings(); + const lines = [' Live activity (issue #77):']; + for (const f of FEATURES) lines.push(` ${f.padEnd(9)} ${s[f] ? 'on' : 'off'}`); + lines.push(' Toggle: /live [on|off]'); + return lines.join('\n'); +} + +// Test-only: drop the cached singleton so env changes re-seed. +function _reset() { _settings = null; } + +module.exports = { FEATURES, getLiveSettings, setLive, resolveLiveCommand, formatStatus, _reset }; diff --git a/bin/smallcode.js b/bin/smallcode.js index 33be1773..45825a9e 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -134,6 +134,36 @@ let tokenTracker = null; // Fullscreen TUI reference for streaming (set when fullscreen mode is active) let _fullscreenRef = null; +// Live activity feed (issue #77). _activeToolHandle is the in-progress tool +// line started in the dispatch loop (runAgentLoop) and finished in the +// console.log override (runTUI) — module-scoped so both closures share it. +const { getLiveSettings } = require('./live_settings'); +let _activeToolHandle = null; + +// One-line summary of a tool's most salient argument, for the live ⚙ line. +function summarizeToolArgs(name, args) { + if (!args || typeof args !== 'object') return ''; + const a = args; + const clip = (s, n = 48) => { s = String(s).replace(/\s+/g, ' ').trim(); return s.length > n ? 
s.slice(0, n - 1) + '…' : s; }; + if (a.path) return clip(a.path); + if (a.command) return clip(a.command); + if (a.pattern) return clip(a.pattern); + if (a.query) return clip(a.query); + if (a.task) return clip(a.task); + if (a.name) return clip(a.name); + return ''; +} + +// Push the current context usage to the footer meter (gated by /live context). +function updateContextMeter() { + if (!_fullscreenRef || !getLiveSettings().context) return; + try { + const win = Number(config?.context?.detected_window) || 0; + const m = tokenMonitor.contextMeter(win); + if (m.window > 0) _fullscreenRef.setContextMeter(m.pct, m.used, m.window); + } catch {} +} + const VERSION = require('../package.json').version; const LOGO = ` ⚡ SmallCode v${VERSION} @@ -360,9 +390,18 @@ async function runTUI(config) { if (!clean) return; // Skip turn summaries unless verbose if (clean.startsWith('───') && !flags.verbose) return; - // Pair with current tool name for rich display + const isError = clean.startsWith('✗') || clean.includes('Exit code') || clean.includes('Timed out'); + // Live tools (issue #77): finish the in-progress ⚙ line in place, then + // refresh the context meter now that the tool changed context. + if (_activeToolHandle) { + screen.toolEnd(_activeToolHandle, isError ? 'err' : 'ok', clean); + _activeToolHandle = null; + _currentToolName = ''; + updateContextMeter(); + return; + } + // Classic path: pair with the captured tool name for rich display. if (_currentToolName) { - const isError = clean.startsWith('✗') || clean.includes('Exit code') || clean.includes('Timed out'); screen.addTool(_currentToolName, isError ? 'err' : 'ok', clean); _currentToolName = ''; } else { @@ -1018,6 +1057,9 @@ async function runAgentLoop(userMessage, config) { break; } + // Refresh the live context meter after each model turn (issue #77). 
+ updateContextMeter(); + const message = response.choices?.[0]?.message; if (!message) break; @@ -1259,8 +1301,15 @@ async function runAgentLoop(userMessage, config) { } } - // Show what's happening - process.stdout.write(tui.toolStart(toolName)); + // Show what's happening. With live tools on (issue #77), push an + // in-progress ⚙ line now and rewrite it to ✓/✗ when the result lands + // (handled in the console.log override). Otherwise keep the classic + // capture-and-pair behavior. + if (_fullscreenRef && getLiveSettings().tools) { + _activeToolHandle = _fullscreenRef.toolStart(toolName, summarizeToolArgs(toolName, toolArgs)); + } else { + process.stdout.write(tui.toolStart(toolName)); + } const toolStart2 = Date.now(); const result = await executeTool(toolName, toolArgs); diff --git a/bin/token_monitor.js b/bin/token_monitor.js index d279dd6c..9bacedff 100644 --- a/bin/token_monitor.js +++ b/bin/token_monitor.js @@ -10,6 +10,7 @@ class TokenMonitor { this.totalCalls = 0; this.compactions = 0; this.evictions = 0; + this.lastPromptTokens = 0; this._nextCallIsNewTurn = false; } @@ -20,6 +21,9 @@ class TokenMonitor { this.totalPrompt += promptTokens || 0; this.totalCompletion += completionTokens || 0; this.totalCalls++; + // Most recent prompt size = how much context is currently in play. Drives + // the live context meter (issue #77). + this.lastPromptTokens = promptTokens || 0; if (!this.turns.length || metadata.newTurn || this._nextCallIsNewTurn) { this.turns.push({ calls: 0, promptTokens: 0, completionTokens: 0, toolCalls: 0 }); @@ -35,6 +39,18 @@ class TokenMonitor { recordCompaction() { this.compactions++; } recordEviction() { this.evictions++; } + /** + * Live context-usage snapshot for the TUI meter (issue #77). `window` is the + * model's context length in tokens. Returns { pct, used, window } where + * `used` is the most recent prompt size. 
+ */ + contextMeter(window) { + const used = this.lastPromptTokens || 0; + const win = window || 0; + const pct = win > 0 ? (used / win) * 100 : 0; + return { pct, used, window: win }; + } + /** * Get efficiency metrics. */ diff --git a/docs/plans/2026-06-14-live-activity-feed-design.md b/docs/plans/2026-06-14-live-activity-feed-design.md new file mode 100644 index 00000000..829bcef4 --- /dev/null +++ b/docs/plans/2026-06-14-live-activity-feed-design.md @@ -0,0 +1,62 @@ +# Live Activity Feed (issue #77) + +Make the TUI show work as it happens instead of only finished tool results. +Four features, each independently toggleable, built in two phases. + +## Features & toggles + +Per-feature switches, runtime via `/live` and seeded from env: + +| Feature | What it adds | Env default | Default | +|-----------|----------------------------------------------------------|------------------------|---------| +| `tools` | Show a tool the moment it starts (`⚙ write_file: x.py`), update the same line to `✓`/`✗` on completion | `SMALLCODE_LIVE_TOOLS` | ON | +| `context` | Live context-usage meter in the footer, updated per action | `SMALLCODE_LIVE_CONTEXT` | ON | +| `stream` | Stream the model reply token-by-token into the chat | `SMALLCODE_LIVE_STREAM` | OFF (opt-in) | +| `thinking`| Live dimmed preview of reasoning as it streams | `SMALLCODE_LIVE_THINKING`| OFF (opt-in) | + +`/live` prints state; `/live <feature> [on|off]` toggles/sets one. +`stream`/`thinking` default OFF because they change the model request path. + +## Phase A — tool-start + context meter (no model-path change) + +- **`bin/live_settings.js`** (new): pure module. `getLiveSettings()` seeds from + env; `setLive(feature, value)`; `resolveLiveCommand(arg)` → `{ action, feature, + value, text }` for the `/live` command. Unit-testable in isolation. 
+- **TUI** (`src/tui/fullscreen.js`): + - `toolStart(name, detail)` → push an in-progress `⚙` line to chat + tool + panel, store its indices, return a handle `{ chatIdx, toolIdx }`. + - `toolEnd(handle, status, detail)` → rewrite that same line to `✓`/`✗`. + Falls back to `addTool` if the handle is missing. + - `setContextMeter(pct, used, window)` → footer indicator `ctx 42% (13k/32k)`. +- **TokenMonitor**: track `lastPromptTokens`; `contextMeter(window)` → `{ pct, + used, window }`. +- **Agent loop** (`bin/smallcode.js`): at the tool-dispatch site, when + `tools` on + fullscreen, `toolStart` before exec and `toolEnd` after — wired + through the existing `console.log`/`stdout.write` overrides so there is no + duplicate line. When off, the current behavior is unchanged. After each tool + and each turn, update `setContextMeter` when `context` on. +- **`/live` command**: handler in `bin/commands.js` + TUI palette entry. + +## Phase B — streaming + thinking (gated, isolated risk) + +- In `chatCompletion` (`bin/smallcode.js`), when `stream` on: set + `body.stream = true`, consume SSE incrementally (reuse the `model_client.js` + pattern), call `streamToken(delta.content)` for visible text, route + `reasoning_content`/`<think>` deltas to a dimmed area when `thinking` on, + accumulate `tool_calls` deltas, then **reassemble the exact same `data` + object** the function returns today so all downstream logic is untouched. +- Any streaming error falls back to the response so far. The non-streaming + path (default) is left byte-for-byte unchanged. +- Extract SSE assembly into a testable helper. + +## Testing + +- `live_settings`: env parsing + `/live` resolution (pure unit tests). +- TUI: `toolStart`/`toolEnd` line mutation; `setContextMeter` formatting. +- Phase B: SSE-assembly helper fed canned chunks → assert assembled `data` + + `streamToken` call sequence. + +## Non-goals (YAGNI) + +- Persisting toggle state across restarts (env default + runtime only). 
+- A separate scrollable "activity" pane — reuse the existing chat + tool panel. diff --git a/src/tui/fullscreen.js b/src/tui/fullscreen.js index 582e52c5..477a7c01 100644 --- a/src/tui/fullscreen.js +++ b/src/tui/fullscreen.js @@ -177,6 +177,8 @@ class FullScreenTUI { // Panel content buffers this.chatLines = []; // Rendered chat messages + this._chatTrim = 0; // count of chatLines trimmed off the front (issue #77 toolEnd anchoring) + this.contextMeter = ''; // live context-usage indicator (issue #77) // Mouse text selection in the chat panel (drag to highlight, copy on // release). Anchored to chatLines indices so scrolling doesn't shift it. @@ -203,6 +205,7 @@ class FullScreenTUI { { cmd: '/stats', alias: null, desc: 'Session statistics' }, { cmd: '/tokens', alias: null, desc: 'Token usage report' }, { cmd: '/budget', alias: null, desc: 'Context window budget' }, + { cmd: '/live', alias: null, desc: 'Toggle live activity feed' }, { cmd: '/files', alias: null, desc: 'List project files' }, { cmd: '/diff', alias: null, desc: 'Git diff summary' }, { cmd: '/git', alias: null, desc: 'Run git command' }, @@ -645,9 +648,12 @@ class FullScreenTUI { actionStr = ' enter send │ /help commands'; } - // 2. Middle: Scroll & Token info + // 2. Middle: Scroll & Token info (+ live context meter — issue #77) let scrollStr = this.chatScroll < 0 ? '↑ scrolled' : ''; let tokenStr = this.tokenInfo ? `${this.tokenInfo}` : ''; + if (this.contextMeter) { + tokenStr = tokenStr ? `${this.contextMeter} │ ${tokenStr}` : this.contextMeter; + } let middleStr = ''; if (scrollStr && tokenStr) { middleStr = `${scrollStr} │ ${tokenStr}`; @@ -1124,6 +1130,7 @@ class FullScreenTUI { // thousands of lines; rendering stays fast by only keeping recent history. 
const MAX_CHAT_LINES = 5000; if (this.chatLines.length > MAX_CHAT_LINES) { + this._chatTrim += this.chatLines.length - MAX_CHAT_LINES; this.chatLines.splice(0, this.chatLines.length - MAX_CHAT_LINES); } @@ -1155,6 +1162,69 @@ class FullScreenTUI { this.render(); } + // Live in-progress tool line (issue #77). Pushes a ⚙ line to chat + tool + // panel and returns a handle so toolEnd() can rewrite it in place once the + // tool finishes — so the user sees "⚙ write_file: x.py" the moment it starts, + // not only the ✓ after it completes. The handle records absolute indices plus + // the trim offset at creation, so front-trimming of chatLines stays correct. + toolStart(name, detail) { + const iconColor = this.theme.accent; + const prefix = iconColor + ' TOOL ⚙ ' + this.theme.border + '│ ' + ANSI.reset; + const nameStr = name ? this.theme.accent + name + ANSI.reset + ': ' : ''; + const detailStr = (detail ? this.theme.muted + detail : this.theme.muted + 'running…') + ANSI.reset; + + const line = prefix + nameStr + detailStr; + const toolPanelLine = ` ${iconColor}⚙${ANSI.reset} ${nameStr}${detailStr}`; + const handle = { name, chatIdx: this.chatLines.length, toolIdx: this.toolLines.length, trim: this._chatTrim }; + + this.chatLines.push(line); + this.toolLines.push(toolPanelLine); + this.chatScroll = 0; + this.render(); + return handle; + } + + // Finish a live tool line started by toolStart(): rewrite it to ✓/✗ in place. + // Falls back to appending a fresh line (addTool) if the original scrolled out + // of the retained window or no handle was supplied. 
+ toolEnd(handle, status, detail) { + if (!handle || handle.chatIdx == null) { this.addTool(handle && handle.name, status, detail); return; } + + let icon = '⚙', iconColor = this.theme.accent; + if (status === 'ok') { icon = '✓'; iconColor = this.theme.success; } + else if (status === 'err') { icon = '✗'; iconColor = this.theme.error; } + + const name = handle.name; + const prefix = iconColor + ' TOOL ' + icon + ' ' + this.theme.border + '│ ' + ANSI.reset; + const nameStr = name ? this.theme.accent + name + ANSI.reset + ': ' : ''; + const detailStr = detail ? this.theme.muted + detail + ANSI.reset : ''; + const line = prefix + nameStr + detailStr; + const toolPanelLine = ` ${iconColor}${icon}${ANSI.reset} ${nameStr}${detailStr}`; + + const chatIdx = handle.chatIdx - (this._chatTrim - (handle.trim || 0)); + if (chatIdx >= 0 && chatIdx < this.chatLines.length) { + this.chatLines[chatIdx] = line; + } else { + this.chatLines.push(line); // scrolled out of the retained window + } + if (handle.toolIdx != null && handle.toolIdx < this.toolLines.length) { + this.toolLines[handle.toolIdx] = toolPanelLine; + } else { + this.toolLines.push(toolPanelLine); + } + this.render(); + } + + // Live context-usage meter (issue #77). `pct` is 0-100; used/window are token + // counts. Rendered in the status footer alongside the token info. + setContextMeter(pct, used, window) { + if (pct == null) { this.contextMeter = ''; this.render(); return; } + const p = Math.max(0, Math.min(100, Math.round(pct))); + const fmt = (n) => n >= 1000 ? `${(n / 1000).toFixed(1)}k` : String(n); + this.contextMeter = window ? 
`ctx ${p}% (${fmt(used)}/${fmt(window)})` : `ctx ${p}%`; + this.render(); + } + // Show a diff in the chat panel (non-blocking, inline) addDiff(filePath, oldStr, newStr, lineNum) { const t = this.theme; diff --git a/test/live_settings.test.js b/test/live_settings.test.js new file mode 100644 index 00000000..735b905b --- /dev/null +++ b/test/live_settings.test.js @@ -0,0 +1,77 @@ +'use strict'; + +// SmallCode — live activity settings + /live command tests (issue #77) + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const live = require('../bin/live_settings'); + +function withEnv(vars, fn) { + const saved = {}; + for (const k of Object.keys(vars)) { saved[k] = process.env[k]; if (vars[k] == null) delete process.env[k]; else process.env[k] = vars[k]; } + live._reset(); + try { return fn(); } finally { + for (const k of Object.keys(vars)) { if (saved[k] == null) delete process.env[k]; else process.env[k] = saved[k]; } + live._reset(); + } +} + +test('defaults: tools/context on, stream/thinking off', () => { + withEnv({ SMALLCODE_LIVE_TOOLS: null, SMALLCODE_LIVE_CONTEXT: null, SMALLCODE_LIVE_STREAM: null, SMALLCODE_LIVE_THINKING: null }, () => { + assert.deepEqual(live.getLiveSettings(), { tools: true, context: true, stream: false, thinking: false }); + }); +}); + +test('env overrides seed the settings', () => { + withEnv({ SMALLCODE_LIVE_TOOLS: 'off', SMALLCODE_LIVE_STREAM: 'true' }, () => { + const s = live.getLiveSettings(); + assert.equal(s.tools, false); + assert.equal(s.stream, true); + }); +}); + +test('/live with no arg returns status without mutating', () => { + withEnv({}, () => { + const r = live.resolveLiveCommand(''); + assert.equal(r.action, 'status'); + assert.match(r.text, /tools/); + assert.match(r.text, /thinking/); + }); +}); + +test('/live on|off sets explicitly', () => { + withEnv({}, () => { + assert.equal(live.resolveLiveCommand('stream on').value, true); + assert.equal(live.getLiveSettings().stream, true); 
+ assert.equal(live.resolveLiveCommand('stream off').value, false); + assert.equal(live.getLiveSettings().stream, false); + }); +}); + +test('/live with no value toggles', () => { + withEnv({}, () => { + const before = live.getLiveSettings().tools; // default true + const r = live.resolveLiveCommand('tools'); + assert.equal(r.value, !before); + assert.equal(live.getLiveSettings().tools, !before); + }); +}); + +test('/live all on|off sets every feature', () => { + withEnv({}, () => { + live.resolveLiveCommand('all off'); + assert.deepEqual(live.getLiveSettings(), { tools: false, context: false, stream: false, thinking: false }); + live.resolveLiveCommand('all on'); + assert.deepEqual(live.getLiveSettings(), { tools: true, context: true, stream: true, thinking: true }); + }); +}); + +test('unknown feature and bad value produce errors, no mutation', () => { + withEnv({}, () => { + const before = { ...live.getLiveSettings() }; + assert.equal(live.resolveLiveCommand('bogus on').action, 'error'); + assert.equal(live.resolveLiveCommand('stream maybe').action, 'error'); + assert.deepEqual(live.getLiveSettings(), before); + }); +}); diff --git a/test/live_tui.test.js b/test/live_tui.test.js new file mode 100644 index 00000000..295d0097 --- /dev/null +++ b/test/live_tui.test.js @@ -0,0 +1,72 @@ +'use strict'; + +// SmallCode — live TUI primitives (issue #77) +// toolStart/toolEnd rewrite a single tool line in place; setContextMeter +// formats the footer indicator; TokenMonitor.contextMeter reports usage. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { FullScreenTUI } = require('../src/tui/fullscreen'); +const { TokenMonitor } = require('../bin/token_monitor'); + +function makeTui() { + const tui = new FullScreenTUI(); + tui.render = () => {}; + return tui; +} + +const strip = (s) => s.replace(/\x1b\[[0-9;]*m/g, ''); + +test('toolStart pushes one ⚙ line; toolEnd rewrites the SAME line in place', () => { + const tui = makeTui(); + const before = tui.chatLines.length; + const h = tui.toolStart('write_file', 'hello.py'); + assert.equal(tui.chatLines.length, before + 1, 'exactly one line added'); + assert.match(strip(tui.chatLines[h.chatIdx]), /⚙.*write_file.*hello\.py/); + + tui.toolEnd(h, 'ok', 'wrote 12 lines'); + assert.equal(tui.chatLines.length, before + 1, 'no extra line on completion'); + assert.match(strip(tui.chatLines[h.chatIdx]), /✓.*write_file.*wrote 12 lines/); +}); + +test('toolEnd marks errors with ✗', () => { + const tui = makeTui(); + const h = tui.toolStart('bash', 'npm test'); + tui.toolEnd(h, 'err', 'Exit code 1'); + assert.match(strip(tui.chatLines[h.chatIdx]), /✗.*bash.*Exit code 1/); +}); + +test('toolEnd survives interleaved lines (index stays anchored)', () => { + const tui = makeTui(); + const h = tui.toolStart('read_file', 'a.js'); + tui.addTool('router', 'ok', 'plan'); // unrelated line pushed in between + tui.toolEnd(h, 'ok', 'read 40 lines'); + assert.match(strip(tui.chatLines[h.chatIdx]), /✓.*read_file.*read 40 lines/); + assert.match(strip(tui.chatLines[h.chatIdx + 1]), /router/); // the interleaved line is intact +}); + +test('toolEnd falls back to a fresh line when the handle is missing', () => { + const tui = makeTui(); + const before = tui.chatLines.length; + tui.toolEnd(null, 'ok', 'orphan'); + assert.equal(tui.chatLines.length, before + 1); +}); + +test('setContextMeter formats percent + token counts', () => { + const tui = makeTui(); + tui.setContextMeter(42, 13000, 32000); + 
assert.equal(tui.contextMeter, 'ctx 42% (13.0k/32.0k)'); + tui.setContextMeter(null); + assert.equal(tui.contextMeter, ''); +}); + +test('TokenMonitor.contextMeter reports last prompt vs window', () => { + const tm = new TokenMonitor(); + tm.recordCall(8000, 200); + tm.recordCall(16000, 300); // most recent prompt = 16000 + const m = tm.contextMeter(32000); + assert.equal(m.used, 16000); + assert.equal(m.window, 32000); + assert.equal(Math.round(m.pct), 50); +}); From 440fc8827f956e650c5d9f99d9f8e063b7d19820 Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 12:04:04 -0700 Subject: [PATCH 25/27] =?UTF-8?q?feat(tui):=20live=20activity=20feed=20?= =?UTF-8?q?=E2=80=94=20Phase=20B:=20streaming=20+=20thinking=20(#77)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Opt-in live streaming of the model reply and a live reasoning preview, gated behind /live stream and /live thinking (both default OFF). - bin/stream_assembler.js: pure StreamAssembler + parseSSEBuffer. Folds streamed OpenAI chunks (content, reasoning_content, tool_call deltas, usage, finish_reason) back into the exact non-streaming `data` shape, so all downstream chatCompletion logic is untouched. Buffer parser tolerates lines split across network reads. - chatCompletion: when /live stream is on AND a fullscreen TUI is attached, request stream:true (+ stream_options.include_usage), consume the SSE via the assembler, drive streamToken (and streamThinking when /live thinking is on) as tokens arrive, then return the reassembled data. Any streaming error falls back to what was assembled. The non-streaming default path is unchanged; the error-retry is forced non-streamed so its JSON parse stays valid. - TUI: streamThinking() renders a single collapsing dimmed [thinking] line, reset at turn boundaries by endStream(). - Suppress the post-turn addChat('assistant') when content was already shown live, to avoid double-rendering. 
Adds test/stream_assembler.test.js (8) incl. split-buffer reconstruction, parallel tool_calls, reasoning routing, and usage capture. Full suite: 473. Note: the SSE assembler is fully unit-tested, but the live streaming path has not been exercised against a real streaming endpoint here — smoke-test with `/live stream on` against your local model. Co-Authored-By: Claude Opus 4.8 --- bin/smallcode.js | 58 +++++++++++++++++-- bin/stream_assembler.js | 87 ++++++++++++++++++++++++++++ src/tui/fullscreen.js | 19 +++++++ test/stream_assembler.test.js | 103 ++++++++++++++++++++++++++++++++++ 4 files changed, 262 insertions(+), 5 deletions(-) create mode 100644 bin/stream_assembler.js create mode 100644 test/stream_assembler.test.js diff --git a/bin/smallcode.js b/bin/smallcode.js index 45825a9e..9cb7d9fc 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -139,6 +139,9 @@ let _fullscreenRef = null; // console.log override (runTUI) — module-scoped so both closures share it. const { getLiveSettings } = require('./live_settings'); let _activeToolHandle = null; +// True when the current turn's assistant content was already shown live via +// streamToken, so the post-turn addChat('assistant') must not render it again. +let _contentStreamed = false; // One-line summary of a tool's most salient argument, for the live ⚙ line. function summarizeToolArgs(name, args) { @@ -665,7 +668,7 @@ async function runAgentLoop(userMessage, config) { if (message?.content) { conversationHistory.push({ role: 'assistant', content: message.content }); if (_fullscreenRef) { - _fullscreenRef.addChat('assistant', message.content); + if (!_contentStreamed) _fullscreenRef.addChat('assistant', message.content); } else { process.stdout.write(tui.renderMarkdown(message.content)); } @@ -1902,9 +1905,9 @@ Read the FULL file above carefully. Fix ALL errors. 
Use the patch tool with the } } } catch {} - // Render with markdown highlighting + // Render with markdown highlighting (skip if already shown live — #77) if (_fullscreenRef) { - _fullscreenRef.addChat('assistant', message.content); + if (!_contentStreamed) _fullscreenRef.addChat('assistant', message.content); } else { process.stdout.write(tui.renderMarkdown(message.content)); } @@ -2523,6 +2526,15 @@ async function chatCompletion(config, messages) { } } + // Live streaming (issue #77, Phase B): opt-in via /live stream. Only when a + // fullscreen TUI is attached to receive tokens. Request usage in the final + // chunk so the context meter still updates. + const wantStream = !!(_fullscreenRef && getLiveSettings().stream); + if (wantStream) { + body.stream = true; + body.stream_options = { include_usage: true }; + } + let response; try { response = await fetch(`${baseUrl}/chat/completions`, { @@ -2574,7 +2586,8 @@ async function chatCompletion(config, messages) { const retry = await fetch(`${baseUrl}/chat/completions`, { method: 'POST', headers, - body: JSON.stringify(body), + // Retry non-streamed so the JSON parse below is unambiguous. + body: JSON.stringify({ ...body, stream: false, stream_options: undefined }), }); if (retry.ok) return await retry.json(); } catch {} @@ -2587,7 +2600,42 @@ async function chatCompletion(config, messages) { return null; } - const data = await response.json(); + // Consume the response. When streaming (Phase B), assemble the SSE deltas + // back into the same `data` shape the non-streaming path produces, driving + // the live chat/thinking views as tokens arrive. On any streaming failure, + // fall back to whatever was assembled so far. The non-streaming path is + // unchanged. 
+ let data; + if (wantStream && response.body && typeof response.body.getReader === 'function') { + const { StreamAssembler, parseSSEBuffer } = require('./stream_assembler'); + const assembler = new StreamAssembler(); + const showThinking = getLiveSettings().thinking; + try { + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buf = ''; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buf += decoder.decode(value, { stream: true }); + const { events, rest } = parseSSEBuffer(buf); + buf = rest; + for (const ev of events) { + if (ev.done || !ev.json) continue; + assembler.pushChunk(ev.json, { + onContent: (t) => { if (_fullscreenRef) _fullscreenRef.streamToken(t); }, + onReasoning: showThinking ? (t) => { if (_fullscreenRef) _fullscreenRef.streamThinking(t); } : undefined, + }); + } + } + } catch { /* fall through with whatever assembled so far */ } + if (_fullscreenRef) _fullscreenRef.endStream(); + data = assembler.toData(); + _contentStreamed = !!(_fullscreenRef && assembler.content); + } else { + data = await response.json(); + _contentStreamed = false; + } // Length-truncation recovery: reasoning models served via LM Studio // (lfm2.x, Qwen3, DeepSeek R1) expose a separate `reasoning_content` diff --git a/bin/stream_assembler.js b/bin/stream_assembler.js new file mode 100644 index 00000000..8f026994 --- /dev/null +++ b/bin/stream_assembler.js @@ -0,0 +1,87 @@ +'use strict'; + +// SmallCode — OpenAI SSE stream assembler (issue #77, Phase B) +// +// Reassembles a streamed chat completion (stream:true) into the exact same +// non-streaming `data` object the rest of chatCompletion expects, so all the +// downstream logic (tool-call extraction, length recovery, usage) is untouched. +// Pure and side-effect-free except for the optional onContent/onReasoning +// callbacks, which exist purely to drive the live TUI. 
+ +class StreamAssembler { + constructor() { + this.content = ''; + this.reasoning = ''; + this.toolCalls = []; // index → { id, type, function: { name, arguments } } + this.finishReason = null; + this.usage = null; + } + + // Fold one parsed OpenAI streaming chunk into the running state. + pushChunk(obj, { onContent, onReasoning } = {}) { + if (!obj || typeof obj !== 'object') return; + const choice = obj.choices && obj.choices[0]; + if (choice) { + const delta = choice.delta || {}; + if (typeof delta.content === 'string' && delta.content) { + this.content += delta.content; + if (onContent) onContent(delta.content); + } + // Reasoning models (Qwen3, DeepSeek R1) stream a separate field. + const reason = delta.reasoning_content; + if (typeof reason === 'string' && reason) { + this.reasoning += reason; + if (onReasoning) onReasoning(reason); + } + if (Array.isArray(delta.tool_calls)) { + for (const tc of delta.tool_calls) { + const idx = Number.isInteger(tc.index) ? tc.index : 0; + if (!this.toolCalls[idx]) { + this.toolCalls[idx] = { id: tc.id || `call_${idx}`, type: 'function', function: { name: '', arguments: '' } }; + } + const slot = this.toolCalls[idx]; + if (tc.id) slot.id = tc.id; + if (tc.type) slot.type = tc.type; + if (tc.function && tc.function.name) slot.function.name += tc.function.name; + if (tc.function && typeof tc.function.arguments === 'string') slot.function.arguments += tc.function.arguments; + } + } + if (choice.finish_reason) this.finishReason = choice.finish_reason; + } + // Final chunk (with stream_options.include_usage) carries usage. + if (obj.usage) this.usage = obj.usage; + } + + // Build the OpenAI-compatible non-streaming response object. 
+ toData() { + const message = { role: 'assistant', content: this.content }; + const tcs = this.toolCalls.filter(Boolean); + if (tcs.length) message.tool_calls = tcs; + if (this.reasoning) message.reasoning_content = this.reasoning; + return { + choices: [{ message, finish_reason: this.finishReason || 'stop' }], + usage: this.usage || undefined, + }; + } +} + +// Split an accumulating SSE text buffer into complete events. Returns +// { events, rest } where `rest` is the trailing partial line to carry over to +// the next read. Each event is { json } or { done: true }. +function parseSSEBuffer(buffer) { + const events = []; + let rest = String(buffer || ''); + let nl; + while ((nl = rest.indexOf('\n')) !== -1) { + const line = rest.slice(0, nl).trim(); + rest = rest.slice(nl + 1); + if (!line || !line.startsWith('data:')) continue; + const payload = line.slice(5).trim(); + if (payload === '[DONE]') { events.push({ done: true }); continue; } + try { events.push({ json: JSON.parse(payload) }); } + catch { /* malformed/partial — drop this line */ } + } + return { events, rest }; +} + +module.exports = { StreamAssembler, parseSSEBuffer }; diff --git a/src/tui/fullscreen.js b/src/tui/fullscreen.js index 477a7c01..20153477 100644 --- a/src/tui/fullscreen.js +++ b/src/tui/fullscreen.js @@ -1304,8 +1304,27 @@ class FullScreenTUI { this.render(); } + // Live dimmed reasoning preview (issue #77, Phase B). Streams thinking tokens + // into a single collapsing dimmed line so the user can watch the model reason + // without flooding the chat. Reset by endStream() at turn boundaries. 
+ streamThinking(token) { + const dim = '\x1b[2m'; + const prefix = ' ' + this.theme.border + '│ ' + ANSI.reset + dim + '[thinking] '; + if (this._thinkingLineIdx == null || this._thinkingLineIdx >= this.chatLines.length) { + this._thinkingLineIdx = this.chatLines.length; + this._thinkingText = ''; + this.chatLines.push(prefix + ANSI.reset); + } + this._thinkingText += token; + const tail = this._thinkingText.replace(/\s+/g, ' ').trim().slice(-120); + this.chatLines[this._thinkingLineIdx] = prefix + tail + ANSI.reset; + this.chatScroll = 0; + this.render(); + } + endStream() { this._lastLineIsStreaming = false; + this._thinkingLineIdx = null; this.chatLines.push(''); this.render(); } diff --git a/test/stream_assembler.test.js b/test/stream_assembler.test.js new file mode 100644 index 00000000..ae1d2f72 --- /dev/null +++ b/test/stream_assembler.test.js @@ -0,0 +1,103 @@ +'use strict'; + +// SmallCode — SSE stream assembler tests (issue #77, Phase B) +// Reassemble streamed OpenAI chunks into the non-streaming `data` shape and +// drive the live callbacks. Buffer parsing must tolerate split lines. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { StreamAssembler, parseSSEBuffer } = require('../bin/stream_assembler'); + +function chunk(delta, finish, usage) { + const o = { choices: [{ delta: delta || {}, finish_reason: finish || null }] }; + if (usage) o.usage = usage; + return o; +} + +test('assembles streamed content into one message', () => { + const a = new StreamAssembler(); + const seen = []; + a.pushChunk(chunk({ content: 'Hel' }), { onContent: (t) => seen.push(t) }); + a.pushChunk(chunk({ content: 'lo' }), { onContent: (t) => seen.push(t) }); + a.pushChunk(chunk({}, 'stop')); + const data = a.toData(); + assert.equal(data.choices[0].message.content, 'Hello'); + assert.equal(data.choices[0].finish_reason, 'stop'); + assert.deepEqual(seen, ['Hel', 'lo']); + assert.equal(data.choices[0].message.tool_calls, undefined); +}); + +test('accumulates tool_call deltas across chunks', () => { + const a = new StreamAssembler(); + a.pushChunk(chunk({ tool_calls: [{ index: 0, id: 'c1', function: { name: 'write_', arguments: '{"pa' } }] })); + a.pushChunk(chunk({ tool_calls: [{ index: 0, function: { name: 'file', arguments: 'th":"x.py"}' } }] })); + a.pushChunk(chunk({}, 'tool_calls')); + const tc = a.toData().choices[0].message.tool_calls; + assert.equal(tc.length, 1); + assert.equal(tc[0].id, 'c1'); + assert.equal(tc[0].function.name, 'write_file'); + assert.deepEqual(JSON.parse(tc[0].function.arguments), { path: 'x.py' }); +}); + +test('parallel tool_calls keyed by index', () => { + const a = new StreamAssembler(); + a.pushChunk(chunk({ tool_calls: [{ index: 0, id: 'a', function: { name: 'read_file', arguments: '{}' } }] })); + a.pushChunk(chunk({ tool_calls: [{ index: 1, id: 'b', function: { name: 'bash', arguments: '{}' } }] })); + const tc = a.toData().choices[0].message.tool_calls; + assert.equal(tc.length, 2); + assert.deepEqual(tc.map(t => t.function.name), ['read_file', 'bash']); +}); + +test('routes 
reasoning_content to onReasoning and into the message', () => { + const a = new StreamAssembler(); + const think = []; + a.pushChunk(chunk({ reasoning_content: 'let me ' }), { onReasoning: (t) => think.push(t) }); + a.pushChunk(chunk({ reasoning_content: 'think' }), { onReasoning: (t) => think.push(t) }); + a.pushChunk(chunk({ content: 'answer' }, 'stop')); + const data = a.toData(); + assert.deepEqual(think, ['let me ', 'think']); + assert.equal(data.choices[0].message.reasoning_content, 'let me think'); + assert.equal(data.choices[0].message.content, 'answer'); +}); + +test('captures usage from the final chunk', () => { + const a = new StreamAssembler(); + a.pushChunk(chunk({ content: 'hi' })); + a.pushChunk(chunk({}, 'stop', { prompt_tokens: 100, completion_tokens: 5, total_tokens: 105 })); + assert.deepEqual(a.toData().usage, { prompt_tokens: 100, completion_tokens: 5, total_tokens: 105 }); +}); + +test('parseSSEBuffer extracts complete events and keeps the partial tail', () => { + const raw = 'data: {"choices":[{"delta":{"content":"A"}}]}\n' + + 'data: [DONE]\n' + + 'data: {"choices":[{"delta":{"con'; // split mid-line + const { events, rest } = parseSSEBuffer(raw); + assert.equal(events.length, 2); + assert.equal(events[0].json.choices[0].delta.content, 'A'); + assert.equal(events[1].done, true); + assert.equal(rest, 'data: {"choices":[{"delta":{"con'); // carried over +}); + +test('parseSSEBuffer ignores non-data and blank lines', () => { + const { events } = parseSSEBuffer(': comment\n\nevent: foo\ndata: {"x":1}\n'); + assert.equal(events.length, 1); + assert.deepEqual(events[0].json, { x: 1 }); +}); + +test('end-to-end: split-buffer feed reconstructs the full message', () => { + // Simulate two network reads that split a data line down the middle. 
+ const a = new StreamAssembler(); + let buf = ''; + const reads = [ + 'data: {"choices":[{"delta":{"content":"Hel"}}]}\ndata: {"choices":[{"delta":{"cont', + 'ent":"lo"}}]}\ndata: {"choices":[{"delta":{},"finish_reason":"stop"}]}\ndata: [DONE]\n', + ]; + for (const r of reads) { + buf += r; + const { events, rest } = parseSSEBuffer(buf); + buf = rest; + for (const ev of events) if (ev.json) a.pushChunk(ev.json); + } + assert.equal(a.toData().choices[0].message.content, 'Hello'); +}); From 767d178180abb4be05b7412b71fe463b6bcd9d86 Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 12:22:57 -0700 Subject: [PATCH 26/27] fix(quality-monitor): validate hallucination check against full tool registry The hallucination check scoped knownTools to currentToolCategory, causing false "Tool X does not exist" steers when a real tool was invoked from a different category (the dispatcher widens to all essential tools and runs it). Validate against the full registry (getAllTools(config, null)) instead. Co-Authored-By: Claude Opus 4.8 --- bin/smallcode.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/bin/smallcode.js b/bin/smallcode.js index 9cb7d9fc..67a3517f 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -1164,7 +1164,15 @@ async function runAgentLoop(userMessage, config) { // SMALLCODE_QUALITY_MONITOR=false. try { if (String(process.env.SMALLCODE_QUALITY_MONITOR || 'true').toLowerCase() !== 'false') { - const knownTools = getAllTools(config, currentToolCategory) + // Hallucination check must validate against the FULL tool registry + // (all categories), NOT the current router category. A real tool + // invoked from a different category — e.g. write_file while the + // two-stage router has the model in 'read' — is NOT hallucinated: the + // dispatcher widens currentToolCategory to 'plan' (all essential tools) + // and runs it. 
Scoping knownTools to currentToolCategory caused false + // "Tool write_file does not exist" steers that derailed small models + // mid-task (e.g. minimax could never write a step's output file). + const knownTools = getAllTools(config, null) .map(t => t && t.function && t.function.name) .filter(Boolean); const signal = qualityMonitor.inspect({ message, knownTools }); From 6892641f6aa2b8b379c2b055673bd68e194ede4a Mon Sep 17 00:00:00 2001 From: shuff57 Date: Sun, 14 Jun 2026 12:25:24 -0700 Subject: [PATCH 27/27] fix(deps): restore lockfile after stale dependabot hono merge The dependabot/hono branch was cut from an old base; its 3-way merge stripped valid lockfile entries (playwright-extra, puppeteer-extra-plugin-stealth, rimraf, fs-extra, and ~25 others) that would break `npm ci`. hono is not a declared dependency, so the bump itself is a no-op. Restore package-lock.json to integration's current, correct state. Co-Authored-By: Claude Opus 4.8 --- package-lock.json | 433 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 428 insertions(+), 5 deletions(-) diff --git a/package-lock.json b/package-lock.json index ed03231e..10cfaca4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,8 +19,7 @@ "bin": { "smallcode": "bin/smallcode.js", "smallcode-init": "bin/init.js", - "smallcode-rag-index": "bin/rag-index.js", - "smolv2": "bin/smallcode.js" + "smallcode-rag-index": "bin/rag-index.js" }, "devDependencies": { "@types/node": "^25.9.0", @@ -1232,6 +1231,17 @@ "@babel/types": "^7.28.2" } }, + "node_modules/@types/debug": { + "version": "4.1.13", + "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.13.tgz", + "integrity": "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "@types/ms": "*" + } + }, "node_modules/@types/istanbul-lib-coverage": { "version": "2.0.6", "resolved": 
"https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz", @@ -1259,6 +1269,14 @@ "@types/istanbul-lib-report": "*" } }, + "node_modules/@types/ms": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz", + "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==", + "license": "MIT", + "optional": true, + "peer": true + }, "node_modules/@types/node": { "version": "25.9.0", "resolved": "https://registry.npmjs.org/@types/node/-/node-25.9.0.tgz", @@ -1736,6 +1754,17 @@ "sprintf-js": "~1.0.2" } }, + "node_modules/arr-union": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz", + "integrity": "sha512-sKpyeERZ02v1FeCZT8lrfJq5u6goHCtpTAzPwJYe7c8SPFOboNjNg1vz2L4VTn9T4PQxEx13TbXLmYUcS6Ug7Q==", + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/babel-jest": { "version": "30.4.1", "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-30.4.1.tgz", @@ -2236,6 +2265,24 @@ "wrap-ansi": "^7.0.0" } }, + "node_modules/clone-deep": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz", + "integrity": "sha512-we+NuQo2DHhSl+DP6jlUiAhyAjBQrYnpOk15rN6c6JSPScjiCLh8IbSU+VTcph6YS3o7mASE8a0+gbZ7ChLpgg==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "for-own": "^0.1.3", + "is-plain-object": "^2.0.1", + "kind-of": "^3.0.2", + "lazy-cache": "^1.0.3", + "shallow-clone": "^0.1.2" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/co": { "version": "4.6.0", "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", @@ -2874,6 +2921,31 @@ "node": ">=8" } }, + "node_modules/for-in": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/for-in/-/for-in-1.0.2.tgz", + "integrity": 
"sha512-7EwmXrOjyL+ChxMhmG5lnW9MPt1aIeZEwKhQzoBUdTV0N3zuwWDZYVJatDvZ2OyzPUvdIAZDsCetk3coyMfcnQ==", + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/for-own": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz", + "integrity": "sha512-SKmowqGTJoPzLO1T0BBJpkfp3EMacCMOuH40hOUbrbzElVktk4DioXVM99QkLCyKoiuOmyjgcWMpVz2xjE7LZw==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "for-in": "^1.0.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/forwarded": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", @@ -2899,6 +2971,22 @@ "license": "MIT", "optional": true }, + "node_modules/fs-extra": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz", + "integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { + "node": ">=12" + } + }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -3089,9 +3177,9 @@ } }, "node_modules/hono": { - "version": "4.12.23", - "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.23.tgz", - "integrity": "sha512-eIaZ9qDgu7XV0pxOCrg7/WhnQ6Ivm22UcxhXx/A3dcbqbbYgBEkc6e/J/s7j2tS96zoB0S9VBdLwQNCWwUo4LA==", + "version": "4.12.19", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.19.tgz", + "integrity": "sha512-xa3eYXYXx68XTT4hZ7dRzsXBhaq85ToSrlUJNoR0gwz/1Ap/CNwX47wfvV7pc/xWhjKVVkLT7zBJy8chhNguqQ==", "license": "MIT", "optional": true, "engines": { @@ -3241,6 +3329,25 @@ "dev": true, "license": "MIT" }, + "node_modules/is-buffer": { + "version": "1.1.6", + "resolved": 
"https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", + "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==", + "license": "MIT", + "optional": true, + "peer": true + }, + "node_modules/is-extendable": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz", + "integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==", + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/is-fullwidth-code-point": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", @@ -3260,6 +3367,20 @@ "node": ">=6" } }, + "node_modules/is-plain-object": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz", + "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "isobject": "^3.0.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/is-promise": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz", @@ -3286,6 +3407,17 @@ "devOptional": true, "license": "ISC" }, + "node_modules/isobject": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", + "integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==", + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/istanbul-lib-coverage": { "version": "3.2.2", "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz", @@ -4085,6 +4217,45 @@ "node": ">=6" } }, + "node_modules/jsonfile": { + "version": "6.2.1", + "resolved": 
"https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.1.tgz", + "integrity": "sha512-zwOTdL3rFQ/lRdBnntKVOX6k5cKJwEc1HdilT71BWEu7J41gXIB2MRp+vxduPSwZJPWBxEzv4yH1wYLJGUHX4Q==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "universalify": "^2.0.0" + }, + "optionalDependencies": { + "graceful-fs": "^4.1.6" + } + }, + "node_modules/kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "is-buffer": "^1.1.5" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/lazy-cache": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-1.0.4.tgz", + "integrity": "sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ==", + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/leven": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", @@ -4249,6 +4420,22 @@ "node": ">= 0.8" } }, + "node_modules/merge-deep": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/merge-deep/-/merge-deep-3.0.3.tgz", + "integrity": "sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "arr-union": "^3.1.0", + "clone-deep": "^0.2.4", + "kind-of": "^3.0.2" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/merge-descriptors": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz", @@ -4352,6 +4539,32 @@ "node": ">=16 || 14 >=14.17" } }, + "node_modules/mixin-object": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/mixin-object/-/mixin-object-2.0.1.tgz", + "integrity": "sha512-ALGF1Jt9ouehcaXaHhn6t1yGWRqGaHkPFndtFVHfZXOvkIZ/yoGaSi0AHVTafb3ZBGg4dr/bDwnaEKqCXzchMA==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "for-in": "^0.1.3", + "is-extendable": "^0.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/mixin-object/node_modules/for-in": { + "version": "0.1.8", + "resolved": "https://registry.npmjs.org/for-in/-/for-in-0.1.8.tgz", + "integrity": "sha512-F0to7vbBSHP8E3l6dCjxNOLuSFAACIxFy3UehTUlG7svlXi37HHsDkyVcHo0Pq8QwrE+pXvWSVX3ZT1T9wAZ9g==", + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/mkdirp-classic": { "version": "0.5.3", "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", @@ -4606,6 +4819,14 @@ "node": ">=6" } }, + "node_modules/package-json-from-dist": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", + "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", + "license": "BlueOak-1.0.0", + "optional": true, + "peer": true + }, "node_modules/parse-json": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", @@ -4765,6 +4986,20 @@ "node": ">=8" } }, + "node_modules/playwright-extra": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/playwright-extra/-/playwright-extra-4.3.0.tgz", + "integrity": "sha512-/Hec3BmYMY/GznBo0ZPsSqm6IHil7jInxLz+9/UnBBC5Ozh2abVNnv+vYNJ+JKKRVtiKyHCrJpKRibJ9uribZw==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "debug": "^4.3.4" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/prebuild-install": { "version": "7.1.3", "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", @@ -4846,6 +5081,120 @@ "once": 
"^1.3.1" } }, + "node_modules/puppeteer-extra-plugin": { + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin/-/puppeteer-extra-plugin-3.2.3.tgz", + "integrity": "sha512-6RNy0e6pH8vaS3akPIKGg28xcryKscczt4wIl0ePciZENGE2yoaQJNd17UiEbdmh5/6WW6dPcfRWT9lxBwCi2Q==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "@types/debug": "^4.1.0", + "debug": "^4.1.1", + "merge-deep": "^3.0.1" + }, + "engines": { + "node": ">=9.11.2" + }, + "peerDependencies": { + "playwright-extra": "*", + "puppeteer-extra": "*" + }, + "peerDependenciesMeta": { + "playwright-extra": { + "optional": true + }, + "puppeteer-extra": { + "optional": true + } + } + }, + "node_modules/puppeteer-extra-plugin-stealth": { + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-stealth/-/puppeteer-extra-plugin-stealth-2.11.0.tgz", + "integrity": "sha512-BqckPV95MHP25quZgzBnZJD8S38ZYP4B3HJ3Kr/vibqxJxhK6L1VQ6jnu/JcFKV0wzCIQPrCiiavZnwE5u1C2A==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "debug": "^4.1.1", + "puppeteer-extra-plugin": "^3.2.2", + "puppeteer-extra-plugin-user-preferences": "^2.4.0" + }, + "engines": { + "node": ">=8" + }, + "peerDependencies": { + "playwright-extra": "*", + "puppeteer-extra": "*" + }, + "peerDependenciesMeta": { + "playwright-extra": { + "optional": true + }, + "puppeteer-extra": { + "optional": true + } + } + }, + "node_modules/puppeteer-extra-plugin-user-data-dir": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-data-dir/-/puppeteer-extra-plugin-user-data-dir-2.4.1.tgz", + "integrity": "sha512-kH1GnCcqEDoBXO7epAse4TBPJh9tEpVEK/vkedKfjOVOhZAvLkHGc9swMs5ChrJbRnf8Hdpug6TJlEuimXNQ+g==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "debug": "^4.1.1", + "fs-extra": "^10.0.0", + "puppeteer-extra-plugin": "^3.2.3", + "rimraf": "^3.0.2" + }, + "engines": { + "node": ">=8" + }, + 
"peerDependencies": { + "playwright-extra": "*", + "puppeteer-extra": "*" + }, + "peerDependenciesMeta": { + "playwright-extra": { + "optional": true + }, + "puppeteer-extra": { + "optional": true + } + } + }, + "node_modules/puppeteer-extra-plugin-user-preferences": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-preferences/-/puppeteer-extra-plugin-user-preferences-2.4.1.tgz", + "integrity": "sha512-i1oAZxRbc1bk8MZufKCruCEC3CCafO9RKMkkodZltI4OqibLFXF3tj6HZ4LZ9C5vCXZjYcDWazgtY69mnmrQ9A==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "debug": "^4.1.1", + "deepmerge": "^4.2.2", + "puppeteer-extra-plugin": "^3.2.3", + "puppeteer-extra-plugin-user-data-dir": "^2.4.1" + }, + "engines": { + "node": ">=8" + }, + "peerDependencies": { + "playwright-extra": "*", + "puppeteer-extra": "*" + }, + "peerDependenciesMeta": { + "playwright-extra": { + "optional": true + }, + "puppeteer-extra": { + "optional": true + } + } + }, "node_modules/pure-rand": { "version": "7.0.1", "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-7.0.1.tgz", @@ -5001,6 +5350,27 @@ "node": ">=8" } }, + "node_modules/rimraf": { + "version": "6.1.3", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-6.1.3.tgz", + "integrity": "sha512-LKg+Cr2ZF61fkcaK1UdkH2yEBBKnYjTyWzTJT6KNPcSPaiT7HSdhtMXQuN5wkTX0Xu72KQ1l8S42rlmexS2hSA==", + "license": "BlueOak-1.0.0", + "optional": true, + "peer": true, + "dependencies": { + "glob": "^13.0.3", + "package-json-from-dist": "^1.0.1" + }, + "bin": { + "rimraf": "dist/esm/bin.mjs" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/router": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz", @@ -5105,6 +5475,48 @@ "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", "license": "ISC" }, + 
"node_modules/shallow-clone": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-0.1.2.tgz", + "integrity": "sha512-J1zdXCky5GmNnuauESROVu31MQSnLoYvlyEn6j2Ztk6Q5EHFIhxkMhYcv6vuDzl2XEzoRr856QwzMgWM/TmZgw==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "is-extendable": "^0.1.1", + "kind-of": "^2.0.1", + "lazy-cache": "^0.2.3", + "mixin-object": "^2.0.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/shallow-clone/node_modules/kind-of": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz", + "integrity": "sha512-0u8i1NZ/mg0b+W3MGGw5I7+6Eib2nx72S/QvXa0hYjEkjTknYmEYQJwGu3mLC0BrhtJjtQafTkyRUQ75Kx0LVg==", + "license": "MIT", + "optional": true, + "peer": true, + "dependencies": { + "is-buffer": "^1.0.2" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/shallow-clone/node_modules/lazy-cache": { + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-0.2.7.tgz", + "integrity": "sha512-gkX52wvU/R8DVMMt78ATVPFMJqfW8FPz1GZ1sVHBVQHmu/WvhIWE4cE1GBzhJNFicDeYhnwp6Rl35BcAIM3YOQ==", + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -5724,6 +6136,17 @@ "node": ">=4" } }, + "node_modules/universalify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", + "license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">= 10.0.0" + } + }, "node_modules/unpipe": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",