Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,23 @@ skilld author assemble

`skilld author assemble` auto-discovers skills with pending output files. `skilld update` re-exports prompts for outdated packages.

### Local Models (Ollama)

If [Ollama](https://ollama.com) is running, skilld auto-detects your locally pulled text models and lists them in `skilld config`. You can also select one directly with `-m ollama:<name>`:

```bash
skilld add npm:vue -m ollama:qwen2.5:14b-instruct
```

Generation runs locally: free, offline, no API key. Unlike the CLI and API backends, the Ollama path is one-shot (it does not explore reference files with tools), so all source material is inlined into a single prompt. It works best with a capable instruct model.

| Env var | Default | Purpose |
|---------|---------|---------|
| `OLLAMA_HOST` | `http://localhost:11434` | Ollama daemon address |
| `OLLAMA_NUM_CTX` | `32768` | Context window; sized to fit the full prompt + output |

The large default context can exceed available memory for big models on constrained hardware, in which case Ollama returns an HTTP 500. If the model fails to load, lower `OLLAMA_NUM_CTX` or pick a smaller model.

### Eject

Export a skill as a portable, self-contained directory for sharing via git repos:
Expand Down
3 changes: 3 additions & 0 deletions src/agent/clis/executors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import type { SectionExecutor } from './runner.ts'
import type { OptimizeModel } from './types.ts'
import { getSkillReferenceDirs } from '../../cache/index.ts'
import { CLI_ADAPTERS, CLI_MODELS } from './index.ts'
import { isOllamaModel, ollamaExecutor } from './ollama.ts'
import { getAvailablePiAiModels, isPiAiModel, optimizeSectionPiAi } from './pi-ai.ts'
import { spawnCliAndStream } from './runner.ts'

Expand Down Expand Up @@ -64,5 +65,7 @@ function piAiExecutor(model: OptimizeModel): SectionExecutor | { error: string }

/**
 * Pick the executor that handles `model`.
 *
 * Ollama ids are checked first, then pi-ai ids; anything else is delegated to
 * the CLI executor. Returns an `{ error }` object instead of an executor when
 * the chosen backend cannot serve the model.
 */
export function selectExecutor(model: OptimizeModel): SectionExecutor | { error: string } {
  if (isOllamaModel(model))
    return ollamaExecutor(model)
  if (isPiAiModel(model))
    return piAiExecutor(model)
  return cliExecutor(model)
}
24 changes: 23 additions & 1 deletion src/agent/clis/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { agents } from '../targets/index.ts'
import { adapter as claudeAdapter } from './claude.ts'
import { adapter as codexAdapter } from './codex.ts'
import { adapter as geminiAdapter } from './gemini.ts'
import { getAvailableOllamaModels, isOllamaModel, parseOllamaModelId } from './ollama.ts'
import { getAvailablePiAiModels, isPiAiModel, parsePiAiModelId } from './pi-ai.ts'

export { buildAllSectionPrompts, buildSectionPrompt, SECTION_MERGE_ORDER, SECTION_OUTPUT_FILES } from '../prompts/index.ts'
Expand Down Expand Up @@ -62,6 +63,8 @@ export const CLI_MODELS: Partial<Record<OptimizeModel, CliModelConfig>> = Object
// ── Model helpers ────────────────────────────────────────────────────

export function getModelName(id: OptimizeModel): string {
if (isOllamaModel(id))
return parseOllamaModelId(id) ?? id
if (isPiAiModel(id)) {
const parsed = parsePiAiModelId(id)
return parsed?.modelId ?? id
Expand All @@ -70,6 +73,8 @@ export function getModelName(id: OptimizeModel): string {
}

export function getModelLabel(id: OptimizeModel): string {
if (isOllamaModel(id))
return `Ollama Β· ${parseOllamaModelId(id) ?? id}`
if (isPiAiModel(id)) {
const parsed = parsePiAiModelId(id)
return parsed ? `${PI_PROVIDER_NAMES[parsed.provider] ?? parsed.provider} Β· ${parsed.modelId}` : id
Expand All @@ -85,6 +90,10 @@ export async function getAvailableModels(): Promise<import('./types.ts').ModelIn
const execAsync = promisify(exec)
const lookupCmd = isWindows ? 'where' : 'which'

// Kick off Ollama discovery concurrently with the CLI lookups below; it has
// its own short timeout and resolves to [] when the daemon is unreachable.
const ollamaModelsPromise = getAvailableOllamaModels()

const installedAgents = detectInstalledAgents()
const agentsWithCli = installedAgents.filter(id => agents[id].cli)

Expand Down Expand Up @@ -137,5 +146,18 @@ export async function getAvailableModels(): Promise<import('./types.ts').ModelIn
}
})

return [...cliModels, ...piAiEntries]
// Append locally-pulled Ollama models (one-shot, no auth, free).
const ollamaModels = await ollamaModelsPromise
const ollamaEntries = ollamaModels.map(m => ({
id: m.id,
name: m.name,
hint: m.hint,
agentId: 'ollama',
agentName: 'Ollama (local)',
provider: 'ollama',
providerName: 'Ollama (local)',
vendorGroup: 'Ollama',
}))

return [...cliModels, ...piAiEntries, ...ollamaEntries]
}
200 changes: 200 additions & 0 deletions src/agent/clis/ollama.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
/**
 * Ollama executor — one-shot local completions via Ollama's `/api/chat`.
 *
 * Unlike the CLI adapters (claude/codex/gemini) this is NOT an agentic loop:
 * no tools, no file exploration, no multi-turn transcript. The caller supplies
 * the full prompt and we return the single completion. That makes it free
 * (local) and cheap on tokens — a good fit for guide synthesis where all the
 * source material is already inlined into the prompt.
 *
 * Ollama has no first-class provider in pi-ai, and pi's OpenAI-compatible path
 * can't set Ollama's `num_ctx` option — the knob that prevents silent prompt
 * truncation below — so we talk to `/api/chat` directly.
 *
 * Model ids are `ollama:<name>`, e.g. `ollama:qwen2.5:14b-instruct`. The host
 * defaults to http://localhost:11434; override it with `OLLAMA_HOST`.
 */

import type { SectionExecutor } from './runner.ts'
import type { OptimizeModel } from './types.ts'

/** Prefix that marks a model id as Ollama-backed (`ollama:<name>`). */
const OLLAMA_PREFIX = 'ollama:'

/** True when `model` begins with the `ollama:` prefix (case-sensitive). */
export function isOllamaModel(model: string): boolean {
  const head = model.slice(0, OLLAMA_PREFIX.length)
  return head === OLLAMA_PREFIX
}

/**
 * Strip the `ollama:` prefix from a model id, e.g.
 * `ollama:qwen2.5:14b-instruct` → `qwen2.5:14b-instruct`.
 *
 * @returns the bare Ollama model name, or `null` when `model` is not an Ollama id.
 */
export function parseOllamaModelId(model: string): string | null {
  if (!isOllamaModel(model))
    return null
  return model.substring(OLLAMA_PREFIX.length)
}

/** Base URL used when `OLLAMA_HOST` is unset or blank. */
const DEFAULT_OLLAMA_HOST = 'http://localhost:11434'
/** Port Ollama listens on when a scheme-less `OLLAMA_HOST` does not name one. */
const DEFAULT_OLLAMA_PORT = '11434'
/** Matches an explicit http/https scheme (case-insensitive). */
const HAS_SCHEME_RE = /^https?:\/\//i
/** Matches one or more trailing slashes so paths can be appended with a single `/`. */
const TRAILING_SLASH_RE = /\/+$/
/** Matches a trailing `:<digits>` port on a `host[:port]` authority. */
const HAS_PORT_RE = /:\d+$/

/**
 * Resolve the Ollama base URL (no trailing slash) from `OLLAMA_HOST`.
 *
 * - Unset or blank → `http://localhost:11434`.
 * - A value with an explicit `http://` / `https://` scheme is used as given.
 * - A scheme-less value (`host`, `host:port`, `[::1]`) gets `http://`, and the
 *   default Ollama port when none is present. This keeps the common
 *   `OLLAMA_HOST=0.0.0.0` setting working instead of silently targeting port 80.
 */
function ollamaHost(): string {
  const raw = (process.env.OLLAMA_HOST ?? '').trim()
  if (!raw)
    return DEFAULT_OLLAMA_HOST

  if (HAS_SCHEME_RE.test(raw))
    return raw.replace(TRAILING_SLASH_RE, '')

  // Split `host[:port]` from any path so the port check only sees the authority.
  const slashAt = raw.indexOf('/')
  const authority = slashAt === -1 ? raw : raw.slice(0, slashAt)
  const rest = slashAt === -1 ? '' : raw.slice(slashAt)
  const hostPort = HAS_PORT_RE.test(authority) ? authority : `${authority}:${DEFAULT_OLLAMA_PORT}`
  return `http://${hostPort}${rest}`.replace(TRAILING_SLASH_RE, '')
}

/**
 * One NDJSON line of a streamed `/api/chat` response, limited to the fields
 * this module reads. Every field is optional because not every line carries it.
 */
interface OllamaChatChunk {
  /** Incremental assistant output; `content` is appended to the accumulated text. */
  message?: { content?: string }
  /** Set on the line that carries the token counts below. */
  done?: boolean
  /** Reported as the `input` usage figure. */
  prompt_eval_count?: number
  /** Reported as the `output` usage figure. */
  eval_count?: number
  /** When present, the stream is treated as failed with this message. */
  error?: string
}

/** A locally available Ollama model as surfaced to the model picker. */
export interface OllamaModelInfo {
  /** Prefixed model id, `ollama:<name>`. */
  id: OptimizeModel
  /** Model name as reported by Ollama, without the `ollama:` prefix. */
  name: string
  /** Short descriptor starting with `local`, optionally followed by size details. */
  hint: string
}

/** Subset of the `/api/tags` response body that model discovery reads. */
interface OllamaTagsResponse {
  models?: Array<{
    /** Model name; used to build the `ollama:<name>` id. */
    name: string
    /** Divided by 1e9 to display gigabytes, so presumably a byte count — confirm against the Ollama API. */
    size?: number
    /** Preferred source for the display hint when `parameter_size` is present. */
    details?: { parameter_size?: string, quantization_level?: string }
  }>
}

/** Subset of the `/api/show` response body used to tell text models from others. */
interface OllamaShowResponse {
  /** Capability names; `completion` and `insert` mark a model as text-capable here. */
  capabilities?: string[]
}

/**
 * Decide whether `name` can generate text.
 *
 * `/api/tags` cannot distinguish chat models from embedding-only ones, and an
 * embedding model fails on `/api/chat`, so `/api/show` is probed for the
 * model's capabilities. The check fails open: a network error, a non-OK
 * status, an unparsable body, or a missing/empty `capabilities` list all keep
 * the model.
 */
async function isCompletionModel(name: string): Promise<boolean> {
  const url = `${ollamaHost()}/api/show`
  const init = {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ model: name }),
    signal: AbortSignal.timeout(1500),
  }

  let showRes: Response
  try {
    showRes = await fetch(url, init)
  }
  catch {
    // Probe unreachable or timed out: keep the model.
    return true
  }
  if (!showRes.ok)
    return true

  let payload: OllamaShowResponse | null
  try {
    payload = await showRes.json() as OllamaShowResponse | null
  }
  catch {
    // Body was not valid JSON: keep the model.
    return true
  }

  const capabilities = payload?.capabilities
  // Older daemons omit the list entirely; treat that as "unknown, keep it".
  if (!capabilities?.length)
    return true
  if (capabilities.includes('completion'))
    return true
  return capabilities.includes('insert')
}

/**
 * Enumerate locally pulled, text-capable Ollama models via `/api/tags`.
 *
 * Discovery must never block or throw. It returns `[]` when the daemon is
 * unreachable, answers with a non-OK status, or sends a body that is not the
 * expected `{ models: [...] }` shape. Entries without a usable string `name`
 * are skipped rather than turned into a bogus `ollama:undefined` id.
 *
 * Each remaining model is probed with `isCompletionModel` (in parallel) so
 * embedding-only models are left out.
 *
 * @returns one entry per usable model, with a `local · <details>` hint built
 * from parameter size and quantization, or the on-disk size in GB as a fallback.
 */
export async function getAvailableOllamaModels(): Promise<OllamaModelInfo[]> {
  const res = await fetch(`${ollamaHost()}/api/tags`, { signal: AbortSignal.timeout(1500) })
    .catch(() => null)
  if (!res?.ok)
    return []

  const data = await res.json().catch(() => null) as OllamaTagsResponse | null
  const models = data?.models
  // The body is untrusted JSON: anything other than an array would make `.map` throw.
  if (!Array.isArray(models))
    return []

  const named = models.filter(m => typeof m?.name === 'string' && m.name !== '')

  const checked = await Promise.all(named.map(async (m): Promise<OllamaModelInfo | null> => {
    if (!(await isCompletionModel(m.name)))
      return null
    const params = m.details?.parameter_size
    const quant = m.details?.quantization_level
    const sizeGb = m.size ? `${(m.size / 1e9).toFixed(1)}GB` : undefined
    const detail = params ? `${params}${quant ? ` ${quant}` : ''}` : sizeGb
    return {
      id: `ollama:${m.name}`,
      name: m.name,
      hint: detail ? `local · ${detail}` : 'local',
    }
  }))

  return checked.filter((m): m is OllamaModelInfo => m !== null)
}

/** Context window requested when `OLLAMA_NUM_CTX` is unset or not a positive integer. */
const DEFAULT_OLLAMA_NUM_CTX = 32768

/** Read `OLLAMA_NUM_CTX`, ignoring blank, non-numeric, fractional, or non-positive values. */
function ollamaNumCtx(): number {
  const parsed = Number(process.env.OLLAMA_NUM_CTX)
  return Number.isInteger(parsed) && parsed > 0 ? parsed : DEFAULT_OLLAMA_NUM_CTX
}

/** Turn a thrown value into a message, appending the underlying `cause` when one is attached. */
function ollamaErrorMessage(err: unknown): string {
  if (!(err instanceof Error))
    return String(err)
  const cause = (err as { cause?: unknown }).cause
  return cause instanceof Error && cause.message ? `${err.message} (${cause.message})` : err.message
}

/** Describe a non-OK response, preferring the `error` field of Ollama's JSON error body. */
async function describeOllamaFailure(res: Response): Promise<string> {
  const status = `HTTP ${res.status}${res.statusText ? ` ${res.statusText}` : ''}`
  // Reading the body also releases the connection instead of leaving it dangling.
  const body = (await res.text().catch(() => '')).trim()
  if (!body)
    return status
  try {
    const parsed = JSON.parse(body) as { error?: unknown } | null
    if (typeof parsed?.error === 'string' && parsed.error)
      return `${status}: ${parsed.error}`
  }
  catch {
    // Not JSON: fall through and report the raw body.
  }
  return `${status}: ${body.slice(0, 500)}`
}

/**
 * Build the one-shot executor for an `ollama:<name>` model.
 *
 * `run` posts the whole prompt to `/api/chat` as a single user message,
 * streams the NDJSON reply, reports each text delta through `onProgress`, and
 * resolves with the trimmed completion plus token usage (`cost` is always 0).
 *
 * Failures never throw; they resolve to `{ text: '', stderr, exitCode: 1 }`:
 * - connection errors suggest checking `ollama serve`;
 * - HTTP errors carry Ollama's own error message (e.g. model not found, out of memory);
 * - exceeding `timeout` (milliseconds) is reported as a timeout, not a generic abort;
 * - a stream-level `error` chunk or malformed line is reported as a stream error;
 * - an empty completion is reported as "no content".
 *
 * @param model Model id; must start with `ollama:` and name a model.
 * @returns the executor, or `{ error }` when `model` is not a usable Ollama id.
 */
export function ollamaExecutor(model: OptimizeModel): SectionExecutor | { error: string } {
  const modelId = parseOllamaModelId(model)
  if (!modelId)
    return { error: `Not an Ollama model: ${model}` }

  return {
    cliCleanup: false,
    run: async ({ section, prompt, timeout, onProgress }) => {
      const host = ollamaHost()
      const ac = new AbortController()
      const timer = setTimeout(() => ac.abort(), timeout)
      const fail = (stderr: string) => ({ text: '', stderr, exitCode: 1 })
      const timedOut = () => fail(`Ollama request timed out after ${timeout}ms`)

      try {
        onProgress?.({ chunk: '[ollama]', type: 'reasoning', text: '', reasoning: 'Generating locally…', section })

        let res: Response
        try {
          res = await fetch(`${host}/api/chat`, {
            method: 'POST',
            headers: { 'content-type': 'application/json' },
            body: JSON.stringify({
              model: modelId,
              messages: [{ role: 'user', content: prompt }],
              stream: true,
              // Ollama defaults num_ctx to ~4k, which silently truncates our
              // ~16k-token prompt (release notes) and starves synthesis. Size it to
              // hold the full material + output; override with OLLAMA_NUM_CTX.
              options: { temperature: 0.2, num_ctx: ollamaNumCtx() },
            }),
            signal: ac.signal,
          })
        }
        catch (err) {
          // Only our own timer aborts this signal, so an aborted signal means timeout.
          if (ac.signal.aborted)
            return timedOut()
          return fail(`Ollama request failed: ${ollamaErrorMessage(err)}. Is \`ollama serve\` running at ${host}?`)
        }

        // The daemon answered, so it is running: surface its own error text.
        if (!res.ok)
          return fail(`Ollama request failed: ${await describeOllamaFailure(res)}`)
        if (!res.body)
          return fail('Ollama request failed: empty response body')

        let text = ''
        let usage: { input: number, output: number } | undefined

        // Ollama streams NDJSON: one JSON object per line, the final one carrying
        // `done: true` plus token counts. Accumulate content and surface deltas.
        const handleLine = (line: string): void => {
          if (!line.trim())
            return
          const chunk = JSON.parse(line) as OllamaChatChunk
          if (chunk.error)
            throw new Error(chunk.error)
          const delta = chunk.message?.content
          if (delta) {
            text += delta
            onProgress?.({ chunk: delta, type: 'text', text, reasoning: '', section })
          }
          if (chunk.done)
            usage = { input: chunk.prompt_eval_count ?? 0, output: chunk.eval_count ?? 0 }
        }

        const reader = res.body.getReader()
        const decoder = new TextDecoder()
        let buffer = ''
        try {
          for (;;) {
            const { done, value } = await reader.read()
            if (done)
              break
            buffer += decoder.decode(value, { stream: true })
            const lines = buffer.split('\n')
            // The last piece may be a partial line; keep it for the next read.
            buffer = lines.pop() ?? ''
            for (const line of lines)
              handleLine(line)
          }
          // Flush the decoder and process a final line that lacks a trailing
          // newline, otherwise the last delta and the usage counts are lost.
          buffer += decoder.decode()
          handleLine(buffer)
        }
        catch (err) {
          // Stop the transfer; a failure to cancel adds nothing to the report.
          await reader.cancel().catch(() => {})
          if (ac.signal.aborted)
            return timedOut()
          return fail(`Ollama stream error: ${ollamaErrorMessage(err)}`)
        }

        if (!text.trim())
          return fail('Ollama returned no content')

        return { text: text.trim(), usage, cost: 0 }
      }
      finally {
        clearTimeout(timer)
      }
    },
  }
}
2 changes: 2 additions & 0 deletions src/agent/clis/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ export type OptimizeModel
| 'gpt-5.2-codex'
// pi-ai direct API models β€” dynamic from pi-ai's model registry
| `pi:${string}`
// Local Ollama models, one-shot completions β€” e.g. `ollama:qwen2.5:14b-instruct`
| `ollama:${string}`

export interface ModelInfo {
id: OptimizeModel
Expand Down
Loading