From 53c23ff8a172377627375a2e2b48ee6b042ba060 Mon Sep 17 00:00:00 2001 From: prode Date: Mon, 8 Jun 2026 18:49:01 -0300 Subject: [PATCH 1/2] feat(update): preserve model and effort fields during updates for managed agents and skills --- internal/cli/update.go | 176 +++++++++++++++++++++++++++++------- internal/cli/update_test.go | 99 ++++++++++++++++++++ 2 files changed, 244 insertions(+), 31 deletions(-) diff --git a/internal/cli/update.go b/internal/cli/update.go index dd5937b..73e62df 100644 --- a/internal/cli/update.go +++ b/internal/cli/update.go @@ -8,8 +8,10 @@ import ( "os" "path/filepath" "sort" + "strings" "time" + "github.com/protonspy/csdd/internal/frontmatter" "github.com/protonspy/csdd/internal/manifest" "github.com/protonspy/csdd/internal/paths" "github.com/protonspy/csdd/internal/render" @@ -22,23 +24,27 @@ import ( // .mcp.json, settings.json, CLAUDE.md, and any custom (non-shipped) skill or // agent — are deliberately NOT collected here, so update can never clobber them. type managedFile struct { - Rel string // workspace-relative, forward-slash: manifest key + display name - Abs string // absolute on-disk path - Content string // the content this csdd version ships for the file - Exec bool // chmod 0755 after writing (hook scripts) + Rel string // workspace-relative, forward-slash: manifest key + display name + Abs string // absolute on-disk path + Content string // the content this csdd version ships for the file + Exec bool // chmod 0755 after writing (hook scripts) + PreserveFrontmatter []string // local scalar overrides update carries forward } +var managedExecutionOverrideKeys = []string{"model", "effort"} + // collectManagedFiles enumerates every pure-csdd artifact `csdd init` scaffolds // from the embedded template tree: generation rules, versioned templates, the // shipped skills/agents/commands/hooks, the canonical guide, and csdd.md. func collectManagedFiles(root string, templates embed.FS) ([]managedFile, error) { var out []managedFile - add := func(abs, content string, exec bool) { + add := func(abs, content string, exec bool, preserveFrontmatter []string) { out = append(out, managedFile{ - Rel: filepath.ToSlash(workspace.Relative(root, abs)), - Abs: abs, - Content: content, - Exec: exec, + Rel: filepath.ToSlash(workspace.Relative(root, abs)), + Abs: abs, + Content: content, + Exec: exec, + PreserveFrontmatter: preserveFrontmatter, }) } @@ -47,7 +53,7 @@ func collectManagedFiles(root string, templates embed.FS) ([]managedFile, error) return nil, err } for name, c := range rules { - add(filepath.Join(paths.Rules(root), name), c, false) + add(filepath.Join(paths.Rules(root), name), c, false, nil) } versioned, err := templater.WorkflowTemplateFiles(templates) @@ -55,18 +61,19 @@ func collectManagedFiles(root string, templates embed.FS) ([]managedFile, error) return nil, err } for rel, c := range versioned { - add(filepath.Join(paths.Templates(root), filepath.FromSlash(rel)), c, false) + add(filepath.Join(paths.Templates(root), filepath.FromSlash(rel)), c, false, nil) } trees := []struct { - base string - fn func(fs.FS) (map[string]string, error) - exec bool + base string + fn func(fs.FS) (map[string]string, error) + exec bool + preserveFrontmatter []string }{ - {paths.Skills(root), templater.SkillFiles, false}, - {paths.Agents(root), templater.AgentFiles, false}, - {paths.Commands(root), templater.CommandFiles, false}, - {paths.Hooks(root), templater.HookFiles, true}, + {paths.Skills(root), templater.SkillFiles, false, managedExecutionOverrideKeys}, + {paths.Agents(root), templater.AgentFiles, false, managedExecutionOverrideKeys}, + {paths.Commands(root), templater.CommandFiles, false, nil}, + {paths.Hooks(root), templater.HookFiles, true, nil}, } for _, t := range trees { entries, err := t.fn(templates) @@ -74,7 +81,7 @@ func collectManagedFiles(root string, templates embed.FS) ([]managedFile, error) return nil, err } for rel, c := range entries { - add(filepath.Join(t.base, filepath.FromSlash(rel)), c, t.exec) + add(filepath.Join(t.base, filepath.FromSlash(rel)), c, t.exec, t.preserveFrontmatter) } } @@ -82,13 +89,13 @@ func collectManagedFiles(root string, templates embed.FS) ([]managedFile, error) if err != nil { return nil, err } - add(filepath.Join(root, "docs", "guides", "claude-code-sdd.md"), guide, false) + add(filepath.Join(root, "docs", "guides", "claude-code-sdd.md"), guide, false, nil) csddmd, err := templater.Static(templates, "templates/root/csdd.md.tmpl") if err != nil { return nil, err } - add(filepath.Join(root, "csdd.md"), csddmd, false) + add(filepath.Join(root, "csdd.md"), csddmd, false, nil) // Deterministic order so dry-run previews and reports are stable. sort.Slice(out, func(i, j int) bool { return out[i].Rel < out[j].Rel }) @@ -115,7 +122,7 @@ func recordManifest(root string, templates embed.FS, now time.Time, skipped map[ continue } } - m.Files[f.Rel] = manifest.Hash(f.Content) + m.Files[f.Rel] = managedBaselineHash(f, f.Content) } return m.Save(paths.Manifest(root), version, now) } @@ -247,12 +254,12 @@ func updateWorkspace(opts updateOptions, templates embed.FS, now time.Time) (upd res.firstRun = !existed for _, f := range files { - shipped := manifest.Hash(f.Content) + shipped := managedBaselineHash(f, f.Content) diskBytes, rerr := os.ReadFile(f.Abs) if os.IsNotExist(rerr) { if !opts.dryRun { - if err := writeManaged(f); err != nil { + if err := writeManaged(f, f.Content); err != nil { return res, err } } @@ -264,16 +271,18 @@ func updateWorkspace(opts updateOptions, templates embed.FS, now time.Time) (upd } diskHash := manifest.Hash(string(diskBytes)) - if diskHash == shipped { + diskBaselineHash := managedBaselineHash(f, string(diskBytes)) + writeContent := managedWriteContent(f, string(diskBytes)) + if diskHash == manifest.Hash(writeContent) || diskBaselineHash == shipped { res.changes = append(res.changes, fileChange{rel: f.Rel, kind: kindCurrent}) continue } - if known, ok := base.Files[f.Rel]; ok && diskHash == known { + if known, ok := base.Files[f.Rel]; ok && diskBaselineHash == known { // Disk matches the last baseline csdd wrote: the user never edited it, // so refreshing in place loses nothing. if !opts.dryRun { - if err := writeManaged(f); err != nil { + if err := writeManaged(f, writeContent); err != nil { return res, err } } @@ -301,7 +310,7 @@ func updateWorkspace(opts updateOptions, templates embed.FS, now time.Time) (upd } } if !opts.dryRun { - if err := writeManaged(f); err != nil { + if err := writeManaged(f, writeContent); err != nil { return res, err } } @@ -322,13 +331,40 @@ func updateWorkspace(opts updateOptions, templates embed.FS, now time.Time) (upd return res, nil } -// writeManaged writes a managed file's shipped content, creating parent dirs and +// managedBaselineHash returns the hash used for manifest and pristine checks. +// For managed agents and skills, model/effort are local execution overrides: +// update must preserve them, but they should not make an otherwise-pristine +// shipped artifact look user-edited. +func managedBaselineHash(f managedFile, content string) string { + return manifest.Hash(stripFrontmatterFields(content, f.PreserveFrontmatter)) +} + +// managedWriteContent overlays preserved frontmatter scalar values from the +// existing file onto this version's shipped content. +func managedWriteContent(f managedFile, existing string) string { + if len(f.PreserveFrontmatter) == 0 { + return f.Content + } + fm := frontmatter.Parse(existing) + values := map[string]string{} + for _, key := range f.PreserveFrontmatter { + if value := fm.AsString(key, ""); value != "" { + values[key] = value + } + } + if len(values) == 0 { + return f.Content + } + return upsertFrontmatterFields(f.Content, f.PreserveFrontmatter, values) +} + +// writeManaged writes a managed file's effective content, creating parent dirs and // restoring the executable bit for hook scripts. -func writeManaged(f managedFile) error { +func writeManaged(f managedFile, content string) error { if err := os.MkdirAll(filepath.Dir(f.Abs), 0o755); err != nil { return err } - if err := os.WriteFile(f.Abs, []byte(f.Content), 0o644); err != nil { + if err := os.WriteFile(f.Abs, []byte(content), 0o644); err != nil { return err } if f.Exec { @@ -337,6 +373,84 @@ func writeManaged(f managedFile) error { return nil } +func stripFrontmatterFields(content string, keys []string) string { + if len(keys) == 0 { + return content + } + lines := strings.Split(content, "\n") + end := frontmatterEnd(lines) + if end < 0 { + return content + } + keySet := stringSet(keys) + out := make([]string, 0, len(lines)) + out = append(out, lines[0]) + for _, line := range lines[1:end] { + if key, ok := frontmatterLineKey(line); ok && keySet[key] { + continue + } + out = append(out, line) + } + out = append(out, lines[end:]...) + return strings.Join(out, "\n") +} + +func upsertFrontmatterFields(content string, order []string, values map[string]string) string { + lines := strings.Split(content, "\n") + end := frontmatterEnd(lines) + if end < 0 { + return content + } + keySet := stringSet(order) + out := make([]string, 0, len(lines)+len(values)) + out = append(out, lines[0]) + for _, line := range lines[1:end] { + if key, ok := frontmatterLineKey(line); ok && keySet[key] { + continue + } + out = append(out, line) + } + for _, key := range order { + if value, ok := values[key]; ok { + out = append(out, key+": "+value) + } + } + out = append(out, lines[end:]...) + return strings.Join(out, "\n") +} + +func frontmatterEnd(lines []string) int { + if len(lines) == 0 || strings.TrimSpace(lines[0]) != "---" { + return -1 + } + for i := 1; i < len(lines); i++ { + if strings.TrimSpace(lines[i]) == "---" { + return i + } + } + return -1 +} + +func frontmatterLineKey(line string) (string, bool) { + trimmed := strings.TrimSpace(line) + if trimmed == "" || strings.HasPrefix(trimmed, "#") { + return "", false + } + idx := strings.Index(line, ":") + if idx < 0 { + return "", false + } + return strings.TrimSpace(line[:idx]), true +} + +func stringSet(values []string) map[string]bool { + out := make(map[string]bool, len(values)) + for _, value := range values { + out[value] = true + } + return out +} + // nextOldPath returns the first free -N.old (N counting up from 1), so a // file conflicting across several updates accrues -1.old, -2.old, … rather than // overwriting an earlier backup. It only stats paths; it never writes. diff --git a/internal/cli/update_test.go b/internal/cli/update_test.go index a482b6e..88dbc3f 100644 --- a/internal/cli/update_test.go +++ b/internal/cli/update_test.go @@ -113,6 +113,105 @@ func TestUpdatePristineOutdatedUpdatesInPlace(t *testing.T) { } } +func TestUpdatePreservesManagedAgentModelEffort(t *testing.T) { + dir := freshWorkspace(t) + agent := filepath.Join(dir, ".claude", "agents", "implementer.md") + withOverrides := strings.Replace( + readFile(t, agent), + "tools: Read, Grep, Glob, Edit, Write, Bash\n---", + "tools: Read, Grep, Glob, Edit, Write, Bash\nmodel: opus\neffort: high\n---", + 1, + ) + if err := os.WriteFile(agent, []byte(withOverrides), 0o644); err != nil { + t.Fatal(err) + } + + code, out, _ := run(t, "update", "--root", dir) + if code != 0 { + t.Fatalf("update failed: code=%d\n%s", code, out) + } + got := readFile(t, agent) + for _, want := range []string{"model: opus", "effort: high"} { + if !strings.Contains(got, want) { + t.Errorf("update lost managed agent override %q:\n%s", want, got) + } + } + if olds := oldBackups(t, dir); len(olds) != 0 { + t.Errorf("model/effort-only changes should not create .old backups: %v", olds) + } + if !strings.Contains(out, "0 conflict(s)") { + t.Errorf("model/effort-only changes should not be conflicts:\n%s", out) + } +} + +func TestUpdatePreservesManagedSkillModelEffort(t *testing.T) { + dir := freshWorkspace(t) + skill := filepath.Join(dir, ".claude", "skills", "verify-change", "SKILL.md") + withOverrides := strings.Replace( + readFile(t, skill), + "description: Run the project's executable checks (tests, lint, typecheck, build) and produce evidence. Use before reporting a task complete or before opening a PR.\n---", + "description: Run the project's executable checks (tests, lint, typecheck, build) and produce evidence. Use before reporting a task complete or before opening a PR.\nmodel: sonnet\neffort: high\n---", + 1, + ) + if err := os.WriteFile(skill, []byte(withOverrides), 0o644); err != nil { + t.Fatal(err) + } + + code, out, _ := run(t, "update", "--root", dir) + if code != 0 { + t.Fatalf("update failed: code=%d\n%s", code, out) + } + got := readFile(t, skill) + for _, want := range []string{"model: sonnet", "effort: high"} { + if !strings.Contains(got, want) { + t.Errorf("update lost managed skill override %q:\n%s", want, got) + } + } + if olds := oldBackups(t, dir); len(olds) != 0 { + t.Errorf("model/effort-only changes should not create .old backups: %v", olds) + } +} + +func TestUpdateCarriesManagedAgentModelEffortAcrossTemplateRefresh(t *testing.T) { + dir := freshWorkspace(t) + agent := filepath.Join(dir, ".claude", "agents", "implementer.md") + relKey := ".claude/agents/implementer.md" + + oldShipped := "---\nname: implementer\ndescription: old\ntools: Read\n---\nOLD BODY\n" + oldWithOverrides := "---\nname: implementer\ndescription: old\ntools: Read\nmodel: sonnet\neffort: max\n---\nOLD BODY\n" + if err := os.WriteFile(agent, []byte(oldWithOverrides), 0o644); err != nil { + t.Fatal(err) + } + m, _, err := manifest.Load(paths.Manifest(dir)) + if err != nil { + t.Fatal(err) + } + m.Files[relKey] = manifest.Hash(oldShipped) + if err := m.Save(paths.Manifest(dir), "test", time.Now()); err != nil { + t.Fatal(err) + } + + code, out, _ := run(t, "update", "--root", dir) + if code != 0 { + t.Fatalf("update failed: code=%d\n%s", code, out) + } + got := readFile(t, agent) + for _, want := range []string{"model: sonnet", "effort: max", "You implement **one task at a time**"} { + if !strings.Contains(got, want) { + t.Errorf("refreshed agent missing %q:\n%s", want, got) + } + } + if strings.Contains(got, "OLD BODY") { + t.Errorf("pristine outdated agent should be refreshed, got:\n%s", got) + } + if _, err := os.Stat(agent + "-1.old"); !os.IsNotExist(err) { + t.Errorf("metadata-only overrides should update in place without .old backup (err=%v)", err) + } + if !strings.Contains(out, "1 updated") { + t.Errorf("template refresh should be reported as update:\n%s", out) + } +} + func TestUpdateForceSkipsBackup(t *testing.T) { dir := freshWorkspace(t) rule := filepath.Join(dir, ".claude", "rules", "ears-format.md") From cd588d7458c9fdaae85d58585d3696e5564e2357 Mon Sep 17 00:00:00 2001 From: prode Date: Fri, 12 Jun 2026 19:18:29 -0300 Subject: [PATCH 2/2] feat(spec): add per-spec development_flow (unit|tdd|tdd-e2e) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let a spec choose its development discipline at `spec init` so simple features skip disproportionate test-first ceremony while critical ones keep full rigor. - spec.json gains `development_flow` (omitempty): unit (tests after code), tdd (test-first RED→GREEN, default), tdd-e2e (TDD + end-to-end). - `spec init --flow` across CLI, the MCP `csdd_spec_init` tool, and the TUI wizard, all via one exported `SpecInit` op (single source of truth). - Workspace default resolved from steering frontmatter `default_development_flow` (absent ⇒ tdd); the steering validator flags an invalid value (exit 2). - Backward compatible: a legacy spec.json with no field reads as tdd, and omitempty keeps untouched files byte-identical. - Flow-aware guidance prose in the shipped baseline (tasks-generation, definition-of-done, tdd-cycle, implementer, tasks.md), plus a one-test-per-observable-behavior rule to curb test bloat. Verification: gofmt clean · go vet ./... · go test -race ./... green (455 pass) · tsc --noEmit (mcp-server) · spec validate passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/cli/spec.go | 156 +++++++++++++---- internal/cli/spec_flow_test.go | 163 ++++++++++++++++++ internal/session/session.go | 1 + internal/session/session_test.go | 17 ++ .../templates/agents/implementer.md.tmpl | 28 ++- .../rules/definition-of-done.md.tmpl | 12 +- .../templates/rules/tasks-generation.md.tmpl | 42 +++-- .../templates/skills/tdd-cycle/SKILL.md.tmpl | 10 +- .../templater/templates/spec/spec.json.tmpl | 1 + .../templater/templates/spec/tasks.md.tmpl | 12 +- internal/tui/wizard.go | 16 +- internal/tui/wizard_test.go | 25 ++- internal/validator/validator.go | 13 ++ internal/validator/validator_test.go | 26 +++ mcp-server/src/tools/spec.ts | 15 +- 15 files changed, 465 insertions(+), 72 deletions(-) create mode 100644 internal/cli/spec_flow_test.go diff --git a/internal/cli/spec.go b/internal/cli/spec.go index dcc1d04..ad8abc4 100644 --- a/internal/cli/spec.go +++ b/internal/cli/spec.go @@ -15,6 +15,7 @@ import ( "strings" "time" + "github.com/protonspy/csdd/internal/frontmatter" "github.com/protonspy/csdd/internal/paths" "github.com/protonspy/csdd/internal/render" "github.com/protonspy/csdd/internal/session" @@ -28,11 +29,72 @@ type SpecJSON struct { FeatureName string `json:"feature_name"` Language string `json:"language"` Phase string `json:"phase"` + DevelopmentFlow string `json:"development_flow,omitempty"` Approvals map[string]ApprovalFlag `json:"approvals"` ReadyForImplementation bool `json:"ready_for_implementation"` CreatedAt string `json:"created_at"` } +// defaultDevelopmentFlow is the flow assumed when none is selected and steering +// declares no default. It keeps csdd's test-first posture as the default. +const defaultDevelopmentFlow = "tdd" + +// developmentFlows is the closed set of selectable development flows: +// - unit: tests written after the code (no RED-first ritual) +// - tdd: test-first RED→GREEN (the default) +// - tdd-e2e: TDD plus end-to-end coverage of golden and error flows +var developmentFlows = []string{"unit", "tdd", "tdd-e2e"} + +// validDevelopmentFlow reports whether f is one of the selectable flows. +func validDevelopmentFlow(f string) bool { + for _, v := range developmentFlows { + if v == f { + return true + } + } + return false +} + +// effectiveFlow coerces a stored (possibly empty, e.g. legacy spec.json) flow to +// the value callers should act on. Absent ⇒ defaultDevelopmentFlow. +func effectiveFlow(f string) string { + if f == "" { + return defaultDevelopmentFlow + } + return f +} + +// resolveDefaultFlow returns the workspace default development flow declared in +// steering frontmatter (key default_development_flow). Steering files are scanned +// in name order; the first valid declared value wins. Absent or invalid ⇒ +// defaultDevelopmentFlow (an invalid value is separately flagged by the steering +// validator, so init stays robust rather than writing a bad flow). +func resolveDefaultFlow(root string) string { + dir := paths.Steering(root) + entries, err := os.ReadDir(dir) + if err != nil { + return defaultDevelopmentFlow + } + var names []string + for _, e := range entries { + if !e.IsDir() && strings.HasSuffix(e.Name(), ".md") { + names = append(names, e.Name()) + } + } + sort.Strings(names) + for _, n := range names { + data, err := os.ReadFile(filepath.Join(dir, n)) + if err != nil { + continue + } + fm := frontmatter.Parse(string(data)) + if v := fm.AsString("default_development_flow", ""); validDevelopmentFlow(v) { + return v + } + } + return defaultDevelopmentFlow +} + // ApprovalFlag tracks generation + approval state for a phase. type ApprovalFlag struct { Generated bool `json:"generated"` @@ -72,57 +134,84 @@ func runSpec(args []string, templates embed.FS) int { } } -func specInit(args []string, templates embed.FS) int { - fs := flag.NewFlagSet("spec init", flag.ContinueOnError) - var root, language string - addRoot(fs, &root) - fs.StringVar(&language, "language", "en", "Spec language (default: en).") - positionals, err := parseFlags(fs, args) - if err != nil { - return failOnFlagParse(err) - } - if len(positionals) < 1 { - render.Err("usage: " + prog() + " spec init FEATURE") - return 1 +// SpecInitOptions is the headless input shared by the CLI action and the TUI +// wizard. Flow "" resolves the steering default, then defaultDevelopmentFlow. +type SpecInitOptions struct { + Root string + Feature string + Language string + Flow string +} + +// SpecInit creates specs//spec.json with the resolved development flow. +// It is the single source of truth for spec creation; the CLI action and the TUI +// wizard both fill SpecInitOptions and call it. An explicit invalid Flow is +// rejected before any write (the caller maps the error to exit 1). +func SpecInit(templates embed.FS, opts SpecInitOptions) error { + if opts.Flow != "" && !validDevelopmentFlow(opts.Flow) { + return fmt.Errorf("invalid development flow %q: must be one of %s", opts.Flow, strings.Join(developmentFlows, "|")) } - r, err := workspace.Resolve(root) + r, err := workspace.Resolve(opts.Root) if err != nil { - render.Err(err.Error()) - return 1 + return err } if _, err := workspace.SpecsDir(r); err != nil { - render.Err(err.Error()) - return 1 + return err } - feature := positionals[0] - if err := workspace.KebabCheck(feature, "feature"); err != nil { - render.Err(err.Error()) - return 1 + if err := workspace.KebabCheck(opts.Feature, "feature"); err != nil { + return err } - target := filepath.Join(paths.Specs(r), feature) + target := filepath.Join(paths.Specs(r), opts.Feature) if pathExists(target) { - render.Err("spec already exists: " + workspace.Relative(r, target)) - return 1 + return errors.New("spec already exists: " + workspace.Relative(r, target)) + } + flow := opts.Flow + if flow == "" { + flow = resolveDefaultFlow(r) + } + language := opts.Language + if language == "" { + language = "en" } if err := mkdirAll(target); err != nil { - render.Err(err.Error()) - return 1 + return err } content, err := templater.Render(templates, "templates/spec/spec.json.tmpl", map[string]string{ - "Feature": feature, - "Language": language, - "CreatedAt": time.Now().UTC().Format("2006-01-02T15:04:05Z"), + "Feature": opts.Feature, + "Language": language, + "DevelopmentFlow": flow, + "CreatedAt": time.Now().UTC().Format("2006-01-02T15:04:05Z"), }) if err != nil { - render.Err(err.Error()) - return 1 + return err } if err := workspace.WriteFile(filepath.Join(target, "spec.json"), content, false); err != nil { + return err + } + render.OK("created " + workspace.Relative(r, target) + "/ (flow: " + flow + ")") + render.Info(fmt.Sprintf("next: `%s spec generate --artifact requirements`", prog())) + return nil +} + +func specInit(args []string, templates embed.FS) int { + fs := flag.NewFlagSet("spec init", flag.ContinueOnError) + var opts SpecInitOptions + addRoot(fs, &opts.Root) + fs.StringVar(&opts.Language, "language", "en", "Spec language (default: en).") + fs.StringVar(&opts.Flow, "flow", "", "Development flow: unit|tdd|tdd-e2e (default: steering default, else tdd).") + positionals, err := parseFlags(fs, args) + if err != nil { + return failOnFlagParse(err) + } + if len(positionals) < 1 { + render.Err("usage: " + prog() + " spec init FEATURE") + return 1 + } + opts.Feature = positionals[0] + if err := SpecInit(templates, opts); err != nil { render.Err(err.Error()) return 1 } - render.OK("created " + workspace.Relative(r, target) + "/") - render.Info(fmt.Sprintf("next: `%s spec generate --artifact requirements`", prog())) return 0 } @@ -223,6 +312,7 @@ func specShow(args []string) int { fmt.Println(render.Bold("feature: " + data.FeatureName)) fmt.Println("phase: " + data.Phase) fmt.Println("language: " + data.Language) + fmt.Println("flow: " + effectiveFlow(data.DevelopmentFlow)) fmt.Println("created: " + data.CreatedAt) fmt.Printf("ready: %v\n", data.ReadyForImplementation) fmt.Println("approvals:") diff --git a/internal/cli/spec_flow_test.go b/internal/cli/spec_flow_test.go new file mode 100644 index 0000000..aaf081a --- /dev/null +++ b/internal/cli/spec_flow_test.go @@ -0,0 +1,163 @@ +package cli + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/protonspy/csdd/internal/templater" +) + +// readSpecFlow returns the raw development_flow field written to a spec.json. +func readSpecFlow(t *testing.T, dir, feature string) (SpecJSON, []byte) { + t.Helper() + b, err := os.ReadFile(filepath.Join(dir, "specs", feature, "spec.json")) + if err != nil { + t.Fatalf("read spec.json: %v", err) + } + var s SpecJSON + if err := json.Unmarshal(b, &s); err != nil { + t.Fatalf("unmarshal spec.json: %v", err) + } + return s, b +} + +// Req 1.1 / 8.3: each valid flow is persisted and surfaced by `spec show`. +func TestSpecInitFlowPersistedAndShown(t *testing.T) { + for _, flow := range []string{"unit", "tdd", "tdd-e2e"} { + flow := flow + t.Run(flow, func(t *testing.T) { + dir := freshWorkspace(t) + if code, _, errOut := run(t, "spec", "init", "feat-"+flow, "--flow", flow, "--root", dir); code != 0 { + t.Fatalf("init --flow %s: code=%d err=%q", flow, code, errOut) + } + s, _ := readSpecFlow(t, dir, "feat-"+flow) + if s.DevelopmentFlow != flow { + t.Errorf("development_flow = %q, want %q", s.DevelopmentFlow, flow) + } + _, showOut, _ := run(t, "spec", "show", "feat-"+flow, "--root", dir) + if !strings.Contains(showOut, "flow:") || !strings.Contains(showOut, flow) { + t.Errorf("show output missing flow %q: %s", flow, showOut) + } + }) + } +} + +// Req 1.2: an invalid flow is rejected (exit 1) and writes no spec.json. +func TestSpecInitFlowInvalid(t *testing.T) { + dir := freshWorkspace(t) + code, _, errOut := run(t, "spec", "init", "bad-flow", "--flow", "xunit", "--root", dir) + if code != 1 { + t.Errorf("invalid flow: code=%d, want 1", code) + } + if !strings.Contains(errOut, "flow") { + t.Errorf("error should mention flow: %q", errOut) + } + if _, err := os.Stat(filepath.Join(dir, "specs", "bad-flow")); !os.IsNotExist(err) { + t.Error("spec dir must not be created when --flow is invalid") + } +} + +// Req 1.3 / 2.2: with no steering default, an omitted flow resolves to tdd. +func TestSpecInitFlowDefaultsToTdd(t *testing.T) { + dir := freshWorkspace(t) + if code, _, errOut := run(t, "spec", "init", "no-flow", "--root", dir); code != 0 { + t.Fatalf("init: code=%d err=%q", code, errOut) + } + s, _ := readSpecFlow(t, dir, "no-flow") + if s.DevelopmentFlow != "tdd" { + t.Errorf("development_flow = %q, want tdd", s.DevelopmentFlow) + } +} + +// Req 1.3 / 2.1: a steering-declared default is applied when flow is omitted. +func TestSpecInitFlowSteeringDefault(t *testing.T) { + dir := freshWorkspace(t) + steering := filepath.Join(dir, ".claude", "steering", "zzz-flow-default.md") + body := "---\ninclusion: manual\ndefault_development_flow: tdd-e2e\n---\n# default\n" + if err := os.WriteFile(steering, []byte(body), 0o644); err != nil { + t.Fatal(err) + } + if code, _, errOut := run(t, "spec", "init", "inherits", "--root", dir); code != 0 { + t.Fatalf("init: code=%d err=%q", code, errOut) + } + s, _ := readSpecFlow(t, dir, "inherits") + if s.DevelopmentFlow != "tdd-e2e" { + t.Errorf("development_flow = %q, want tdd-e2e", s.DevelopmentFlow) + } + // An explicit --flow overrides the steering default. + if code, _, _ := run(t, "spec", "init", "explicit", "--flow", "unit", "--root", dir); code != 0 { + t.Fatal("init explicit failed") + } + s2, _ := readSpecFlow(t, dir, "explicit") + if s2.DevelopmentFlow != "unit" { + t.Errorf("explicit flow = %q, want unit", s2.DevelopmentFlow) + } +} + +// Req 2.2: an invalid steering default does not corrupt init; it falls back to tdd. +func TestResolveDefaultFlowInvalidFallsBack(t *testing.T) { + dir := freshWorkspace(t) + steering := filepath.Join(dir, ".claude", "steering", "zzz-bad.md") + if err := os.WriteFile(steering, []byte("---\ninclusion: manual\ndefault_development_flow: bogus\n---\n"), 0o644); err != nil { + t.Fatal(err) + } + if got := resolveDefaultFlow(dir); got != "tdd" { + t.Errorf("resolveDefaultFlow with invalid steering = %q, want tdd", got) + } +} + +// Req 3.1: a legacy spec.json with no development_flow is read as tdd. +func TestEffectiveFlowLegacy(t *testing.T) { + if got := effectiveFlow(""); got != "tdd" { + t.Errorf("effectiveFlow(\"\") = %q, want tdd", got) + } + if got := effectiveFlow("unit"); got != "unit" { + t.Errorf("effectiveFlow(unit) = %q, want unit", got) + } +} + +// Req 3.3: saving a spec with no flow omits the field, so legacy files stay clean. +func TestSaveSpecJSONOmitsEmptyFlow(t *testing.T) { + dir := t.TempDir() + if err := saveSpecJSON(dir, SpecJSON{FeatureName: "legacy", Phase: "initialized"}); err != nil { + t.Fatal(err) + } + b, err := os.ReadFile(filepath.Join(dir, "spec.json")) + if err != nil { + t.Fatal(err) + } + if strings.Contains(string(b), "development_flow") { + t.Errorf("empty flow must be omitted from spec.json, got:\n%s", b) + } +} + +func TestValidDevelopmentFlow(t *testing.T) { + for _, ok := range []string{"unit", "tdd", "tdd-e2e"} { + if !validDevelopmentFlow(ok) { + t.Errorf("validDevelopmentFlow(%q) = false, want true", ok) + } + } + for _, bad := range []string{"", "TDD", "e2e", "unit-test"} { + if validDevelopmentFlow(bad) { + t.Errorf("validDevelopmentFlow(%q) = true, want false", bad) + } + } +} + +// Req 1.1: the exported op is the single source of truth (CLI + TUI call it). +func TestSpecInitOpWritesFlow(t *testing.T) { + dir := freshWorkspace(t) + if err := SpecInit(templater.FS, SpecInitOptions{Root: dir, Feature: "via-op", Flow: "unit"}); err != nil { + t.Fatalf("SpecInit: %v", err) + } + s, _ := readSpecFlow(t, dir, "via-op") + if s.DevelopmentFlow != "unit" { + t.Errorf("op-written flow = %q, want unit", s.DevelopmentFlow) + } + if err := SpecInit(templater.FS, SpecInitOptions{Root: dir, Feature: "bad", Flow: "nope"}); err == nil { + t.Error("SpecInit with invalid flow should error") + } +} diff --git a/internal/session/session.go b/internal/session/session.go index 7e4428b..a28a1ad 100644 --- a/internal/session/session.go +++ b/internal/session/session.go @@ -26,6 +26,7 @@ type specJSON struct { FeatureName string `json:"feature_name"` Language string `json:"language"` Phase string `json:"phase"` + DevelopmentFlow string `json:"development_flow,omitempty"` Approvals map[string]approvalFlag `json:"approvals"` ReadyForImplementation bool `json:"ready_for_implementation"` CreatedAt string `json:"created_at"` diff --git a/internal/session/session_test.go b/internal/session/session_test.go index d8de82f..941a11b 100644 --- a/internal/session/session_test.go +++ b/internal/session/session_test.go @@ -8,6 +8,23 @@ import ( "testing" ) +// specMirrorFixture exercises the session specJSON mirror's development_flow +// field. The cli package asserts the same field independently (spec_flow_test.go); +// the two mirror structs are kept tag-for-tag in sync by review discipline. +const specMirrorFixture = `{"feature_name":"x","language":"en","phase":"initialized","development_flow":"tdd-e2e","approvals":{},"ready_for_implementation":false,"created_at":"2026-01-01T00:00:00Z"}` + +// TestSpecJSONMirrorParsesFlow asserts the read-only mirror parses the +// development_flow field (kept in sync with cli.SpecJSON). +func TestSpecJSONMirrorParsesFlow(t *testing.T) { + var s specJSON + if err := json.Unmarshal([]byte(specMirrorFixture), &s); err != nil { + t.Fatal(err) + } + if s.DevelopmentFlow != "tdd-e2e" { + t.Errorf("mirror DevelopmentFlow = %q, want tdd-e2e", s.DevelopmentFlow) + } +} + // writeWorkspace materializes a temp workspace from a path→content map and // returns the root. Paths are slash-separated and relative to the root. func writeWorkspace(t *testing.T, files map[string]string) string { diff --git a/internal/templater/templates/agents/implementer.md.tmpl b/internal/templater/templates/agents/implementer.md.tmpl index 43f8d4f..8645b6f 100644 --- a/internal/templater/templates/agents/implementer.md.tmpl +++ b/internal/templater/templates/agents/implementer.md.tmpl @@ -1,12 +1,12 @@ --- name: implementer -description: Implements a single task from specs/*/tasks.md test-first (RED→GREEN→REFACTOR) inside its approved design.md boundary — runs the verification gate, records evidence, marks the task done, updates notes, and reports. Language-agnostic; specialize per stack via steering and skills. +description: Implements a single task from specs/*/tasks.md inside its approved design.md boundary, following the spec's development_flow (tdd test-first by default; unit tests-after; tdd-e2e adds end-to-end) — runs the verification gate, records evidence, marks the task done, updates notes, and reports. Language-agnostic; specialize per stack via steering and skills. tools: Read, Grep, Glob, Edit, Write, Bash --- -You implement **one task at a time**, test-first, and leave the spec honest: the -test exists before the code, the gate is green, and the task's box is checked when -it lands. You write code; you do not redesign. +You implement **one task at a time**, following the spec's `development_flow`, and +leave the spec honest: the behavior is covered by a passing test, the gate is green, +and the task's box is checked when it lands. You write code; you do not redesign. ## Role @@ -24,6 +24,9 @@ the discipline *around* one task: scope, evidence, task-marking, and reporting. ## Collect inputs first - Which single task? (one leaf ID from `tasks.md`, with its `_Requirements:_` / `_Boundary:_`). +- What is the spec's `development_flow` in `spec.json` (absent ⇒ `tdd`)? It decides whether + the test comes before (`tdd`/`tdd-e2e`) or alongside (`unit`) the code, and whether an + end-to-end check is in scope (`tdd-e2e`). - What observable behavior does it add? State it in one sentence before editing. - Which `design.md` boundary owns it, and what is the project's test command? - Will it call a library/framework API? If so, confirm the signature against current @@ -35,9 +38,14 @@ the discipline *around* one task: scope, evidence, task-marking, and reporting. 1. **Read the contract.** Read the task, its requirement IDs in `requirements.md`, and the component it belongs to in `design.md`. Confirm the work fits inside that boundary; if it does not, stop and report — do not widen scope. -2. **RED → GREEN → REFACTOR.** Run the `tdd-cycle` skill for this one task: write the - smallest failing test, confirm it fails for the expected reason, write the least - code to pass, then refactor under green. +2. **Implement per the flow.** Write one test per *observable behavior*, not per branch. + - `tdd` / `tdd-e2e`: run the `tdd-cycle` skill — write the smallest failing test, + confirm it fails for the expected reason, write the least code to pass, refactor + under green. + - `unit`: write the implementation, then cover the behavior with a unit test in the + same task; both end green. No RED-first step is required. + - `tdd-e2e`: additionally exercise the affected golden/error flow end-to-end (e.g. the + `frontend-e2e-qa` skill or the project's e2e suite) when the task completes a flow. 3. **Widen the net.** Run the broader relevant suite (plus lint/typecheck/build if the project defines them) to catch regressions. 4. **Run the gate.** Use the `verify-change` skill to run the project's full @@ -78,13 +86,15 @@ variants only add the stack-specific knowledge. ## Gotchas -- If no test framework exists, say so explicitly and stop — do not silently skip RED. +- If no test framework exists, say so explicitly and stop — do not silently skip the test + (RED under `tdd`/`tdd-e2e`, the same-task unit test under `unit`). - A passing suite is not proof of coverage; make sure the new behavior is exercised. - Do not mark a task `[x]` while any step is blocked or the gate is red. ## Completion criteria -- [ ] A test failed before the change (RED) and passes after (GREEN). +- [ ] The behavior is covered by a passing test — RED-then-GREEN under `tdd`/`tdd-e2e`, a same-task unit test under `unit`. +- [ ] Under `tdd-e2e`, the affected golden/error flow passes end-to-end. - [ ] The relevant suite + gate (`verify-change`) are green. - [ ] `csdd spec test-report --run …` recorded the evidence. - [ ] `csdd spec diff-report ` refreshed the Changes view for the slice. diff --git a/internal/templater/templates/rules/definition-of-done.md.tmpl b/internal/templater/templates/rules/definition-of-done.md.tmpl index ca098ae..e9dd169 100644 --- a/internal/templater/templates/rules/definition-of-done.md.tmpl +++ b/internal/templater/templates/rules/definition-of-done.md.tmpl @@ -6,12 +6,16 @@ A task or PR is **done** only when every box is checked. "It compiles" and ## Per task (one task from tasks.md) - [ ] The active spec was read; the change stays within its approved boundary. -- [ ] Test-first: a test failed (RED) for the expected reason before the implementation, when the task adds behavior. -- [ ] That test now passes (GREEN); refactoring kept it green. +- [ ] **Under `tdd`/`tdd-e2e`:** a test failed (RED) for the expected reason before the + implementation, when the task adds behavior; that test now passes (GREEN) and + refactoring kept it green. +- [ ] **Under `unit`:** the behavior is covered by a unit test that passes (no RED-first + evidence required). +- [ ] **Under `tdd-e2e`:** the end-to-end coverage for the affected golden/error flow runs green. - [ ] The relevant suite, lint, and typecheck pass for the touched scope. -- [ ] Unit tests were run **through the csdd CLI** so the spec carries the evidence: +- [ ] Unit tests were run **through the csdd CLI** so the spec carries the evidence (**every flow**): `csdd spec test-report --run --lang ` wrote `specs//test-report.json` - (tests green, coverage recorded, no open attentions). + (tests green, coverage recorded, no open attentions). Missing this evidence is a blocked item. - [ ] Scope was respected — no opportunistic refactors or unrelated changes. - [ ] `## Implementation Notes` in tasks.md updated with any cross-task learning. - [ ] Report lists: task ID, files changed, tests added, commands run, remaining risks. diff --git a/internal/templater/templates/rules/tasks-generation.md.tmpl b/internal/templater/templates/rules/tasks-generation.md.tmpl index 32c77dc..13aaf0d 100644 --- a/internal/templater/templates/rules/tasks-generation.md.tmpl +++ b/internal/templater/templates/rules/tasks-generation.md.tmpl @@ -11,24 +11,38 @@ - `_Depends: 3.1, 4.2_` — cross-boundary dependencies only; sequential order is implicit. - `(P)` — parallel-capable, runs alongside other (P) tasks in different boundaries. -## Test-First (TDD) -- Every behavior is delivered as a **RED test sub-task immediately followed by a GREEN - implementation sub-task**. The test is written first and must fail for the expected - reason before the implementation exists. -- Name the pair explicitly: `2.1 RED — write the failing test for X`, `2.2 GREEN — minimal - implementation to pass 2.1`. -- The GREEN sub-task depends on its RED sub-task being in place; refactor only under green. -- Pure scaffolding (migrations, type skeletons, config) needs no RED test — but any task - that adds observable behavior does. -- Phase 4 (Validation) holds end-to-end tests, not the unit tests — those live with their behavior in Phases 2–3. -- Run unit tests through the csdd CLI so the spec keeps traceable evidence: +## Development flow (read `development_flow` from spec.json first) + +The spec's `development_flow` decides how behavior tasks are shaped. Resolve it +before generating tasks (absent ⇒ `tdd`): + +- **`tdd`** (default) and **`tdd-e2e`** — every behavior is delivered as a **RED test + sub-task immediately followed by a GREEN implementation sub-task**. The test is written + first and must fail for the expected reason before the implementation exists. Name the + pair explicitly: `2.1 RED — write the failing test for X`, `2.2 GREEN — minimal + implementation to pass 2.1`. The GREEN sub-task depends on its RED sub-task; refactor + only under green. +- **`unit`** — write the implementation, then cover it with unit tests in the **same** + task. No preceding RED sub-task is required, but **every behavior task still carries a + test sub-task** covering that behavior — `unit` skips the RED-first ritual, not the test. +- **`tdd-e2e`** — in addition to the per-behavior RED/GREEN pairs, **Phase 4 (Validation) + includes an end-to-end task** covering the golden flow and the top error flows. + +Common to every flow: +- One test per **observable behavior**, not one per code branch — keep the test count + proportional to behavior, not to internal control flow. +- Pure scaffolding (migrations, type skeletons, config) needs no test sub-task. +- Phase 4 (Validation) holds end-to-end tests, not the unit tests — those live with their + behavior in Phases 2–3. +- Run unit tests through the csdd CLI so the spec keeps traceable evidence (**all flows**): `csdd spec test-report --run --lang ` writes `specs//test-report.json`. Phase 4 SHOULD include a task that records this evidence (tests green, coverage captured). ## Avoid - Generic tasks like "implement the backend". - File-level instructions like "edit albums.ts". -- An implementation sub-task for a behavior with no preceding test sub-task. +- A behavior with no test sub-task at all (under `tdd`/`tdd-e2e`, no *preceding* test). +- A test per branch instead of per observable behavior (test bloat). ## Prefer - Capabilities and outcomes: @@ -41,4 +55,6 @@ - [ ] Every `(P)` task carries `_Boundary:_`. - [ ] Every `_Depends:_` references a task ID that exists in this file. - [ ] No two `(P)` tasks share the same boundary. -- [ ] Every behavior implementation (GREEN) sub-task is preceded by a test (RED) sub-task for the same behavior. +- [ ] Under `tdd`/`tdd-e2e`: every behavior implementation (GREEN) sub-task is preceded by a test (RED) sub-task for the same behavior. +- [ ] Under `unit`: every behavior task carries a test sub-task (no preceding RED required). +- [ ] Under `tdd-e2e`: Phase 4 includes an end-to-end task for the golden + top error flows. diff --git a/internal/templater/templates/skills/tdd-cycle/SKILL.md.tmpl b/internal/templater/templates/skills/tdd-cycle/SKILL.md.tmpl index ce6a3c0..6990684 100644 --- a/internal/templater/templates/skills/tdd-cycle/SKILL.md.tmpl +++ b/internal/templater/templates/skills/tdd-cycle/SKILL.md.tmpl @@ -1,6 +1,6 @@ --- name: tdd-cycle -description: Execute one small red→green→refactor cycle for a single task from specs/*/tasks.md. Use when implementing a behavior task that needs test-first development. +description: Execute one small red→green→refactor cycle for a single task from specs/*/tasks.md. Use for behavior tasks under the test-first flows (development_flow tdd or tdd-e2e); under the unit flow the implementer covers behavior with a same-task test instead of this RED-first cycle. --- # TDD Cycle @@ -8,6 +8,14 @@ description: Execute one small red→green→refactor cycle for a single task fr Use this skill for **exactly one task at a time**, driving it through red → green → refactor. +## When this applies + +This is the **test-first** cycle for specs whose `development_flow` is `tdd` or +`tdd-e2e`. Under the `unit` flow there is no RED-first step — the implementer writes +the code and covers the behavior with a unit test in the same task; skip this skill +for those tasks. In every flow, write **one test per observable behavior, not one per +code branch** — keep the test count proportional to behavior. + ## Default Operation Mode Direct implementation, test-first. The first artifact you produce is a failing diff --git a/internal/templater/templates/spec/spec.json.tmpl b/internal/templater/templates/spec/spec.json.tmpl index a60537e..0ac498f 100644 --- a/internal/templater/templates/spec/spec.json.tmpl +++ b/internal/templater/templates/spec/spec.json.tmpl @@ -2,6 +2,7 @@ "feature_name": "{{.Feature}}", "language": "{{.Language}}", "phase": "initialized", + "development_flow": "{{.DevelopmentFlow}}", "approvals": { "requirements": { "generated": false, "approved": false }, "design": { "generated": false, "approved": false }, diff --git a/internal/templater/templates/spec/tasks.md.tmpl b/internal/templater/templates/spec/tasks.md.tmpl index 29917fb..bfda256 100644 --- a/internal/templater/templates/spec/tasks.md.tmpl +++ b/internal/templater/templates/spec/tasks.md.tmpl @@ -7,10 +7,14 @@ Annotation rules (see .claude/rules/tasks-generation.md): - _Depends: 3.1, 4.2_ — cross-boundary dependencies only; sequential order is implicit. - (P) — parallel-capable, runs alongside other (P) tasks in different boundaries. -Test-first (TDD): every behavior is a RED test sub-task immediately followed by -the minimal GREEN implementation sub-task. Write the failing test, confirm it -fails for the expected reason, then write the least code to pass; refactor under -green. Sub-tasks: 1-3 hours of focused work. +Task structure follows the spec's development_flow (see spec.json; absent ⇒ tdd): +- tdd / tdd-e2e: every behavior is a RED test sub-task immediately followed by the + minimal GREEN implementation sub-task (write the failing test, confirm it fails for the + expected reason, then the least code to pass; refactor under green); tdd-e2e also adds + an end-to-end task in Phase 4. +- unit: write the implementation, then cover the behavior with a unit test in the same + task — no preceding RED sub-task, but every behavior still carries a test. +One test per observable behavior, not per branch. Sub-tasks: 1-3 hours of focused work. --> ## Phase 1: Foundation diff --git a/internal/tui/wizard.go b/internal/tui/wizard.go index d0113e4..0805a67 100644 --- a/internal/tui/wizard.go +++ b/internal/tui/wizard.go @@ -345,12 +345,13 @@ func (w wizardModel) submit() tea.Cmd { if err := workspace.KebabCheck(feature, "feature"); err != nil { return resultMsg{text: err.Error(), isErr: true} } - // Use the CLI Run path to avoid duplicating logic for spec init. - code := cli.Run([]string{"spec", "init", feature}, w.templates) - if code != 0 { - return resultMsg{text: "spec init failed (exit " + fmt.Sprint(code) + ")", isErr: true} + flow := getStr(w.state, "flow") + if err := cli.SpecInit(w.templates, cli.SpecInitOptions{ + Root: w.root, Feature: feature, Flow: flow, + }); err != nil { + return resultMsg{text: err.Error(), isErr: true} } - return resultMsg{text: "spec '" + feature + "' initialized"} + return resultMsg{text: "spec '" + feature + "' initialized (flow: " + flow + ")"} case "generate": artifact := getStr(w.state, "artifact") if err := cli.SpecGenerate(w.templates, cli.SpecGenerateOptions{ @@ -535,6 +536,11 @@ func specFields() []*field { kind: fText, required: true, input: newTextInput("photo-albums"), validate: validateKebab, }, + { + name: "flow", label: "Development flow", kind: fSelect, required: true, + choices: []string{"tdd", "unit", "tdd-e2e"}, + skipIf: func(s map[string]any) bool { return getStr(s, "operation") != "init" }, + }, { name: "artifact", label: "Artifact", kind: fSelect, required: true, choices: workspace.SpecArtifacts, diff --git a/internal/tui/wizard_test.go b/internal/tui/wizard_test.go index 414587b..7f4d5dc 100644 --- a/internal/tui/wizard_test.go +++ b/internal/tui/wizard_test.go @@ -64,7 +64,7 @@ func TestNewWizardFieldsPerKind(t *testing.T) { n int }{ {wizSteering, "Create steering", 5}, - {wizSpec, "Spec wizard", 4}, + {wizSpec, "Spec wizard", 5}, {wizSkill, "Create skill", 3}, {wizAgent, "Create agent", 5}, {wizMCP, "Add MCP server", 7}, @@ -188,6 +188,29 @@ func TestWizardSubmitSkill(t *testing.T) { } } +// TestWizardSubmitSpecInitFlow asserts the wizard's spec-init submit carries the +// selected development flow into the shared SpecInit op (Req 1.4). +func TestWizardSubmitSpecInitFlow(t *testing.T) { + dir := freshTUIWorkspace(t) + w := wizardModel{kind: wizSpec, templates: templater.FS, root: dir, state: map[string]any{ + "operation": "init", + "feature": "wizard-flow", + "flow": "unit", + }} + msg := w.submit()() + rm, ok := msg.(resultMsg) + if !ok || rm.isErr { + t.Fatalf("spec init submit failed: %#v", msg) + } + b, err := os.ReadFile(filepath.Join(dir, "specs", "wizard-flow", "spec.json")) + if err != nil { + t.Fatalf("spec.json not created: %v", err) + } + if !strings.Contains(string(b), `"development_flow": "unit"`) { + t.Errorf("wizard did not persist flow=unit:\n%s", b) + } +} + func TestWizardSubmitAgentDefaultsTools(t *testing.T) { dir := freshTUIWorkspace(t) // Empty tools slice in state must fall back to least-privilege Read+Grep. diff --git a/internal/validator/validator.go b/internal/validator/validator.go index 021f6b9..ba66ef7 100644 --- a/internal/validator/validator.go +++ b/internal/validator/validator.go @@ -454,6 +454,16 @@ func sortedJoin(in []string) string { return strings.Join(cp, ", ") } +// validDevelopmentFlow reports whether f is one of the selectable development +// flows. Mirrors cli.developmentFlows (validator must not import cli). +func validDevelopmentFlow(f string) bool { + switch f { + case "unit", "tdd", "tdd-e2e": + return true + } + return false +} + // ValidateSteering inspects every (or one) steering file's frontmatter and // reports inclusion-mode errors. Pass empty name to validate every file. func ValidateSteering(steeringDir, name string) ([]Issue, error) { @@ -480,6 +490,9 @@ func ValidateSteering(steeringDir, name string) ([]Issue, error) { continue } fm := frontmatter.Parse(string(data)) + if df := fm.AsString("default_development_flow", ""); df != "" && !validDevelopmentFlow(df) { + issues = append(issues, Issue{File: fname, Msg: "default_development_flow must be one of unit|tdd|tdd-e2e (got '" + df + "')"}) + } inc := fm.AsString("inclusion", "") valid := false for _, m := range []string{"always", "fileMatch", "manual", "auto"} { diff --git a/internal/validator/validator_test.go b/internal/validator/validator_test.go index a32e02b..49622d4 100644 --- a/internal/validator/validator_test.go +++ b/internal/validator/validator_test.go @@ -424,6 +424,32 @@ func TestValidateSteering(t *testing.T) { } } +// Req 2.3 / 3.2: an invalid default_development_flow is flagged; a valid or +// absent one is not. +func TestValidateSteeringFlowDefault(t *testing.T) { + dir := t.TempDir() + files := map[string]string{ + "valid-flow.md": "---\ninclusion: always\ndefault_development_flow: tdd-e2e\n---\n", + "absent-flow.md": "---\ninclusion: always\n---\n", + "invalid-flow.md": "---\ninclusion: always\ndefault_development_flow: bogus\n---\n", + } + for name, body := range files { + if err := os.WriteFile(filepath.Join(dir, name), []byte(body), 0o644); err != nil { + t.Fatal(err) + } + } + issues, err := ValidateSteering(dir, "") + if err != nil { + t.Fatal(err) + } + if len(issues) != 1 { + t.Fatalf("expected 1 issue, got %d: %v", len(issues), issues) + } + if issues[0].File != "invalid-flow.md" || !strings.Contains(issues[0].Msg, "default_development_flow") { + t.Errorf("unexpected issue: %+v", issues[0]) + } +} + func TestValidateSteeringByName(t *testing.T) { dir := t.TempDir() _ = os.WriteFile(filepath.Join(dir, "missing.md"), []byte(""), 0o644) diff --git a/mcp-server/src/tools/spec.ts b/mcp-server/src/tools/spec.ts index 4beef88..dd9e114 100644 --- a/mcp-server/src/tools/spec.ts +++ b/mcp-server/src/tools/spec.ts @@ -8,13 +8,24 @@ export const specTools: ToolDef[] = [ name: "csdd_spec_init", title: "Spec init", description: - "Create specs//spec.json (phase=initial, no approvals, not ready for implementation).", + "Create specs//spec.json (phase=initialized, no approvals, not ready for implementation).", inputSchema: { feature, language: z.string().optional().describe("Spec language (default: en)."), + flow: z + .enum(["unit", "tdd", "tdd-e2e"]) + .optional() + .describe("Development flow: unit (tests after code) | tdd (test-first, default) | tdd-e2e (TDD + e2e). Default: steering default, else tdd."), root: rootField, }, - toArgs: (p) => ["spec", "init", p.feature, ...flag("--language", p.language), ...rootArg(p)], + toArgs: (p) => [ + "spec", + "init", + p.feature, + ...flag("--language", p.language), + ...flag("--flow", p.flow), + ...rootArg(p), + ], }, { name: "csdd_spec_list",