diff --git a/internal/cli/spec.go b/internal/cli/spec.go index dcc1d04..ad8abc4 100644 --- a/internal/cli/spec.go +++ b/internal/cli/spec.go @@ -15,6 +15,7 @@ import ( "strings" "time" + "github.com/protonspy/csdd/internal/frontmatter" "github.com/protonspy/csdd/internal/paths" "github.com/protonspy/csdd/internal/render" "github.com/protonspy/csdd/internal/session" @@ -28,11 +29,72 @@ type SpecJSON struct { FeatureName string `json:"feature_name"` Language string `json:"language"` Phase string `json:"phase"` + DevelopmentFlow string `json:"development_flow,omitempty"` Approvals map[string]ApprovalFlag `json:"approvals"` ReadyForImplementation bool `json:"ready_for_implementation"` CreatedAt string `json:"created_at"` } +// defaultDevelopmentFlow is the flow assumed when none is selected and steering +// declares no default. It keeps csdd's test-first posture as the default. +const defaultDevelopmentFlow = "tdd" + +// developmentFlows is the closed set of selectable development flows: +// - unit: tests written after the code (no RED-first ritual) +// - tdd: test-first RED→GREEN (the default) +// - tdd-e2e: TDD plus end-to-end coverage of golden and error flows +var developmentFlows = []string{"unit", "tdd", "tdd-e2e"} + +// validDevelopmentFlow reports whether f is one of the selectable flows. +func validDevelopmentFlow(f string) bool { + for _, v := range developmentFlows { + if v == f { + return true + } + } + return false +} + +// effectiveFlow coerces a stored (possibly empty, e.g. legacy spec.json) flow to +// the value callers should act on. Absent ⇒ defaultDevelopmentFlow. +func effectiveFlow(f string) string { + if f == "" { + return defaultDevelopmentFlow + } + return f +} + +// resolveDefaultFlow returns the workspace default development flow declared in +// steering frontmatter (key default_development_flow). Steering files are scanned +// in name order; the first valid declared value wins. Absent or invalid ⇒ +// defaultDevelopmentFlow (an invalid value is separately flagged by the steering +// validator, so init stays robust rather than writing a bad flow). +func resolveDefaultFlow(root string) string { + dir := paths.Steering(root) + entries, err := os.ReadDir(dir) + if err != nil { + return defaultDevelopmentFlow + } + var names []string + for _, e := range entries { + if !e.IsDir() && strings.HasSuffix(e.Name(), ".md") { + names = append(names, e.Name()) + } + } + sort.Strings(names) + for _, n := range names { + data, err := os.ReadFile(filepath.Join(dir, n)) + if err != nil { + continue + } + fm := frontmatter.Parse(string(data)) + if v := fm.AsString("default_development_flow", ""); validDevelopmentFlow(v) { + return v + } + } + return defaultDevelopmentFlow +} + // ApprovalFlag tracks generation + approval state for a phase. type ApprovalFlag struct { Generated bool `json:"generated"` @@ -72,57 +134,84 @@ func runSpec(args []string, templates embed.FS) int { } } -func specInit(args []string, templates embed.FS) int { - fs := flag.NewFlagSet("spec init", flag.ContinueOnError) - var root, language string - addRoot(fs, &root) - fs.StringVar(&language, "language", "en", "Spec language (default: en).") - positionals, err := parseFlags(fs, args) - if err != nil { - return failOnFlagParse(err) - } - if len(positionals) < 1 { - render.Err("usage: " + prog() + " spec init FEATURE") - return 1 +// SpecInitOptions is the headless input shared by the CLI action and the TUI +// wizard. Flow "" resolves the steering default, then defaultDevelopmentFlow. +type SpecInitOptions struct { + Root string + Feature string + Language string + Flow string +} + +// SpecInit creates specs//spec.json with the resolved development flow. +// It is the single source of truth for spec creation; the CLI action and the TUI +// wizard both fill SpecInitOptions and call it. An explicit invalid Flow is +// rejected before any write (the caller maps the error to exit 1). +func SpecInit(templates embed.FS, opts SpecInitOptions) error { + if opts.Flow != "" && !validDevelopmentFlow(opts.Flow) { + return fmt.Errorf("invalid development flow %q: must be one of %s", opts.Flow, strings.Join(developmentFlows, "|")) } - r, err := workspace.Resolve(root) + r, err := workspace.Resolve(opts.Root) if err != nil { - render.Err(err.Error()) - return 1 + return err } if _, err := workspace.SpecsDir(r); err != nil { - render.Err(err.Error()) - return 1 + return err } - feature := positionals[0] - if err := workspace.KebabCheck(feature, "feature"); err != nil { - render.Err(err.Error()) - return 1 + if err := workspace.KebabCheck(opts.Feature, "feature"); err != nil { + return err } - target := filepath.Join(paths.Specs(r), feature) + target := filepath.Join(paths.Specs(r), opts.Feature) if pathExists(target) { - render.Err("spec already exists: " + workspace.Relative(r, target)) - return 1 + return errors.New("spec already exists: " + workspace.Relative(r, target)) + } + flow := opts.Flow + if flow == "" { + flow = resolveDefaultFlow(r) + } + language := opts.Language + if language == "" { + language = "en" } if err := mkdirAll(target); err != nil { - render.Err(err.Error()) - return 1 + return err } content, err := templater.Render(templates, "templates/spec/spec.json.tmpl", map[string]string{ - "Feature": feature, - "Language": language, - "CreatedAt": time.Now().UTC().Format("2006-01-02T15:04:05Z"), + "Feature": opts.Feature, + "Language": language, + "DevelopmentFlow": flow, + "CreatedAt": time.Now().UTC().Format("2006-01-02T15:04:05Z"), }) if err != nil { - render.Err(err.Error()) - return 1 + return err } if err := workspace.WriteFile(filepath.Join(target, "spec.json"), content, false); err != nil { + return err + } + render.OK("created " + workspace.Relative(r, target) + "/ (flow: " + flow + ")") + render.Info(fmt.Sprintf("next: `%s spec generate --artifact requirements`", prog())) + return nil +} + +func specInit(args []string, templates embed.FS) int { + fs := flag.NewFlagSet("spec init", flag.ContinueOnError) + var opts SpecInitOptions + addRoot(fs, &opts.Root) + fs.StringVar(&opts.Language, "language", "en", "Spec language (default: en).") + fs.StringVar(&opts.Flow, "flow", "", "Development flow: unit|tdd|tdd-e2e (default: steering default, else tdd).") + positionals, err := parseFlags(fs, args) + if err != nil { + return failOnFlagParse(err) + } + if len(positionals) < 1 { + render.Err("usage: " + prog() + " spec init FEATURE") + return 1 + } + opts.Feature = positionals[0] + if err := SpecInit(templates, opts); err != nil { render.Err(err.Error()) return 1 } - render.OK("created " + workspace.Relative(r, target) + "/") - render.Info(fmt.Sprintf("next: `%s spec generate --artifact requirements`", prog())) return 0 } @@ -223,6 +312,7 @@ func specShow(args []string) int { fmt.Println(render.Bold("feature: " + data.FeatureName)) fmt.Println("phase: " + data.Phase) fmt.Println("language: " + data.Language) + fmt.Println("flow: " + effectiveFlow(data.DevelopmentFlow)) fmt.Println("created: " + data.CreatedAt) fmt.Printf("ready: %v\n", data.ReadyForImplementation) fmt.Println("approvals:") diff --git a/internal/cli/spec_flow_test.go b/internal/cli/spec_flow_test.go new file mode 100644 index 0000000..aaf081a --- /dev/null +++ b/internal/cli/spec_flow_test.go @@ -0,0 +1,163 @@ +package cli + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/protonspy/csdd/internal/templater" +) + +// readSpecFlow returns the raw development_flow field written to a spec.json. +func readSpecFlow(t *testing.T, dir, feature string) (SpecJSON, []byte) { + t.Helper() + b, err := os.ReadFile(filepath.Join(dir, "specs", feature, "spec.json")) + if err != nil { + t.Fatalf("read spec.json: %v", err) + } + var s SpecJSON + if err := json.Unmarshal(b, &s); err != nil { + t.Fatalf("unmarshal spec.json: %v", err) + } + return s, b +} + +// Req 1.1 / 8.3: each valid flow is persisted and surfaced by `spec show`. +func TestSpecInitFlowPersistedAndShown(t *testing.T) { + for _, flow := range []string{"unit", "tdd", "tdd-e2e"} { + flow := flow + t.Run(flow, func(t *testing.T) { + dir := freshWorkspace(t) + if code, _, errOut := run(t, "spec", "init", "feat-"+flow, "--flow", flow, "--root", dir); code != 0 { + t.Fatalf("init --flow %s: code=%d err=%q", flow, code, errOut) + } + s, _ := readSpecFlow(t, dir, "feat-"+flow) + if s.DevelopmentFlow != flow { + t.Errorf("development_flow = %q, want %q", s.DevelopmentFlow, flow) + } + _, showOut, _ := run(t, "spec", "show", "feat-"+flow, "--root", dir) + if !strings.Contains(showOut, "flow:") || !strings.Contains(showOut, flow) { + t.Errorf("show output missing flow %q: %s", flow, showOut) + } + }) + } +} + +// Req 1.2: an invalid flow is rejected (exit 1) and writes no spec.json. +func TestSpecInitFlowInvalid(t *testing.T) { + dir := freshWorkspace(t) + code, _, errOut := run(t, "spec", "init", "bad-flow", "--flow", "xunit", "--root", dir) + if code != 1 { + t.Errorf("invalid flow: code=%d, want 1", code) + } + if !strings.Contains(errOut, "flow") { + t.Errorf("error should mention flow: %q", errOut) + } + if _, err := os.Stat(filepath.Join(dir, "specs", "bad-flow")); !os.IsNotExist(err) { + t.Error("spec dir must not be created when --flow is invalid") + } +} + +// Req 1.3 / 2.2: with no steering default, an omitted flow resolves to tdd. +func TestSpecInitFlowDefaultsToTdd(t *testing.T) { + dir := freshWorkspace(t) + if code, _, errOut := run(t, "spec", "init", "no-flow", "--root", dir); code != 0 { + t.Fatalf("init: code=%d err=%q", code, errOut) + } + s, _ := readSpecFlow(t, dir, "no-flow") + if s.DevelopmentFlow != "tdd" { + t.Errorf("development_flow = %q, want tdd", s.DevelopmentFlow) + } +} + +// Req 1.3 / 2.1: a steering-declared default is applied when flow is omitted. +func TestSpecInitFlowSteeringDefault(t *testing.T) { + dir := freshWorkspace(t) + steering := filepath.Join(dir, ".claude", "steering", "zzz-flow-default.md") + body := "---\ninclusion: manual\ndefault_development_flow: tdd-e2e\n---\n# default\n" + if err := os.WriteFile(steering, []byte(body), 0o644); err != nil { + t.Fatal(err) + } + if code, _, errOut := run(t, "spec", "init", "inherits", "--root", dir); code != 0 { + t.Fatalf("init: code=%d err=%q", code, errOut) + } + s, _ := readSpecFlow(t, dir, "inherits") + if s.DevelopmentFlow != "tdd-e2e" { + t.Errorf("development_flow = %q, want tdd-e2e", s.DevelopmentFlow) + } + // An explicit --flow overrides the steering default. + if code, _, _ := run(t, "spec", "init", "explicit", "--flow", "unit", "--root", dir); code != 0 { + t.Fatal("init explicit failed") + } + s2, _ := readSpecFlow(t, dir, "explicit") + if s2.DevelopmentFlow != "unit" { + t.Errorf("explicit flow = %q, want unit", s2.DevelopmentFlow) + } +} + +// Req 2.2: an invalid steering default does not corrupt init; it falls back to tdd. +func TestResolveDefaultFlowInvalidFallsBack(t *testing.T) { + dir := freshWorkspace(t) + steering := filepath.Join(dir, ".claude", "steering", "zzz-bad.md") + if err := os.WriteFile(steering, []byte("---\ninclusion: manual\ndefault_development_flow: bogus\n---\n"), 0o644); err != nil { + t.Fatal(err) + } + if got := resolveDefaultFlow(dir); got != "tdd" { + t.Errorf("resolveDefaultFlow with invalid steering = %q, want tdd", got) + } +} + +// Req 3.1: a legacy spec.json with no development_flow is read as tdd. +func TestEffectiveFlowLegacy(t *testing.T) { + if got := effectiveFlow(""); got != "tdd" { + t.Errorf("effectiveFlow(\"\") = %q, want tdd", got) + } + if got := effectiveFlow("unit"); got != "unit" { + t.Errorf("effectiveFlow(unit) = %q, want unit", got) + } +} + +// Req 3.3: saving a spec with no flow omits the field, so legacy files stay clean. +func TestSaveSpecJSONOmitsEmptyFlow(t *testing.T) { + dir := t.TempDir() + if err := saveSpecJSON(dir, SpecJSON{FeatureName: "legacy", Phase: "initialized"}); err != nil { + t.Fatal(err) + } + b, err := os.ReadFile(filepath.Join(dir, "spec.json")) + if err != nil { + t.Fatal(err) + } + if strings.Contains(string(b), "development_flow") { + t.Errorf("empty flow must be omitted from spec.json, got:\n%s", b) + } +} + +func TestValidDevelopmentFlow(t *testing.T) { + for _, ok := range []string{"unit", "tdd", "tdd-e2e"} { + if !validDevelopmentFlow(ok) { + t.Errorf("validDevelopmentFlow(%q) = false, want true", ok) + } + } + for _, bad := range []string{"", "TDD", "e2e", "unit-test"} { + if validDevelopmentFlow(bad) { + t.Errorf("validDevelopmentFlow(%q) = true, want false", bad) + } + } +} + +// Req 1.1: the exported op is the single source of truth (CLI + TUI call it). +func TestSpecInitOpWritesFlow(t *testing.T) { + dir := freshWorkspace(t) + if err := SpecInit(templater.FS, SpecInitOptions{Root: dir, Feature: "via-op", Flow: "unit"}); err != nil { + t.Fatalf("SpecInit: %v", err) + } + s, _ := readSpecFlow(t, dir, "via-op") + if s.DevelopmentFlow != "unit" { + t.Errorf("op-written flow = %q, want unit", s.DevelopmentFlow) + } + if err := SpecInit(templater.FS, SpecInitOptions{Root: dir, Feature: "bad", Flow: "nope"}); err == nil { + t.Error("SpecInit with invalid flow should error") + } +} diff --git a/internal/session/session.go b/internal/session/session.go index 7e4428b..a28a1ad 100644 --- a/internal/session/session.go +++ b/internal/session/session.go @@ -26,6 +26,7 @@ type specJSON struct { FeatureName string `json:"feature_name"` Language string `json:"language"` Phase string `json:"phase"` + DevelopmentFlow string `json:"development_flow,omitempty"` Approvals map[string]approvalFlag `json:"approvals"` ReadyForImplementation bool `json:"ready_for_implementation"` CreatedAt string `json:"created_at"` diff --git a/internal/session/session_test.go b/internal/session/session_test.go index d8de82f..941a11b 100644 --- a/internal/session/session_test.go +++ b/internal/session/session_test.go @@ -8,6 +8,23 @@ import ( "testing" ) +// specMirrorFixture exercises the session specJSON mirror's development_flow +// field. The cli package asserts the same field independently (spec_flow_test.go); +// the two mirror structs are kept tag-for-tag in sync by review discipline. +const specMirrorFixture = `{"feature_name":"x","language":"en","phase":"initialized","development_flow":"tdd-e2e","approvals":{},"ready_for_implementation":false,"created_at":"2026-01-01T00:00:00Z"}` + +// TestSpecJSONMirrorParsesFlow asserts the read-only mirror parses the +// development_flow field (kept in sync with cli.SpecJSON). +func TestSpecJSONMirrorParsesFlow(t *testing.T) { + var s specJSON + if err := json.Unmarshal([]byte(specMirrorFixture), &s); err != nil { + t.Fatal(err) + } + if s.DevelopmentFlow != "tdd-e2e" { + t.Errorf("mirror DevelopmentFlow = %q, want tdd-e2e", s.DevelopmentFlow) + } +} + // writeWorkspace materializes a temp workspace from a path→content map and // returns the root. Paths are slash-separated and relative to the root. func writeWorkspace(t *testing.T, files map[string]string) string { diff --git a/internal/templater/templates/agents/implementer.md.tmpl b/internal/templater/templates/agents/implementer.md.tmpl index 43f8d4f..8645b6f 100644 --- a/internal/templater/templates/agents/implementer.md.tmpl +++ b/internal/templater/templates/agents/implementer.md.tmpl @@ -1,12 +1,12 @@ --- name: implementer -description: Implements a single task from specs/*/tasks.md test-first (RED→GREEN→REFACTOR) inside its approved design.md boundary — runs the verification gate, records evidence, marks the task done, updates notes, and reports. Language-agnostic; specialize per stack via steering and skills. +description: Implements a single task from specs/*/tasks.md inside its approved design.md boundary, following the spec's development_flow (tdd test-first by default; unit tests-after; tdd-e2e adds end-to-end) — runs the verification gate, records evidence, marks the task done, updates notes, and reports. Language-agnostic; specialize per stack via steering and skills. tools: Read, Grep, Glob, Edit, Write, Bash --- -You implement **one task at a time**, test-first, and leave the spec honest: the -test exists before the code, the gate is green, and the task's box is checked when -it lands. You write code; you do not redesign. +You implement **one task at a time**, following the spec's `development_flow`, and +leave the spec honest: the behavior is covered by a passing test, the gate is green, +and the task's box is checked when it lands. You write code; you do not redesign. ## Role @@ -24,6 +24,9 @@ the discipline *around* one task: scope, evidence, task-marking, and reporting. ## Collect inputs first - Which single task? (one leaf ID from `tasks.md`, with its `_Requirements:_` / `_Boundary:_`). +- What is the spec's `development_flow` in `spec.json` (absent ⇒ `tdd`)? It decides whether + the test comes before (`tdd`/`tdd-e2e`) or alongside (`unit`) the code, and whether an + end-to-end check is in scope (`tdd-e2e`). - What observable behavior does it add? State it in one sentence before editing. - Which `design.md` boundary owns it, and what is the project's test command? - Will it call a library/framework API? If so, confirm the signature against current @@ -35,9 +38,14 @@ the discipline *around* one task: scope, evidence, task-marking, and reporting. 1. **Read the contract.** Read the task, its requirement IDs in `requirements.md`, and the component it belongs to in `design.md`. Confirm the work fits inside that boundary; if it does not, stop and report — do not widen scope. -2. **RED → GREEN → REFACTOR.** Run the `tdd-cycle` skill for this one task: write the - smallest failing test, confirm it fails for the expected reason, write the least - code to pass, then refactor under green. +2. **Implement per the flow.** Write one test per *observable behavior*, not per branch. + - `tdd` / `tdd-e2e`: run the `tdd-cycle` skill — write the smallest failing test, + confirm it fails for the expected reason, write the least code to pass, refactor + under green. + - `unit`: write the implementation, then cover the behavior with a unit test in the + same task; both end green. No RED-first step is required. + - `tdd-e2e`: additionally exercise the affected golden/error flow end-to-end (e.g. the + `frontend-e2e-qa` skill or the project's e2e suite) when the task completes a flow. 3. **Widen the net.** Run the broader relevant suite (plus lint/typecheck/build if the project defines them) to catch regressions. 4. **Run the gate.** Use the `verify-change` skill to run the project's full @@ -78,13 +86,15 @@ variants only add the stack-specific knowledge. ## Gotchas -- If no test framework exists, say so explicitly and stop — do not silently skip RED. +- If no test framework exists, say so explicitly and stop — do not silently skip the test + (RED under `tdd`/`tdd-e2e`, the same-task unit test under `unit`). - A passing suite is not proof of coverage; make sure the new behavior is exercised. - Do not mark a task `[x]` while any step is blocked or the gate is red. ## Completion criteria -- [ ] A test failed before the change (RED) and passes after (GREEN). +- [ ] The behavior is covered by a passing test — RED-then-GREEN under `tdd`/`tdd-e2e`, a same-task unit test under `unit`. +- [ ] Under `tdd-e2e`, the affected golden/error flow passes end-to-end. - [ ] The relevant suite + gate (`verify-change`) are green. - [ ] `csdd spec test-report --run …` recorded the evidence. - [ ] `csdd spec diff-report ` refreshed the Changes view for the slice. diff --git a/internal/templater/templates/rules/definition-of-done.md.tmpl b/internal/templater/templates/rules/definition-of-done.md.tmpl index ca098ae..e9dd169 100644 --- a/internal/templater/templates/rules/definition-of-done.md.tmpl +++ b/internal/templater/templates/rules/definition-of-done.md.tmpl @@ -6,12 +6,16 @@ A task or PR is **done** only when every box is checked. "It compiles" and ## Per task (one task from tasks.md) - [ ] The active spec was read; the change stays within its approved boundary. -- [ ] Test-first: a test failed (RED) for the expected reason before the implementation, when the task adds behavior. -- [ ] That test now passes (GREEN); refactoring kept it green. +- [ ] **Under `tdd`/`tdd-e2e`:** a test failed (RED) for the expected reason before the + implementation, when the task adds behavior; that test now passes (GREEN) and + refactoring kept it green. +- [ ] **Under `unit`:** the behavior is covered by a unit test that passes (no RED-first + evidence required). +- [ ] **Under `tdd-e2e`:** the end-to-end coverage for the affected golden/error flow runs green. - [ ] The relevant suite, lint, and typecheck pass for the touched scope. -- [ ] Unit tests were run **through the csdd CLI** so the spec carries the evidence: +- [ ] Unit tests were run **through the csdd CLI** so the spec carries the evidence (**every flow**): `csdd spec test-report --run --lang ` wrote `specs//test-report.json` - (tests green, coverage recorded, no open attentions). + (tests green, coverage recorded, no open attentions). Missing this evidence is a blocked item. - [ ] Scope was respected — no opportunistic refactors or unrelated changes. - [ ] `## Implementation Notes` in tasks.md updated with any cross-task learning. - [ ] Report lists: task ID, files changed, tests added, commands run, remaining risks. diff --git a/internal/templater/templates/rules/tasks-generation.md.tmpl b/internal/templater/templates/rules/tasks-generation.md.tmpl index 32c77dc..13aaf0d 100644 --- a/internal/templater/templates/rules/tasks-generation.md.tmpl +++ b/internal/templater/templates/rules/tasks-generation.md.tmpl @@ -11,24 +11,38 @@ - `_Depends: 3.1, 4.2_` — cross-boundary dependencies only; sequential order is implicit. - `(P)` — parallel-capable, runs alongside other (P) tasks in different boundaries. -## Test-First (TDD) -- Every behavior is delivered as a **RED test sub-task immediately followed by a GREEN - implementation sub-task**. The test is written first and must fail for the expected - reason before the implementation exists. -- Name the pair explicitly: `2.1 RED — write the failing test for X`, `2.2 GREEN — minimal - implementation to pass 2.1`. -- The GREEN sub-task depends on its RED sub-task being in place; refactor only under green. -- Pure scaffolding (migrations, type skeletons, config) needs no RED test — but any task - that adds observable behavior does. -- Phase 4 (Validation) holds end-to-end tests, not the unit tests — those live with their behavior in Phases 2–3. -- Run unit tests through the csdd CLI so the spec keeps traceable evidence: +## Development flow (read `development_flow` from spec.json first) + +The spec's `development_flow` decides how behavior tasks are shaped. Resolve it +before generating tasks (absent ⇒ `tdd`): + +- **`tdd`** (default) and **`tdd-e2e`** — every behavior is delivered as a **RED test + sub-task immediately followed by a GREEN implementation sub-task**. The test is written + first and must fail for the expected reason before the implementation exists. Name the + pair explicitly: `2.1 RED — write the failing test for X`, `2.2 GREEN — minimal + implementation to pass 2.1`. The GREEN sub-task depends on its RED sub-task; refactor + only under green. +- **`unit`** — write the implementation, then cover it with unit tests in the **same** + task. No preceding RED sub-task is required, but **every behavior task still carries a + test sub-task** covering that behavior — `unit` skips the RED-first ritual, not the test. +- **`tdd-e2e`** — in addition to the per-behavior RED/GREEN pairs, **Phase 4 (Validation) + includes an end-to-end task** covering the golden flow and the top error flows. + +Common to every flow: +- One test per **observable behavior**, not one per code branch — keep the test count + proportional to behavior, not to internal control flow. +- Pure scaffolding (migrations, type skeletons, config) needs no test sub-task. +- Phase 4 (Validation) holds end-to-end tests, not the unit tests — those live with their + behavior in Phases 2–3. +- Run unit tests through the csdd CLI so the spec keeps traceable evidence (**all flows**): `csdd spec test-report --run --lang ` writes `specs//test-report.json`. Phase 4 SHOULD include a task that records this evidence (tests green, coverage captured). ## Avoid - Generic tasks like "implement the backend". - File-level instructions like "edit albums.ts". -- An implementation sub-task for a behavior with no preceding test sub-task. +- A behavior with no test sub-task at all (under `tdd`/`tdd-e2e`, no *preceding* test). +- A test per branch instead of per observable behavior (test bloat). ## Prefer - Capabilities and outcomes: @@ -41,4 +55,6 @@ - [ ] Every `(P)` task carries `_Boundary:_`. - [ ] Every `_Depends:_` references a task ID that exists in this file. - [ ] No two `(P)` tasks share the same boundary. -- [ ] Every behavior implementation (GREEN) sub-task is preceded by a test (RED) sub-task for the same behavior. +- [ ] Under `tdd`/`tdd-e2e`: every behavior implementation (GREEN) sub-task is preceded by a test (RED) sub-task for the same behavior. +- [ ] Under `unit`: every behavior task carries a test sub-task (no preceding RED required). +- [ ] Under `tdd-e2e`: Phase 4 includes an end-to-end task for the golden + top error flows. diff --git a/internal/templater/templates/skills/tdd-cycle/SKILL.md.tmpl b/internal/templater/templates/skills/tdd-cycle/SKILL.md.tmpl index ce6a3c0..6990684 100644 --- a/internal/templater/templates/skills/tdd-cycle/SKILL.md.tmpl +++ b/internal/templater/templates/skills/tdd-cycle/SKILL.md.tmpl @@ -1,6 +1,6 @@ --- name: tdd-cycle -description: Execute one small red→green→refactor cycle for a single task from specs/*/tasks.md. Use when implementing a behavior task that needs test-first development. +description: Execute one small red→green→refactor cycle for a single task from specs/*/tasks.md. Use for behavior tasks under the test-first flows (development_flow tdd or tdd-e2e); under the unit flow the implementer covers behavior with a same-task test instead of this RED-first cycle. --- # TDD Cycle @@ -8,6 +8,14 @@ description: Execute one small red→green→refactor cycle for a single task fr Use this skill for **exactly one task at a time**, driving it through red → green → refactor. +## When this applies + +This is the **test-first** cycle for specs whose `development_flow` is `tdd` or +`tdd-e2e`. Under the `unit` flow there is no RED-first step — the implementer writes +the code and covers the behavior with a unit test in the same task; skip this skill +for those tasks. In every flow, write **one test per observable behavior, not one per +code branch** — keep the test count proportional to behavior. + ## Default Operation Mode Direct implementation, test-first. The first artifact you produce is a failing diff --git a/internal/templater/templates/spec/spec.json.tmpl b/internal/templater/templates/spec/spec.json.tmpl index a60537e..0ac498f 100644 --- a/internal/templater/templates/spec/spec.json.tmpl +++ b/internal/templater/templates/spec/spec.json.tmpl @@ -2,6 +2,7 @@ "feature_name": "{{.Feature}}", "language": "{{.Language}}", "phase": "initialized", + "development_flow": "{{.DevelopmentFlow}}", "approvals": { "requirements": { "generated": false, "approved": false }, "design": { "generated": false, "approved": false }, diff --git a/internal/templater/templates/spec/tasks.md.tmpl b/internal/templater/templates/spec/tasks.md.tmpl index 29917fb..bfda256 100644 --- a/internal/templater/templates/spec/tasks.md.tmpl +++ b/internal/templater/templates/spec/tasks.md.tmpl @@ -7,10 +7,14 @@ Annotation rules (see .claude/rules/tasks-generation.md): - _Depends: 3.1, 4.2_ — cross-boundary dependencies only; sequential order is implicit. - (P) — parallel-capable, runs alongside other (P) tasks in different boundaries. -Test-first (TDD): every behavior is a RED test sub-task immediately followed by -the minimal GREEN implementation sub-task. Write the failing test, confirm it -fails for the expected reason, then write the least code to pass; refactor under -green. Sub-tasks: 1-3 hours of focused work. +Task structure follows the spec's development_flow (see spec.json; absent ⇒ tdd): +- tdd / tdd-e2e: every behavior is a RED test sub-task immediately followed by the + minimal GREEN implementation sub-task (write the failing test, confirm it fails for the + expected reason, then the least code to pass; refactor under green); tdd-e2e also adds + an end-to-end task in Phase 4. +- unit: write the implementation, then cover the behavior with a unit test in the same + task — no preceding RED sub-task, but every behavior still carries a test. +One test per observable behavior, not per branch. Sub-tasks: 1-3 hours of focused work. --> ## Phase 1: Foundation diff --git a/internal/tui/wizard.go b/internal/tui/wizard.go index d0113e4..0805a67 100644 --- a/internal/tui/wizard.go +++ b/internal/tui/wizard.go @@ -345,12 +345,13 @@ func (w wizardModel) submit() tea.Cmd { if err := workspace.KebabCheck(feature, "feature"); err != nil { return resultMsg{text: err.Error(), isErr: true} } - // Use the CLI Run path to avoid duplicating logic for spec init. - code := cli.Run([]string{"spec", "init", feature}, w.templates) - if code != 0 { - return resultMsg{text: "spec init failed (exit " + fmt.Sprint(code) + ")", isErr: true} + flow := getStr(w.state, "flow") + if err := cli.SpecInit(w.templates, cli.SpecInitOptions{ + Root: w.root, Feature: feature, Flow: flow, + }); err != nil { + return resultMsg{text: err.Error(), isErr: true} } - return resultMsg{text: "spec '" + feature + "' initialized"} + return resultMsg{text: "spec '" + feature + "' initialized (flow: " + flow + ")"} case "generate": artifact := getStr(w.state, "artifact") if err := cli.SpecGenerate(w.templates, cli.SpecGenerateOptions{ @@ -535,6 +536,11 @@ func specFields() []*field { kind: fText, required: true, input: newTextInput("photo-albums"), validate: validateKebab, }, + { + name: "flow", label: "Development flow", kind: fSelect, required: true, + choices: []string{"tdd", "unit", "tdd-e2e"}, + skipIf: func(s map[string]any) bool { return getStr(s, "operation") != "init" }, + }, { name: "artifact", label: "Artifact", kind: fSelect, required: true, choices: workspace.SpecArtifacts, diff --git a/internal/tui/wizard_test.go b/internal/tui/wizard_test.go index 414587b..7f4d5dc 100644 --- a/internal/tui/wizard_test.go +++ b/internal/tui/wizard_test.go @@ -64,7 +64,7 @@ func TestNewWizardFieldsPerKind(t *testing.T) { n int }{ {wizSteering, "Create steering", 5}, - {wizSpec, "Spec wizard", 4}, + {wizSpec, "Spec wizard", 5}, {wizSkill, "Create skill", 3}, {wizAgent, "Create agent", 5}, {wizMCP, "Add MCP server", 7}, @@ -188,6 +188,29 @@ func TestWizardSubmitSkill(t *testing.T) { } } +// TestWizardSubmitSpecInitFlow asserts the wizard's spec-init submit carries the +// selected development flow into the shared SpecInit op (Req 1.4). +func TestWizardSubmitSpecInitFlow(t *testing.T) { + dir := freshTUIWorkspace(t) + w := wizardModel{kind: wizSpec, templates: templater.FS, root: dir, state: map[string]any{ + "operation": "init", + "feature": "wizard-flow", + "flow": "unit", + }} + msg := w.submit()() + rm, ok := msg.(resultMsg) + if !ok || rm.isErr { + t.Fatalf("spec init submit failed: %#v", msg) + } + b, err := os.ReadFile(filepath.Join(dir, "specs", "wizard-flow", "spec.json")) + if err != nil { + t.Fatalf("spec.json not created: %v", err) + } + if !strings.Contains(string(b), `"development_flow": "unit"`) { + t.Errorf("wizard did not persist flow=unit:\n%s", b) + } +} + func TestWizardSubmitAgentDefaultsTools(t *testing.T) { dir := freshTUIWorkspace(t) // Empty tools slice in state must fall back to least-privilege Read+Grep. diff --git a/internal/validator/validator.go b/internal/validator/validator.go index 021f6b9..ba66ef7 100644 --- a/internal/validator/validator.go +++ b/internal/validator/validator.go @@ -454,6 +454,16 @@ func sortedJoin(in []string) string { return strings.Join(cp, ", ") } +// validDevelopmentFlow reports whether f is one of the selectable development +// flows. Mirrors cli.developmentFlows (validator must not import cli). +func validDevelopmentFlow(f string) bool { + switch f { + case "unit", "tdd", "tdd-e2e": + return true + } + return false +} + // ValidateSteering inspects every (or one) steering file's frontmatter and // reports inclusion-mode errors. Pass empty name to validate every file. func ValidateSteering(steeringDir, name string) ([]Issue, error) { @@ -480,6 +490,9 @@ func ValidateSteering(steeringDir, name string) ([]Issue, error) { continue } fm := frontmatter.Parse(string(data)) + if df := fm.AsString("default_development_flow", ""); df != "" && !validDevelopmentFlow(df) { + issues = append(issues, Issue{File: fname, Msg: "default_development_flow must be one of unit|tdd|tdd-e2e (got '" + df + "')"}) + } inc := fm.AsString("inclusion", "") valid := false for _, m := range []string{"always", "fileMatch", "manual", "auto"} { diff --git a/internal/validator/validator_test.go b/internal/validator/validator_test.go index a32e02b..49622d4 100644 --- a/internal/validator/validator_test.go +++ b/internal/validator/validator_test.go @@ -424,6 +424,32 @@ func TestValidateSteering(t *testing.T) { } } +// Req 2.3 / 3.2: an invalid default_development_flow is flagged; a valid or +// absent one is not. +func TestValidateSteeringFlowDefault(t *testing.T) { + dir := t.TempDir() + files := map[string]string{ + "valid-flow.md": "---\ninclusion: always\ndefault_development_flow: tdd-e2e\n---\n", + "absent-flow.md": "---\ninclusion: always\n---\n", + "invalid-flow.md": "---\ninclusion: always\ndefault_development_flow: bogus\n---\n", + } + for name, body := range files { + if err := os.WriteFile(filepath.Join(dir, name), []byte(body), 0o644); err != nil { + t.Fatal(err) + } + } + issues, err := ValidateSteering(dir, "") + if err != nil { + t.Fatal(err) + } + if len(issues) != 1 { + t.Fatalf("expected 1 issue, got %d: %v", len(issues), issues) + } + if issues[0].File != "invalid-flow.md" || !strings.Contains(issues[0].Msg, "default_development_flow") { + t.Errorf("unexpected issue: %+v", issues[0]) + } +} + func TestValidateSteeringByName(t *testing.T) { dir := t.TempDir() _ = os.WriteFile(filepath.Join(dir, "missing.md"), []byte(""), 0o644) diff --git a/mcp-server/src/tools/spec.ts b/mcp-server/src/tools/spec.ts index 4beef88..dd9e114 100644 --- a/mcp-server/src/tools/spec.ts +++ b/mcp-server/src/tools/spec.ts @@ -8,13 +8,24 @@ export const specTools: ToolDef[] = [ name: "csdd_spec_init", title: "Spec init", description: - "Create specs//spec.json (phase=initial, no approvals, not ready for implementation).", + "Create specs//spec.json (phase=initialized, no approvals, not ready for implementation).", inputSchema: { feature, language: z.string().optional().describe("Spec language (default: en)."), + flow: z + .enum(["unit", "tdd", "tdd-e2e"]) + .optional() + .describe("Development flow: unit (tests after code) | tdd (test-first, default) | tdd-e2e (TDD + e2e). Default: steering default, else tdd."), root: rootField, }, - toArgs: (p) => ["spec", "init", p.feature, ...flag("--language", p.language), ...rootArg(p)], + toArgs: (p) => [ + "spec", + "init", + p.feature, + ...flag("--language", p.language), + ...flag("--flow", p.flow), + ...rootArg(p), + ], }, { name: "csdd_spec_list",