diff --git a/.go-arch-lint.yml b/.go-arch-lint.yml index e5c76f97..7a0c7e13 100644 --- a/.go-arch-lint.yml +++ b/.go-arch-lint.yml @@ -305,6 +305,12 @@ components: infra-mcp: in: infrastructure/mcp + domain-transcript: + in: domain/transcript + + infra-transcript: + in: infrastructure/transcript + # INTERFACES LAYER interfaces-cli: in: interfaces/cli @@ -342,6 +348,7 @@ deps: - domain-errors - domain-plugin - domain-operation + - domain-transcript canUse: - go-stdlib domain-errors: @@ -371,6 +378,7 @@ deps: - domain-errors - domain-plugin - domain-operation + - domain-transcript - application-tools - infra-agents - infra-expression @@ -394,8 +402,10 @@ deps: - domain-ports - domain-errors - domain-plugin + - domain-transcript canUse: - go-stdlib + - uuid # INFRASTRUCTURE — domain + vendors infra-agents: @@ -404,6 +414,7 @@ deps: - domain-ports - domain-errors - domain-plugin + - domain-transcript - infra-logger canUse: - go-stdlib @@ -644,6 +655,19 @@ deps: canUse: - go-stdlib + domain-transcript: + mayDependOn: [] + canUse: + - go-stdlib + + infra-transcript: + mayDependOn: + - domain-transcript + - domain-ports + canUse: + - go-stdlib + - uuid + # INTERFACES — wiring layer (app + infra + domain) interfaces-cli: mayDependOn: @@ -677,6 +701,7 @@ deps: - infra-tokenizer - infra-tools - infra-tools-builtins + - infra-transcript - infra-updater - infra-workflowpkg - infra-xdg diff --git a/.golangci.yml b/.golangci.yml index 588f50fa..bc75c936 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -199,6 +199,18 @@ linters: linters: - gocritic + # gocritic hugeParam - test fake implementations satisfy interfaces by value + - path: _test\.go + text: "hugeParam" + linters: + - gocritic + + # gocritic unnamedResult - test fakes implement interfaces; named returns optional + - path: _test\.go + text: "unnamedResult" + linters: + - gocritic + # gocritic elseif - acceptable in tests for clarity - path: _test\.go text: "elseif" @@ -211,6 +223,12 @@ linters: linters: - gocritic + # 
staticcheck S1005 - test files use _ = <-ch for clarity when draining channels + - path: _test\.go + text: "S1005" + linters: + - staticcheck + # gocritic rangeValCopy - performance is not critical in CLI display functions - path: internal/interfaces/cli/(run|status)\.go text: "rangeValCopy" @@ -234,6 +252,13 @@ linters: linters: - revive + # Test helper functions use (svc, ctx) parameter order intentionally + # to group the service under test with its arguments; ctx is still present + - path: _test\.go + text: context-as-argument + linters: + - revive + # Internal infrastructure packages use directory names matching stdlib (http, github) # This is intentional and scoped to internal packages only - path: internal/infrastructure/http/ diff --git a/.zpm/kb/feedback/journal.wal b/.zpm/kb/feedback/journal.wal index e69de29b..df190928 100644 --- a/.zpm/kb/feedback/journal.wal +++ b/.zpm/kb/feedback/journal.wal @@ -0,0 +1,9 @@ +{"ts":1780826633,"op":"assert","clause":"feedback:rule(plan_preexisting_violations, review, 'When an implementation plan adds new files to a package that already has a missing_test (or other) integrity_violation, each affected component MUST name the violating file and state RESOLVE-in-feature or DEFER-with-ticket; new files must each ship a test so debt never grows silently. Query integrity_violation/2 in ZPM default before finalizing a plan.', high, plan_review_F106)"} +{"ts":1780826633,"op":"assert","clause":"feedback:trigger(plan_preexisting_violations, '.specify/implementation/', directory)"} +{"ts":1780828489,"op":"assert","clause":"feedback:rule(verify_zpm_violation_before_acting, review, 'ZPM integrity_violation(missing_test,F) is a DERIVED rule from source_file/1 + missing test_file/2 facts — it can be STALE (test exists on disk but no test_file/2 fact). Before treating it as debt in a plan/review, confirm the _test.go sibling is truly absent; if it exists, add the missing test_file/2 fact to default memory instead. 
Never add a duplicate test file to resolve a phantom violation.', high, plan_review_F106_postmortem)"} +{"ts":1780828489,"op":"assert","clause":"feedback:trigger(verify_zpm_violation_before_acting, '.specify/implementation/', directory)"} +{"ts":1780828573,"op":"retractall","clause":"feedback:rule(plan_preexisting_violations, _, _, _, _)"} +{"ts":1780828573,"op":"retractall","clause":"feedback:trigger(plan_preexisting_violations, _, _)"} +{"ts":1780828725,"op":"retractall","clause":"feedback:rule(plan_preexisting_violations, _, _, _, _, _, _)"} +{"ts":1780828725,"op":"assert","clause":"feedback:rule(plan_preexisting_violations, review, 'STALE-CHECK before acting on a ZPM integrity_violation(missing_test,F): it is a DERIVED rule (source_file + missing test_file/2 + not covered_by), so it goes stale when a _test.go sibling exists on disk but no test_file/2 fact was recorded. Verify the sibling is truly absent; if it exists, add the missing test_file/2 fact to default memory rather than treating it as debt or adding a duplicate test.', high, plan_review_F106_postmortem)"} +{"ts":1780828739,"op":"retractall","clause":"feedback:trigger(verify_zpm_violation_before_acting, '.specify/implementation/', directory)"} diff --git a/.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal b/.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal new file mode 100644 index 00000000..54aaecf2 --- /dev/null +++ b/.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal @@ -0,0 +1,116 @@ +{"ts":1780836417,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:todo(_, _, _, _)"} +{"ts":1780836417,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub(_, _, _)"} +{"ts":1780836417,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock(_, _, _)"} 
+{"ts":1780836418,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:not_impl(_, _, _)"} +{"ts":1780836418,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file(_, _)"} +{"ts":1780836418,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file('.zpm/kb/feedback/journal.wal', changed)"} +{"ts":1780836418,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file('.zpm/mounts.json', changed)"} +{"ts":1780837829,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:todo(_, _, _, _)"} +{"ts":1780837829,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub(_, _, _)"} +{"ts":1780837829,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock(_, _, _)"} +{"ts":1780837830,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:not_impl(_, _, _)"} +{"ts":1780837830,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file(_, _)"} +{"ts":1780837830,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file('.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', changed)"} +{"ts":1780837831,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_2', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780837831,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_3', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780837832,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_9', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} 
+{"ts":1780837832,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_10', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780854565,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:todo(_, _, _, _)"} +{"ts":1780854565,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub(_, _, _)"} +{"ts":1780854566,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock(_, _, _)"} +{"ts":1780854566,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:not_impl(_, _, _)"} +{"ts":1780854566,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file(_, _)"} +{"ts":1780854566,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file('.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', changed)"} +{"ts":1780854567,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_2', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780854567,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_3', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780854567,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_9', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780854567,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_10', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780854568,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_14', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 
'unknown')"} +{"ts":1780854568,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_15', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780854568,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_16', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780854568,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_17', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780854569,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_19', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780854569,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_20', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870794,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:todo(_, _, _, _)"} +{"ts":1780870795,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub(_, _, _)"} +{"ts":1780870795,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock(_, _, _)"} +{"ts":1780870795,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:not_impl(_, _, _)"} +{"ts":1780870796,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file(_, _)"} +{"ts":1780870796,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file('.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', changed)"} +{"ts":1780870796,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_2', 
'.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870797,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_3', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870797,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_9', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870797,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_10', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870798,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_14', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870798,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_15', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870798,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_16', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870798,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_17', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870799,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_19', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870799,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_20', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} 
+{"ts":1780870799,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_24', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870800,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_25', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870800,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_26', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870800,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_27', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870801,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_28', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870801,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_29', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870801,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_30', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870802,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_31', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870802,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_32', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870802,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_33', 
'.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870803,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_35', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780870803,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_36', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872371,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:todo(_, _, _, _)"} +{"ts":1780872371,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub(_, _, _)"} +{"ts":1780872372,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock(_, _, _)"} +{"ts":1780872372,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:not_impl(_, _, _)"} +{"ts":1780872372,"op":"retractall","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file(_, _)"} +{"ts":1780872373,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file('.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', changed)"} +{"ts":1780872373,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_2', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872373,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_3', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872374,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_9', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} 
+{"ts":1780872374,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_10', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872374,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_14', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872375,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_15', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872375,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_16', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872375,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_17', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872376,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_19', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872376,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_20', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872376,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_24', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872377,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_25', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872377,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_26', 
'.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872377,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_27', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872378,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_28', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872378,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_29', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872378,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_30', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872379,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_31', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872379,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_32', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872379,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_33', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872380,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_35', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872380,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_36', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} 
+{"ts":1780872380,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_40', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872381,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_41', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872381,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_42', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872381,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_43', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872382,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_44', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872382,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_45', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872382,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_46', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872383,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_47', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872383,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_48', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872383,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_49', 
'.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872384,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_50', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872384,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_51', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872384,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_52', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872385,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_53', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872385,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_54', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872385,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_55', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872386,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_56', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872386,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_57', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872386,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_58', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} 
+{"ts":1780872387,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_59', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872387,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_60', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872387,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_61', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872388,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:stub('issue_1_63', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872388,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_1_64', '.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/journal.wal', 'unknown')"} +{"ts":1780872388,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:pr_file('internal/application/execution_service.go', changed)"} +{"ts":1780872389,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_2_35', 'internal/application/execution_service.go', 'unknown')"} +{"ts":1780872389,"op":"assert","clause":"pr_feature_f106_canonical_agent_exchange_transcript_j:mock('issue_2_134', 'internal/application/execution_service.go', 'unknown')"} diff --git a/.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/knowledge.pl b/.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/knowledge.pl new file mode 100644 index 00000000..ef1df4bb --- /dev/null +++ b/.zpm/kb/pr_feature_f106_canonical_agent_exchange_transcript_j/knowledge.pl @@ -0,0 +1,62 @@ +:- module(pr_feature_f106_canonical_agent_exchange_transcript_j, []). 
+% ─── PR Tracking Schema ────────────────────────────────────────────────────── +% Memory segment: pr_ prefix (this module: pr_feature_f106_canonical_agent_exchange_transcript_j) +% Lifecycle: created at implement start, gated before commit, archived on merge. +% +% Facts (asserted by scan scripts and LLM): +% pr_file(Path, ChangeType) — file in PR scope (changed | added | test) +% todo(Id, File, Line, Desc) — TODO/FIXME found in changed code +% stub(Id, File, Symbol) — stub/placeholder implementation +% mock(Id, File, Symbol) — mock that should be replaced with real impl +% not_impl(Id, File, Desc) — "not yet implemented" marker +% resolved(Type, Id) — marks a tracked issue as resolved +% +% Dynamic declarations (required by Trealla Prolog for runtime assertion). +:- dynamic(pr_file/2). +:- dynamic(todo/4). +:- dynamic(stub/3). +:- dynamic(mock/3). +:- dynamic(not_impl/3). +:- dynamic(resolved/2). + +% ─── Unresolved queries ───────────────────────────────────────────────────── +% Convenience predicates: each unresolved_* succeeds for a fact of that type that has no matching resolved(Type, Id). +unresolved_todo(Id, File, Line, Desc) :- + todo(Id, File, Line, Desc), \+ resolved(todo, Id). +unresolved_stub(Id, File, Symbol) :- + stub(Id, File, Symbol), \+ resolved(stub, Id). +unresolved_mock(Id, File, Symbol) :- + mock(Id, File, Symbol), \+ resolved(mock, Id). +unresolved_not_impl(Id, File, Desc) :- + not_impl(Id, File, Desc), \+ resolved(not_impl, Id). + +% blocking_issue(Id, Type, File, Detail): any tracked todo/stub/mock/not_impl fact with no matching resolved(Type, Id); Detail is the Desc or Symbol (a todo's Line is dropped). +blocking_issue(Id, todo, File, Desc) :- + todo(Id, File, _, Desc), \+ resolved(todo, Id). +blocking_issue(Id, stub, File, Symbol) :- + stub(Id, File, Symbol), \+ resolved(stub, Id). +blocking_issue(Id, mock, File, Symbol) :- + mock(Id, File, Symbol), \+ resolved(mock, Id). +blocking_issue(Id, not_impl, File, Desc) :- + not_impl(Id, File, Desc), \+ resolved(not_impl, Id). + +% pr_ready succeeds ONLY when zero blocking issues remain. +pr_ready :- \+ blocking_issue(_, _, _, _). + +% Health summary — pr_health(Category, N) counts blocking issues, resolved/2 facts, or pr_file/2 facts.
+pr_health(blocking, N) :- + findall(I, blocking_issue(I, _, _, _), L), length(L, N). +pr_health(resolved, N) :- + findall(I, resolved(_, I), L), length(L, N). +pr_health(files, N) :- + findall(F, pr_file(F, _), L), length(L, N). + +% Coverage gap: File is recorded as pr_file(File, changed) with no pr_file(File, test) and no test_file(File, _) fact. NOTE(review): test_file/2 is not among the dynamic declarations in this file — confirm it is provided elsewhere, otherwise this call may raise an existence error. +coverage_gap(File) :- + pr_file(File, changed), + \+ pr_file(File, test), + \+ test_file(File, _). + +% Collect every blocking issue into Blockers as a list of blocker(Id, Type, File, Desc) terms. +all_blockers(Blockers) :- + findall(blocker(Id, Type, File, Desc), blocking_issue(Id, Type, File, Desc), Blockers). diff --git a/README.md b/README.md index 99e4ac22..a359e007 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ A Go CLI tool for orchestrating AI agents (Claude, Gemini, Codex, GitHub Copilot - **Structured Error Codes** - Hierarchical error taxonomy (`USER.INPUT.MISSING_FILE`) with `awf error` lookup command - **Actionable Error Hints** - Context-aware suggestions ("Did you mean?") with fuzzy matching, suppressible via `--no-hints` - **Audit Trail** - Structured JSONL audit log with paired start/end entries per execution, secret masking, configurable path, and atomic writes +- **Canonical Agent Exchange Transcript** - Append-only JSONL transcript per run (`storage/transcripts/.jsonl`) capturing the full workflow lifecycle and agent exchange in one normalized stream; closed `EventType` vocabulary (run/step lifecycle, user/assistant messages, tool call/result) and typed `ContentBlock` (text, thinking, tool_use, tool_result, command, stream) with `fidelity` marker (`router` vs `agent_emitted`); per-provider normalization for Claude, Codex (with NUL byte tolerance), Gemini, Copilot, and OpenAI HTTP; tool capture at the `tools.Router.CallTool` seam covers builtin and plugin tools; sub-workflow linkage via `child_run_id`/`parent_run_id` with one file per run; bounded live fan-out with drop-newest policy keeps slow subscribers from stalling disk writes; atomic `O_APPEND` writes with `0o600`
permissions and monotonic `Seq` ordering; coexists with `audit.jsonl` and `DisplayEvent` channels unchanged. See [Agent Exchange Transcript](docs/user-guide/transcript.md). - **Distributed Tracing** - OpenTelemetry integration for visibility into workflow execution with spans for steps, agents, parallel/loop blocks, and shell commands; export to any OTLP-compatible backend (Jaeger, Grafana Tempo, Honeycomb) via `--otel-exporter` and `--otel-service-name` flags - **Plugin System** - Extend AWF with custom operations, validators, and step types via gRPC plugins (HashiCorp go-plugin); automatic mutual TLS (AutoMTLS) encryption for all host-plugin communication with zero configuration; SHA-256 binary integrity verification at launch time blocks tampered or corrupted plugins; plugin subprocess log and stdout/stderr forwarding to AWF's structured logger for crash diagnostics; validators run custom rules during `awf validate`, custom step types register new `type:` values for workflow steps; event system enables plugins to subscribe to core lifecycle events (`workflow.*`, `step.*`) and emit custom inter-plugin events with glob pattern matching, per-plugin buffered channels, and cycle detection; GRPCBroker enables plugins to emit events back to the host at runtime via a dedicated reverse channel with manifest-based permission enforcement; persistent gRPC streaming delivers events to plugins over long-lived connections with automatic fallback to unary RPCs for plugins that don't support streaming; includes `sdk.Serve()` entry point for plugin authors, and install/update/remove from GitHub Releases with checksum verification - **Workflow Packs** - Share reusable workflows and prompts via `awf workflow install owner/repo[@version]` from GitHub Releases with manifest validation, checksum verification, and atomic installation; execute with `awf run pack/workflow` namespace syntax; `{{.awf.prompts_dir}}` and `{{.awf.scripts_dir}}` resolve context-aware with 3-tier resolution 
(user override → pack embedded → global); `call_workflow` within packs resolves relative to pack root; `--global` flag for user-level installation; `awf workflow remove ` for cleanup; source metadata tracking and plugin dependency warnings diff --git a/docs/README.md b/docs/README.md index 5dadacac..d4df7694 100644 --- a/docs/README.md +++ b/docs/README.md @@ -67,7 +67,8 @@ Technical reference documentation: - [Variable Interpolation](reference/interpolation.md) - Template variables and syntax - [Input Validation](reference/validation.md) - Validation rules for workflow inputs - [Loop Reference](reference/loop.md) - Loop control flow and transitions -- [Audit Trail Schema](reference/audit-trail-schema.md) - JSONL entry format, fields, and constraints +- [Audit Trail Schema](reference/audit-trail-schema.md) - Workflow start/completion audit log JSONL format +- [Transcript Schema](reference/transcript-schema.md) - Canonical agent exchange transcript (full execution lifecycle in JSONL) - [Package Documentation](reference/package-documentation.md) - Discovering code documentation with `go doc` ## Development diff --git a/docs/reference/transcript-schema.md b/docs/reference/transcript-schema.md new file mode 100644 index 00000000..2b3c98f6 --- /dev/null +++ b/docs/reference/transcript-schema.md @@ -0,0 +1,354 @@ +--- +title: "Transcript Schema" +--- + +Reference for the agent exchange transcript JSONL format produced at `storage/transcripts/.jsonl`. Each line is a complete `ExchangeEvent`. See the [Agent Exchange Transcript guide](../user-guide/transcript.md) for usage. + +## ExchangeEvent Envelope + +All lines share the same envelope. Event-specific data lives in `payload`. 
+ +```json +{ + "seq": 1, + "run_id": "550e8400-e29b-41d4-a716-446655440000", + "type": "run.started", + "path": "", + "iteration": 0, + "timestamp": "2026-06-08T08:14:42.123Z", + "payload": null +} +``` + +### Envelope Fields + +| Field | Type | Required | Notes | +|---|---|---|---| +| `seq` | uint64 | yes | Strictly monotonic per file, starting at `1`. Allocated atomically by the recorder before write. | +| `run_id` | string | yes | UUID v4 identifying the workflow run. Reused from `ExecutionContext.WorkflowID` — same identifier as the audit trail. | +| `parent_run_id` | string | no | UUID v4 of the enclosing parent run. Present on child lifecycle events when the run was invoked via `call_workflow`. Omitted via `omitempty` when empty. | +| `child_run_id` | string | no | UUID v4 of a spawned sub-workflow run. Present on `step.call_workflow.started` / `step.call_workflow.completed` envelopes in the parent file. Omitted via `omitempty` when empty. | +| `type` | string | yes | One of the closed [event types](#event-types). | +| `path` | string | yes | Dot-separated step path within the workflow definition (e.g., `analyze`, `parallel_block.branch_a`). Empty string at the run-level events. | +| `iteration` | int | yes | Zero-based loop/retry iteration counter. `0` outside loops. | +| `timestamp` | string | yes | RFC 3339 / ISO 8601 with timezone, UTC preferred. Set at emission time. | +| `payload` | any | yes | Concrete shape determined by `type`. May be `null` for run lifecycle events. See [Payload Dispatch](#payload-dispatch). | + +--- + +## Event Types + +The vocabulary is **closed**. The complete set of valid `type` values is exactly: + +| `type` | Payload | Description | +|---|---|---| +| `run.started` | `*StepPayload` or `null` | Workflow run begins. | +| `run.completed` | `*StepPayload` or `null` | Workflow run ends. | +| `step.started` | `*StepPayload` | A step begins. 
Covers `agent`, `command`, `operation`, `terminal`, `parallel`, `for_each`, `while`, and generic custom step types. | +| `step.completed` | `*StepPayload` | A step ends (success or failure). | +| `step.call_workflow.started` | `*StepPayload` | Parent emits a sub-workflow invocation; the envelope carries `child_run_id`. | +| `step.call_workflow.completed` | `*StepPayload` | Parent observes sub-workflow completion. | +| `message.user` | `*MessagePayload` | Agent seam — resolved user prompt + composed `system_prompt`. | +| `message.assistant` | `*MessagePayload` | Agent reply, normalized into `ContentBlock`s. | +| `tool.call` | `*ToolPayload` | Tool invocation begins. Captured at the `tools.Router.CallTool` seam (`fidelity:"router"`) or from agent NDJSON (`fidelity:"agent_emitted"`). | +| `tool.result` | `*ToolPayload` | Tool invocation completes. | + +Writers never emit values outside this set. Readers must handle unknown `type` values forward-compatibly — see [Forward Compatibility](#forward-compatibility). + +--- + +## Payload Shapes + +### `StepPayload` + +Carried by `step.*` and `run.*` events. + +| Field | Type | Required | Notes | +|---|---|---|---| +| `name` | string | yes | Step name from the workflow YAML, or workflow name for `run.*` events. | +| `kind` | string | yes | Step type discriminator (`agent`, `command`, `operation`, `terminal`, `parallel`, `for_each`, `while`, `call_workflow`, or a custom type name). Reflects the actual `step.Type` field. | +| `error` | string | no | Failure description on `*.completed` events; omitted on success via `omitempty`. | +| `result` | any | no | Step result for `*.completed` events. Shape depends on the step kind (e.g., agent output, command stdout, custom step return value). Omitted via `omitempty` when nil. | + +```json +{ + "type": "step.started", + "payload": { + "name": "analyze", + "kind": "agent" + } +} +``` + +### `MessagePayload` + +Carried by `message.user` and `message.assistant`. 
+ +| Field | Type | Required | Notes | +|---|---|---|---| +| `role` | string | yes | `"user"` or `"assistant"`. Discriminates payload dispatch in tolerant decoders. | +| `blocks` | array of `ContentBlock` | yes | Ordered content blocks. For `message.user`, contains the resolved prompt and (when non-empty) the composed `system_prompt`. For `message.assistant`, contains the normalized provider output. | + +```json +{ + "type": "message.user", + "payload": { + "role": "user", + "blocks": [ + {"type": "text", "fidelity": "router", "text": "Review main.go for bugs."} + ] + } +} +``` + +### `ToolPayload` + +Carried by `tool.call` and `tool.result`. + +| Field | Type | Required | Notes | +|---|---|---|---| +| `name` | string | yes | Tool name (`Read`, `Bash`, plugin operation name, …). | +| `call_id` | string | yes | Opaque correlation token linking a `tool.call` to its `tool.result`. | +| `input` | any | yes (on `tool.call`) | Tool arguments as provided. | +| `output` | any | yes (on `tool.result`) | Tool return value. | +| `error` | string | no | Failure description on `tool.result`; omitted on success via `omitempty`. | +| `fidelity` | string | yes | `"router"` (synthesized at the AWF router seam) or `"agent_emitted"` (reported by the agent over stdio NDJSON). | + +```json +{ + "type": "tool.call", + "payload": { + "name": "Read", + "call_id": "tool_01H...", + "input": {"path": "main.go"}, + "output": null, + "fidelity": "router" + } +} +``` + +--- + +## ContentBlock + +Building block inside `MessagePayload.blocks`. The vocabulary is **closed**; field population depends on `type`. Unused fields are omitted via `omitempty`. 
+ +| `type` | Carries | Active Fields | +|---|---|---| +| `text` | Plain assistant text | `text` | +| `thinking` | Reasoning output (e.g., Claude extended thinking) | `thinking` | +| `tool_use` | Tool invocation block | `tool_name`, `tool_id`, `tool_input` | +| `tool_result` | Tool output block | `tool_id`, `tool_content` | +| `command` | Shell command executed by a step | `command` | +| `stream` | Provider stream chunk for partial-output replay | `chunk` (in `text` field per the wire format) | + +Every block carries: + +| Field | Type | Required | Notes | +|---|---|---|---| +| `type` | string | yes | One of the six block types above. | +| `fidelity` | string | yes | `"router"` or `"agent_emitted"`. | + +Block-specific fields (only populated for the relevant `type`; others omitted via `omitempty`): + +| Field | Type | Used By | Notes | +|---|---|---|---| +| `text` | string | `text`, `stream` | Verbatim assistant text. NUL bytes preserved. | +| `thinking` | string | `thinking` | Verbatim thinking output. | +| `tool_name` | string | `tool_use` | Tool name as advertised by the provider. | +| `tool_id` | string | `tool_use`, `tool_result` | Provider-supplied correlation token; links a `tool_use` block to its `tool_result` block within the same message. | +| `tool_input` | any | `tool_use` | Arguments as provided by the agent. | +| `tool_content` | any | `tool_result` | Output as observed. May be a string, structured object, or `null` for errors. | +| `command` | string | `command` | Resolved shell command string after template interpolation. | +| `chunk` | string | `stream` | Single stream chunk from the provider. 
| + +```json +{"type": "text", "fidelity": "agent_emitted", "text": "Found 2 issues."} +{"type": "thinking", "fidelity": "agent_emitted", "thinking": "First I should..."} +{"type": "tool_use", "fidelity": "agent_emitted", "tool_name": "Read", "tool_id": "toolu_01", "tool_input": {"path": "main.go"}} +{"type": "tool_result", "fidelity": "router", "tool_id": "toolu_01", "tool_content": "package main\n..."} +{"type": "command", "fidelity": "router", "command": "go test ./..."} +``` + +--- + +## Fidelity + +The `fidelity` field on every `ContentBlock` and `ToolPayload` distinguishes two provenances: + +| Value | Meaning | +|---|---| +| `"router"` | Block synthesized by AWF's `tools.Router.CallTool` seam. Authoritative for in-process tool calls (builtin + plugin). | +| `"agent_emitted"` | Block emitted directly by the agent (e.g., stdio proxy NDJSON `tool_use`). Provenance is the agent, not the router. | + +Consumers can use this marker to avoid double-counting tool calls reported on multiple channels. + +--- + +## Payload Dispatch + +The decoder selects the concrete `payload` type from the envelope `type` combined with a tolerant probe of the JSON shape: + +1. If the raw payload is a JSON **array**, decode into `[]ContentBlock`. +2. If the raw payload is an **object** containing `"role"`, decode into `*MessagePayload`. +3. Else if it contains `"call_id"`, decode into `*ToolPayload`. +4. Else if it contains `"kind"`, decode into `*StepPayload`. +5. Otherwise, decode into a generic `any` (forward-compatibility fallback). + +A `null` or missing payload yields `payload: nil` — valid for `run.*` lifecycle events. 
+ +--- + +## File Properties + +| Property | Value | +|---|---| +| Path | `storage/transcripts/<run_id>.jsonl` | +| Mode | `0o600` | +| Open flags | `O_APPEND \| O_CREATE \| O_WRONLY` | +| Writes | Serialized via `sync.Mutex` (always held — also covers payloads beyond POSIX `PIPE_BUF`) | +| Encoding | UTF-8, one JSON object per line, LF line endings | +| Ordering | Strictly monotonic `seq` per file, starting at `1` | + +### Concurrency + +- `seq` is allocated from a single `atomic.AddUint64` inside the recorder *before* the write lock is taken. +- All writes are serialized; no torn lines even when a payload exceeds `PIPE_BUF`. +- Multiple goroutines emitting concurrently produce a strictly monotonic `seq` series with no gaps. + +### Atomicity + +- `O_APPEND` guarantees that each successful `write()` lands at the file end. +- Process kill mid-write cannot tear a line written under `PIPE_BUF`; the mutex covers the larger case. + +--- + +## Sub-Workflow Linkage + +A `call_workflow` step produces a **new file** for the child run. Linkage is bidirectional: + +- **Parent file** — `step.call_workflow.started` envelope sets `child_run_id` to the new run's UUID. +- **Child file** — every envelope sets `parent_run_id` to the invoking run's UUID. + +Each file's `seq` series is independent and starts at `1`. Reconstruction walks `child_run_id` links from the parent to locate every child file and assembles a connected tree. + +Nesting deeper than one level produces one file per level with consistent parent linkage at every depth. + +--- + +## Forward Compatibility + +The reader is **tolerant**; the writer is **strict**: + +- Writers only emit values from the closed `EventType` and `BlockType` vocabularies. +- Readers that encounter an unknown `EventType` surface it through `errors.Is(err, transcript.ErrUnknownEventType)` so callers may treat it as a warning rather than a parse failure. Likewise `transcript.ErrUnknownBlockType` for unknown `BlockType` values. 
+- Adding new envelope fields is **safe** — older readers ignore unknown fields per the JSON standard. +- Adding new event or block types requires every consumer to handle the unknown-type path explicitly. Removing or renaming values requires a coordinated schema bump. + +--- + +## Coexistence + +The transcript is a **pure addition**: + +- `audit.jsonl` output is byte-identical whether the transcript recorder is wired or not. +- `DisplayEvent` streams (used by streaming display and TUI) remain unchanged. +- Plugin behavior is unaffected — instrumentation happens at AWF boundaries, not inside plugins. + +--- + +## Live Fan-Out + +Subscribers connect via the `Recorder.Subscribe()` port and receive every event written to disk, in order. + +| Property | Value | +|---|---| +| Per-subscriber buffer | 256 events (default) | +| Drop policy | drop-newest | +| Ordering | Write-then-broadcast: disk first, subscribers second | +| Back-pressure | Slow subscriber drops events; disk write never blocks | +| Drop visibility | `FanOut.Stats()` exposes a drop counter; rate-limited WARN log (1/s per subscriber) when drops occur | +| `Close()` | Idempotent on both recorder and subscriber | + +--- + +## Limitations + +- **No default secret masking.** Prompts, system prompts, and tool inputs are recorded verbatim. The design preserves an opt-in masking hook for future use, but no masking ships in this version. +- **`awf mcp-serve` subprocess capture is not yet instrumented.** The `fidelity:"agent_emitted"` marker documents this gap and enables a future transition without changing the wire format. +- **Token-by-token streaming deltas are not captured.** The `stream` block type exists in the vocabulary but is reserved for future use. + +--- + +## Querying Examples + +### Decode every event + +```bash +jq '.' 
storage/transcripts/<run_id>.jsonl +``` + +### Pair tool calls with their results + +```bash +jq -s ' + [.[] | select(.type == "tool.call" or .type == "tool.result")] + | group_by(.payload.call_id) + | map({ + call_id: .[0].payload.call_id, + name: .[0].payload.name, + input: (map(select(.type == "tool.call"))[0].payload.input), + output: (map(select(.type == "tool.result"))[0].payload.output) + }) +' storage/transcripts/<run_id>.jsonl +``` + +### Extract every assistant text response + +```bash +jq -r 'select(.type == "message.assistant") + | .payload.blocks[] + | select(.type == "text") + | .text' storage/transcripts/<run_id>.jsonl +``` + +### Verify strictly monotonic seq + +```bash +jq -s '[.[].seq] | . == unique' storage/transcripts/<run_id>.jsonl +# true +``` + +### Reconstruct a step tree + +```bash +jq -r 'select(.type == "step.started") + | "\(.seq)\t\(.path)\t\(.payload.kind)\t\(.payload.name)"' \ + storage/transcripts/<run_id>.jsonl +``` + +### Walk parent → child runs + +```bash +# List every child run spawned by this parent +jq -r 'select(.type == "step.call_workflow.started") | .child_run_id' \ + storage/transcripts/<run_id>.jsonl +``` + +### Distinguish router-fidelity vs agent-emitted tool calls + +```bash +jq -s ' + [.[] | select(.type == "tool.call")] + | group_by(.payload.fidelity) + | map({fidelity: .[0].payload.fidelity, count: length}) +' storage/transcripts/<run_id>.jsonl +``` + +--- + +## See Also + +- [Agent Exchange Transcript Guide](../user-guide/transcript.md) — Conceptual overview, file location, fan-out, and security notes +- [Audit Trail Schema](audit-trail-schema.md) — The separate paired-event audit format that coexists with the transcript +- Package documentation: run `go doc github.com/awf-project/cli/internal/domain/transcript` for the in-tree reference diff --git a/docs/user-guide/audit-trail.md b/docs/user-guide/audit-trail.md index 09c783ea..a68f05be 100644 --- a/docs/user-guide/audit-trail.md +++ b/docs/user-guide/audit-trail.md @@ -94,8 +94,33 @@ Audit trail failures never block workflow 
execution: - If the audit file path is not writable, a warning is emitted to stderr and the workflow proceeds normally. - Audit write errors do not change the workflow exit code. +## Canonical Transcript vs. Audit Trail + +In addition to the audit trail, AWF automatically creates a **canonical transcript** file for every workflow run. While similar in format (both JSONL), they serve different purposes: + +| Aspect | Audit Trail | Canonical Transcript | +|--------|-----------|----------------------| +| **Scope** | Workflow-level summary | Full execution details | +| **Events per run** | 2 (start + completion) | Hundreds (every step, message, tool call) | +| **File location** | `$XDG_DATA_HOME/awf/audit.jsonl` | `storage/transcripts/.jsonl` | +| **Purpose** | Compliance, accounting | Replay, debugging, detailed audit | +| **Step details** | Minimal | Complete (prompts, tool inputs/outputs, loop iterations) | +| **Agent exchange** | Not recorded | Full lifecycle (every message) | + +**Use the audit trail** for: +- Compliance logging (who ran what, when) +- Execution accounting (success/failure counts) +- Historical queries across all runs + +**Use the canonical transcript** for: +- Debugging failed workflows (see exact agent response) +- Replaying executions offline +- Analyzing agent behavior +- Sub-workflow tree reconstruction + ## See Also +- [Transcript Schema](../reference/transcript-schema.md) — Full transcript field reference and content blocks - [Audit Trail Schema](../reference/audit-trail-schema.md) — Full field reference and constraints - [ADR-0010](../ADR/010-paired-jsonl-audit-trail-with-atomic-append.md) — Design decision: paired JSONL with atomic append - [ADR-0011](../ADR/011-application-layer-secret-masking-for-audit-events.md) — Design decision: application-layer secret masking diff --git a/docs/user-guide/transcript.md b/docs/user-guide/transcript.md new file mode 100644 index 00000000..d2457e50 --- /dev/null +++ b/docs/user-guide/transcript.md @@ 
-0,0 +1,149 @@ +--- +title: "Agent Exchange Transcript" +--- + +AWF records a canonical agent exchange transcript for every workflow run. It is an append-only JSONL stream that captures the full workflow lifecycle — step start/end events, user prompts, assistant replies, tool calls, tool results, and sub-workflow linkage — in a single normalized format suitable for offline replay, auditing, and reconstruction of the execution tree. + +The transcript coexists with the [audit trail](audit-trail.md) and the streaming `DisplayEvent` output; none of those channels change because the transcript exists. + +## How It Works + +When a workflow runs, AWF appends one JSON object per line to: + +``` +storage/transcripts/<run_id>.jsonl +``` + +Each line is a complete [`ExchangeEvent`](../reference/transcript-schema.md#exchangeevent-envelope) with a strictly monotonic `seq`, the workflow `run_id`, the dot-separated `path` of the current step, a UTC `timestamp`, and a typed `payload` whose shape is determined by the event `type`. + +Events are written **to disk first**, then broadcast to live subscribers. The disk file is the durable contract: a slow consumer never blocks a write. + +## Event Vocabulary + +The `type` field uses a closed vocabulary. 
Every line in the file is one of: + +| Event Type | Emitted When | +|---|---| +| `run.started` | Workflow run begins | +| `run.completed` | Workflow run ends (success or failure) | +| `step.started` | Any step begins (`agent`, `command`, `operation`, `terminal`, `parallel`, `for_each`, `while`, custom) | +| `step.completed` | Any step ends | +| `step.call_workflow.started` | Parent emits a sub-workflow invocation; carries `child_run_id` | +| `step.call_workflow.completed` | Parent observes sub-workflow completion | +| `message.user` | Agent seam — resolved user prompt + composed `system_prompt` | +| `message.assistant` | Agent reply, normalized into `ContentBlock`s | +| `tool.call` | A tool invocation begins (router seam or agent-emitted) | +| `tool.result` | A tool invocation completes | + +Readers must treat unknown event types as forward-compatible: surface them with the type intact, do not panic. See [Schema → Forward Compatibility](../reference/transcript-schema.md#forward-compatibility). + +## Content Blocks + +Assistant messages and tool payloads carry typed `ContentBlock` entries. The closed vocabulary is: + +| Block Type | Carries | +|---|---| +| `text` | Plain assistant text | +| `thinking` | Reasoning / thinking output (Claude extended thinking, etc.) | +| `tool_use` | Tool invocation block with `tool_name`, `tool_id`, `tool_input` | +| `tool_result` | Tool output block with `tool_content` | +| `command` | Shell command executed by a step | +| `stream` | Provider stream chunk (for replays that preserve partials) | + +Every block also carries a `fidelity` marker: + +- `"router"` — block synthesized by AWF's `tools.Router.CallTool` seam. Authoritative for in-process tool calls. +- `"agent_emitted"` — block emitted directly by the agent (e.g., stdio proxy NDJSON `tool_use`). Provenance is the agent, not the router. + +This marker lets consumers distinguish ground truth from agent-reported events without double-counting. 
+ +## Per-Provider Normalization + +Outputs from Claude, Codex, Gemini, Copilot, and OpenAI HTTP are normalized into the same `ContentBlock` stream by a single mapping layer in `internal/infrastructure/agents/`. Provider quirks are absorbed there: + +- **Codex** — embedded NUL bytes in JSONL are handled without corrupting the transcript line. +- **Dangling `tool_use`** — a `tool_use` without a matching `tool_result` (timeout, crash) is recorded as-is; the parser does not panic or drop the message. +- **Mixed blocks** — Claude `thinking` + `text` + `tool_use` in one response yields three blocks in order. + +The existing per-provider `DisplayEvent` output is **not** changed; the transcript adds a sibling mapping rather than replacing the display layer. + +## Sub-Workflow Linkage + +When a parent run invokes `call_workflow`, AWF writes the child run to its own file. Linkage is bidirectional: + +- The **parent** emits `step.call_workflow.started` carrying `child_run_id`. +- The **child** writes every lifecycle event with `parent_run_id` populated. + +Each file is self-contained — one `seq` series per file, reconstructable in isolation. Reading both files yields a single connected execution tree. + +Nesting deeper than one level produces one file per level with consistent parent linkage at every depth. + +## Live Fan-Out + +Consumers subscribe to the recorder to receive events as they are written. Fan-out is bounded: + +- Per-subscriber channel buffer: **256 events** (default). +- Back-pressure policy: **drop-newest** — once a subscriber's buffer fills, new events are dropped for that subscriber until it drains. +- A drop counter is exposed via `FanOut.Stats()`; the recorder logs a rate-limited WARN (1/s) per subscriber when drops occur. +- The disk write is **never** blocked by a slow subscriber; the file remains complete and monotonic regardless. + +`Close()` on the recorder or a subscriber is idempotent — calling it twice is safe. 
+ +## File Properties + +| Property | Value | +|---|---| +| Path | `storage/transcripts/<run_id>.jsonl` | +| Mode | `0o600` (owner read/write only) | +| Write mode | `O_APPEND` | +| Atomicity | Single `write()` per line; mutex-serialized beyond `PIPE_BUF` | +| Encoding | UTF-8, one JSON object per line, LF line endings | +| Ordering | Strictly monotonic `seq` starting at `1` | + +`<run_id>` is the same UUID v4 used by the audit trail and the state machine — there is no separate ID infrastructure. + +## Querying the Transcript + +The format is JSONL, so standard tooling works: + +```bash +# Pretty-print an entire run +jq '.' storage/transcripts/<run_id>.jsonl + +# Extract every tool call with its result +jq 'select(.type == "tool.call" or .type == "tool.result")' storage/transcripts/<run_id>.jsonl + +# List all assistant text content blocks +jq -r 'select(.type == "message.assistant") | .payload.blocks[] | select(.type == "text") | .text' \ + storage/transcripts/<run_id>.jsonl + +# Show only router-fidelity tool calls +jq 'select(.type == "tool.call" and .payload.fidelity == "router")' \ + storage/transcripts/<run_id>.jsonl + +# Reconstruct the step path tree +jq -r 'select(.type == "step.started") | "\(.seq)\t\(.path)\t\(.payload.kind)"' \ + storage/transcripts/<run_id>.jsonl + +# Follow nested sub-workflows: find every child run referenced by a parent +jq -r 'select(.type == "step.call_workflow.started") | .child_run_id' \ + storage/transcripts/<run_id>.jsonl +``` + +## Coexistence Guarantees + +- `audit.jsonl` output is **byte-identical** whether the transcript recorder is wired or not. +- `DisplayEvent` streams remain unchanged; the transcript adds a parallel mapping, not a replacement. +- Plugins are unaffected — capture happens at AWF boundaries (router seam, agent seam), never inside plugin internals. + +## Security & Privacy + +- Files are written with mode `0o600` — only the owning user can read them. 
+- **No secrets are masked by default.** A `message.user` event includes the resolved prompt and composed system prompt verbatim; `tool.call` payloads include arguments verbatim. Treat transcript files like raw command logs. +- The design preserves an opt-in masking hook for future use, but no masking ships in this version. + +## See Also + +- [Transcript Schema](../reference/transcript-schema.md) — Full field reference, payload shapes, and constraints +- [Audit Trail](audit-trail.md) — The separate paired-event audit channel that coexists with the transcript +- [Distributed Tracing](tracing.md) — OpenTelemetry spans for cross-system correlation diff --git a/internal/application/conversation_manager.go b/internal/application/conversation_manager.go index b8cb54ea..829a69e1 100644 --- a/internal/application/conversation_manager.go +++ b/internal/application/conversation_manager.go @@ -222,7 +222,10 @@ func (m *ConversationManager) ExecuteConversation( // F099: Start MCP tool proxy for the conversation if configured. The proxy lives for // the full lifetime of the multi-turn loop; cleanup runs after the loop exits. - proxyCleanup, proxyErr := startConversationToolProxy(ctx, m.toolProxy, m.logger, step, options, resolvedProvider, provider) + // F106: the ConversationManager does not own a transcript recorder, so router-fidelity + // tool capture is not wired on this multi-turn path (single-turn agent steps cover the + // primary US3 case); pass a nil recorder so capture is a graceful no-op here. 
+ proxyCleanup, proxyErr := startConversationToolProxy(ctx, m.toolProxy, m.logger, step, options, resolvedProvider, provider, nil, "") if proxyErr != nil { return nil, fmt.Errorf("step %s: %w", step.Name, proxyErr) } diff --git a/internal/application/execution_service.go b/internal/application/execution_service.go index de0b9f83..15487529 100644 --- a/internal/application/execution_service.go +++ b/internal/application/execution_service.go @@ -15,6 +15,7 @@ import ( domainerrors "github.com/awf-project/cli/internal/domain/errors" "github.com/awf-project/cli/internal/domain/pluginmodel" "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" "github.com/awf-project/cli/internal/domain/workflow" "github.com/awf-project/cli/pkg/display" "github.com/awf-project/cli/pkg/interpolation" @@ -74,6 +75,10 @@ type ExecutionService struct { skillRepo ports.SkillRepository agentRoleRepo ports.AgentRoleRepository toolProxy *tools.ProxyService + recorder ports.Recorder + recorderFactory ports.RecorderFactory + transcriptDir string + agentOutputNormalizer ports.AgentOutputNormalizer } // SetOutputWriters configures streaming output writers. @@ -156,6 +161,40 @@ func (s *ExecutionService) SetAuditTrailWriter(w ports.AuditTrailWriter) { s.auditTrailWriter = w } +// SetRecorder configures the transcript recorder for F106 canonical exchange transcript. +// When nil, transcript emission is skipped without error. +func (s *ExecutionService) SetRecorder(r ports.Recorder) { + s.recorder = r +} + +// SetRecorderFactory configures the factory used to create child recorders for +// call_workflow sub-runs. When nil, child transcript recording is skipped. +func (s *ExecutionService) SetRecorderFactory(f ports.RecorderFactory) { + s.recorderFactory = f +} + +// SetTranscriptDir configures the directory where transcript files are written. +// Derived from the same config as the parent recorder's path. 
+func (s *ExecutionService) SetTranscriptDir(dir string) { + s.transcriptDir = dir +} + +// SetAgentOutputNormalizer configures the normalizer that converts provider raw output +// into transcript ContentBlocks for F106 message.assistant emission. When nil, agent +// output is not normalized into the transcript (graceful no-op). +func (s *ExecutionService) SetAgentOutputNormalizer(n ports.AgentOutputNormalizer) { + s.agentOutputNormalizer = n +} + +// transcriptBaseDir returns the configured transcript directory, falling back +// to "storage/transcripts" when none is set. +func (s *ExecutionService) transcriptBaseDir() string { + if s.transcriptDir != "" { + return s.transcriptDir + } + return "storage/transcripts" +} + // SetPluginService configures the plugin service for disabled-plugin detection. // When set, executePluginOperation checks if the plugin is disabled before lookup. // When nil, the check is skipped (backward compatible). @@ -359,7 +398,7 @@ func (s *ExecutionService) Run( workflowName string, inputs map[string]any, ) (*workflow.ExecutionContext, error) { - return s.runWithCallStack(ctx, workflowName, inputs, nil) + return s.runWithCallStack(ctx, workflowName, inputs, nil, "", "") } // RunWithWorkflow executes a pre-loaded workflow with the given inputs. @@ -369,7 +408,20 @@ func (s *ExecutionService) RunWithWorkflow( wf *workflow.Workflow, inputs map[string]any, ) (*workflow.ExecutionContext, error) { - return s.runWithCallStackAndWorkflow(ctx, "", wf, inputs, nil) + return s.runWithCallStackAndWorkflow(ctx, "", wf, inputs, nil, "", "") +} + +// RunWithWorkflowAndRunID executes a pre-loaded workflow using runID as the execution's +// WorkflowID. This lets callers (e.g. the CLI run command) reuse the same identifier for +// the transcript filename (.jsonl) and the run_id stamped on every emitted event, +// keeping the on-disk file and its contents correlatable (F106 SC-001). 
+func (s *ExecutionService) RunWithWorkflowAndRunID( + ctx context.Context, + wf *workflow.Workflow, + inputs map[string]any, + runID string, +) (*workflow.ExecutionContext, error) { + return s.runWithCallStackAndWorkflow(ctx, "", wf, inputs, nil, runID, "") } // RunWorkflowAsync starts workflow execution and returns the ExecutionContext immediately. @@ -380,7 +432,7 @@ func (s *ExecutionService) RunWorkflowAsync( wf *workflow.Workflow, inputs map[string]any, ) (*workflow.ExecutionContext, <-chan error, error) { - spanCtx, span, execCtx, err := s.prepareExecution(ctx, wf, inputs, nil) + spanCtx, span, execCtx, err := s.prepareExecution(ctx, wf, inputs, nil, "", "") if err != nil { if span != nil { span.End() @@ -400,11 +452,17 @@ func (s *ExecutionService) RunWorkflowAsync( // prepareExecution handles workflow setup: span creation, template expansion, context // initialization, input validation, audit emission, and workflow_start hooks. // Returns the span-enriched context, the span (caller must End() it), and the execution context. +// +// runID, when non-empty, is used verbatim as the execution's WorkflowID so the +// transcript filename (.jsonl) and the run_id stamped on every emitted event +// are the same identifier. An empty runID falls back to a generated UUID. parentRunID +// is propagated onto the child context for F106 sub-workflow transcript linkage. 
func (s *ExecutionService) prepareExecution( ctx context.Context, wf *workflow.Workflow, inputs map[string]any, parentCallStack []string, + runID, parentRunID string, ) (context.Context, ports.Span, *workflow.ExecutionContext, error) { ctx, span := s.startSpan(ctx, "workflow.run") span.SetAttribute("workflow.name", wf.Name) @@ -418,7 +476,12 @@ func (s *ExecutionService) prepareExecution( } } - execCtx := workflow.NewExecutionContext(uuid.New().String(), wf.Name) + resolvedRunID := runID + if resolvedRunID == "" { + resolvedRunID = uuid.New().String() + } + execCtx := workflow.NewExecutionContext(resolvedRunID, wf.Name) + execCtx.ParentRunID = parentRunID execCtx.Status = workflow.StatusRunning span.SetAttribute("execution_id", execCtx.WorkflowID) @@ -612,18 +675,21 @@ func (s *ExecutionService) runWithCallStack( workflowName string, inputs map[string]any, parentCallStack []string, + runID, parentRunID string, ) (*workflow.ExecutionContext, error) { - return s.runWithCallStackAndWorkflow(ctx, workflowName, nil, inputs, parentCallStack) + return s.runWithCallStackAndWorkflow(ctx, workflowName, nil, inputs, parentCallStack, runID, parentRunID) } // runWithCallStackAndWorkflow executes a workflow with an optional parent call stack. // If wf is nil, loads the workflow by name. Otherwise uses the provided workflow. +// runID/parentRunID thread the F106 run identity through to prepareExecution. 
func (s *ExecutionService) runWithCallStackAndWorkflow( ctx context.Context, workflowName string, wf *workflow.Workflow, inputs map[string]any, parentCallStack []string, + runID, parentRunID string, ) (*workflow.ExecutionContext, error) { if wf == nil { var err error @@ -633,7 +699,7 @@ func (s *ExecutionService) runWithCallStackAndWorkflow( } } - spanCtx, span, execCtx, err := s.prepareExecution(ctx, wf, inputs, parentCallStack) + spanCtx, span, execCtx, err := s.prepareExecution(ctx, wf, inputs, parentCallStack, runID, parentRunID) if err != nil { return nil, err } @@ -653,6 +719,8 @@ func (s *ExecutionService) executeStep( defer span.End() span.SetAttribute("step.name", step.Name) span.SetAttribute("step.type", string(step.Type)) + s.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + defer s.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepCompleted) startTime := time.Now() @@ -909,6 +977,8 @@ func (s *ExecutionService) executeParallelStep( span.SetAttribute("step.name", step.Name) span.SetAttribute("parallel.strategy", string(step.Strategy)) span.SetAttribute("parallel.branches", len(step.Branches)) + s.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + defer s.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepCompleted) startTime := time.Now() @@ -1029,6 +1099,8 @@ func (s *ExecutionService) executeLoopStep( defer loopSpan.End() loopSpan.SetAttribute("step.name", step.Name) loopSpan.SetAttribute("loop.type", string(step.Type)) + s.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + defer s.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepCompleted) startTime := time.Now() @@ -1982,6 +2054,8 @@ func (s *ExecutionService) executeCustomStepType( execCtx *workflow.ExecutionContext, ) (string, error) { startTime := time.Now() + s.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + defer s.emitTranscriptStep(ctx, execCtx, step, 
transcript.EventTypeStepCompleted) if s.stepTypeProvider == nil { state := workflow.StepState{ @@ -2101,6 +2175,8 @@ func (s *ExecutionService) executePluginOperation( ctx, span := s.startSpan(ctx, "plugin.rpc") defer span.End() span.SetAttribute("plugin.name", step.Operation) + s.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + defer s.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepCompleted) // Validate provider is configured if s.operationProvider == nil { @@ -2263,6 +2339,8 @@ func (s *ExecutionService) executeAgentStep( ctx, span := s.startSpan(ctx, "agent.call") defer span.End() + s.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + defer s.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepCompleted) // Validate registry is configured if s.agentRegistry == nil { @@ -2373,6 +2451,7 @@ func (s *ExecutionService) executeAgentStep( if roleErr != nil { return "", fmt.Errorf("step %s: resolve role: %w", step.Name, roleErr) } + s.emitTranscriptAgentMessage(ctx, execCtx, resolvedPrompt, composedPrompt) if composedPrompt != "" { if _, exists := opts["system_prompt"]; exists { s.logger.Warn("options.system_prompt overridden by composed role+system_prompt", "step", step.Name) @@ -2383,7 +2462,7 @@ func (s *ExecutionService) executeAgentStep( // F099: Start MCP tool proxy if configured for this step. Injects the temp config // path into opts so the provider's MCP injector can reference it; cleanup runs after // provider.Execute / executeResumableAgentCall returns. 
- proxyCleanup, proxyErr := s.startToolProxy(stepCtx, step, opts, resolvedProvider, provider) + proxyCleanup, proxyErr := s.startToolProxy(stepCtx, step, opts, resolvedProvider, provider, execCtx) if proxyErr != nil { return "", fmt.Errorf("step %s: %w", step.Name, proxyErr) } @@ -2422,6 +2501,7 @@ func (s *ExecutionService) executeAgentStep( result = &workflow.AgentResult{ Provider: convResult.Provider, Output: convResult.Output, + RawOutput: convResult.RawOutput, Response: convResult.Response, Tokens: convResult.TokensTotal, StartedAt: convResult.StartedAt, @@ -2456,6 +2536,10 @@ func (s *ExecutionService) executeAgentStep( span.SetAttribute("model", model) } span.SetAttribute("tokens_used", result.Tokens) + // F106 US2: normalize the provider's RAW stream (NDJSON) into a message.assistant + // event, falling back to the extracted text for providers with no raw stream. + // Emitted before error handling so partial/dangling output is still captured. + s.emitTranscriptAgentResponse(ctx, execCtx, resolvedProvider, result.RawOutput, result.Output) } // Handle execution error (e.g., context canceled, provider error) @@ -2723,7 +2807,7 @@ func (s *ExecutionService) resolveOperationInputs( intCtx *interpolation.Context, ) (map[string]any, error) { if inputs == nil { - return nil, nil + return make(map[string]any), nil } resolved := make(map[string]any, len(inputs)) diff --git a/internal/application/execution_service_runid_test.go b/internal/application/execution_service_runid_test.go new file mode 100644 index 00000000..83376c0b --- /dev/null +++ b/internal/application/execution_service_runid_test.go @@ -0,0 +1,133 @@ +package application + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/workflow" + testmocks "github.com/awf-project/cli/internal/testutil/mocks" +) + +// newRunIDTestService builds a minimal ExecutionService capable of running +// 
prepareExecution (logger + resolver + hookExecutor wired). +func newRunIDTestService() *ExecutionService { + logger := &mockLogger{} + resolver := newMockResolver() + return &ExecutionService{ + outputLimiter: NewOutputLimiter(workflow.DefaultOutputLimits()), + logger: logger, + resolver: resolver, + hookExecutor: NewHookExecutor(newMockExecutor(), logger, resolver), + } +} + +func terminalOnlyWorkflow() *workflow.Workflow { + return &workflow.Workflow{ + Name: "rid-workflow", + Initial: "done", + Steps: map[string]*workflow.Step{ + "done": {Name: "done", Type: workflow.StepTypeTerminal, Status: workflow.TerminalSuccess}, + }, + } +} + +// TestPrepareExecution_UsesProvidedRunID verifies the explicit run identity threads +// into execCtx.WorkflowID (so the transcript filename and event run_id match) and that +// the parent run id is propagated for sub-workflow linkage. +func TestPrepareExecution_UsesProvidedRunID(t *testing.T) { + svc := newRunIDTestService() + wf := terminalOnlyWorkflow() + + _, span, execCtx, err := svc.prepareExecution(context.Background(), wf, nil, nil, "fixed-run-id", "parent-run-id") + require.NoError(t, err) + if span != nil { + defer span.End() + } + + assert.Equal(t, "fixed-run-id", execCtx.WorkflowID, "execCtx.WorkflowID must equal the provided run id") + assert.Equal(t, "parent-run-id", execCtx.ParentRunID, "parent run id must propagate to the child context") +} + +// TestPrepareExecution_GeneratesRunIDWhenEmpty verifies that an empty run id falls back +// to a generated UUID (preserving existing top-level run behavior). 
+func TestPrepareExecution_GeneratesRunIDWhenEmpty(t *testing.T) { + svc := newRunIDTestService() + wf := terminalOnlyWorkflow() + + _, span, execCtx, err := svc.prepareExecution(context.Background(), wf, nil, nil, "", "") + require.NoError(t, err) + if span != nil { + defer span.End() + } + + assert.NotEmpty(t, execCtx.WorkflowID, "an empty run id must fall back to a generated identifier") + assert.Empty(t, execCtx.ParentRunID, "no parent run id when none provided") +} + +// TestRunWithWorkflowAndRunID_StampsTranscriptRunID verifies the public entry point +// threads the CLI-provided run id all the way into emitted transcript events, so the +// .jsonl filename and the events' run_id are the same identifier. +func TestRunWithWorkflowAndRunID_StampsTranscriptRunID(t *testing.T) { + svc := newRunIDTestService() + rec := &fakeRecorder{} + svc.SetRecorder(rec) + svc.store = testmocks.NewMockStateStore() + + wf := &workflow.Workflow{ + Name: "rid-workflow", + Initial: "s1", + Steps: map[string]*workflow.Step{ + "s1": {Name: "s1", Type: workflow.StepTypeCommand, OnSuccess: "done"}, + "done": {Name: "done", Type: workflow.StepTypeTerminal, Status: workflow.TerminalSuccess}, + }, + } + svc.executor = newMockExecutor() + + execCtx, err := svc.RunWithWorkflowAndRunID(context.Background(), wf, nil, "cli-run-id") + require.NoError(t, err) + require.NotNil(t, execCtx) + assert.Equal(t, "cli-run-id", execCtx.WorkflowID) + + require.NotEmpty(t, rec.events, "expected transcript events to be emitted") + for _, ev := range rec.events { + assert.Equal(t, "cli-run-id", ev.RunID, "every transcript event must carry the provided run id") + } +} + +// TestRun_ChildRecorderViaContext_IsolatesEvents verifies F106 US5: a sub-run's events +// are routed to the context-scoped child Recorder (its own file) rather than the parent's, +// the child carries the provided ParentRunID, and the parent recorder is untouched. 
+func TestRun_ChildRecorderViaContext_IsolatesEvents(t *testing.T) { + svc := newRunIDTestService() + parentRec := &fakeRecorder{} + childRec := &fakeRecorder{} + svc.SetRecorder(parentRec) + svc.store = testmocks.NewMockStateStore() + svc.executor = newMockExecutor() + + wf := &workflow.Workflow{ + Name: "child-workflow", + Initial: "s1", + Steps: map[string]*workflow.Step{ + "s1": {Name: "s1", Type: workflow.StepTypeCommand, OnSuccess: "done"}, + "done": {Name: "done", Type: workflow.StepTypeTerminal, Status: workflow.TerminalSuccess}, + }, + } + + ctx := withRecorder(context.Background(), childRec) + execCtx, err := svc.runWithCallStackAndWorkflow(ctx, "", wf, nil, nil, "child-run", "parent-run") + require.NoError(t, err) + require.NotNil(t, execCtx) + + assert.Equal(t, "child-run", execCtx.WorkflowID) + assert.Equal(t, "parent-run", execCtx.ParentRunID) + + assert.Empty(t, parentRec.events, "parent recorder must not receive the child run's events") + require.NotEmpty(t, childRec.events, "child recorder must receive the child run's events") + for _, ev := range childRec.events { + assert.Equal(t, "child-run", ev.RunID, "child events must carry the child run id") + } +} diff --git a/internal/application/execution_service_transcript.go b/internal/application/execution_service_transcript.go new file mode 100644 index 00000000..488aeabe --- /dev/null +++ b/internal/application/execution_service_transcript.go @@ -0,0 +1,189 @@ +package application + +import ( + "context" + "strconv" + "strings" + "time" + + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/awf-project/cli/internal/domain/workflow" +) + +// recorderCtxKey scopes a per-run Recorder onto a context. Sub-workflow runs carry +// their own child Recorder (one file per sub-run, F106 US5) without mutating shared +// ExecutionService state, which keeps emission goroutine-safe under parallel steps. 
+type recorderCtxKey struct{} + +// withRecorder returns a context that routes transcript emission to rec. A nil rec is +// returned unchanged so callers can wrap unconditionally. +func withRecorder(ctx context.Context, rec ports.Recorder) context.Context { + if rec == nil { + return ctx + } + return context.WithValue(ctx, recorderCtxKey{}, rec) +} + +// recorderFor resolves the Recorder for ctx: a context-scoped child Recorder takes +// precedence, falling back to the service-level recorder for the top-level run. +func (s *ExecutionService) recorderFor(ctx context.Context) ports.Recorder { + if rec, ok := ctx.Value(recorderCtxKey{}).(ports.Recorder); ok && rec != nil { + return rec + } + return s.recorder +} + +func (s *ExecutionService) emitTranscriptEvent(ctx context.Context, event transcript.ExchangeEvent) { //nolint:gocritic // hugeParam: callers construct event inline; pointer indirection adds no benefit here + rec := s.recorderFor(ctx) + if rec == nil { + return + } + if err := rec.Record(ctx, event); err != nil && s.logger != nil { + s.logger.Warn("transcript record warning", "error", err, "event", event.Type) + } +} + +func (s *ExecutionService) emitTranscriptStep(ctx context.Context, ec *workflow.ExecutionContext, step *workflow.Step, eventType transcript.EventType) { + if s.recorderFor(ctx) == nil { + return + } + var iteration int + if ec.CurrentLoop != nil { + iteration = ec.CurrentLoop.Index + } + s.emitTranscriptEvent(ctx, transcript.ExchangeEvent{ + Type: eventType, + RunID: ec.WorkflowID, + ParentRunID: ec.ParentRunID, + Path: buildTranscriptPath(ec, step), + Iteration: iteration, + Timestamp: time.Now(), + Payload: &transcript.StepPayload{Name: step.Name, Kind: string(step.Type)}, + }) +} + +func (s *ExecutionService) emitTranscriptAgentMessage(ctx context.Context, ec *workflow.ExecutionContext, prompt, systemPrompt string) { + if s.recorderFor(ctx) == nil { + return + } + // User/system input blocks are composed by AWF (the orchestrator), not 
emitted by the + // agent — fidelity:"router" marks them accordingly (FR-002 requires the marker). + blocks := []transcript.ContentBlock{{Type: transcript.BlockTypeText, Fidelity: transcript.FidelityRouter, Text: prompt}} + if systemPrompt != "" { + blocks = append(blocks, transcript.ContentBlock{Type: transcript.BlockTypeText, Fidelity: transcript.FidelityRouter, Text: systemPrompt}) + } + s.emitTranscriptEvent(ctx, transcript.ExchangeEvent{ + Type: transcript.EventTypeMessageUser, + RunID: ec.WorkflowID, + ParentRunID: ec.ParentRunID, + Path: ec.WorkflowName, + Timestamp: time.Now(), + Payload: &transcript.MessagePayload{Role: "user", Blocks: blocks}, + }) +} + +// emitTranscriptAgentResponse normalizes a provider's raw NDJSON stream into ContentBlocks +// and emits a message.assistant event (F106 US2). +// +// rawOutput is the provider's RAW agent stream (NDJSON for CLI providers), NOT the +// extracted text response: the per-provider normalizers parse the NDJSON envelope, so +// feeding them the extracted text yields zero blocks (the P-1 production gap). text is the +// extracted assistant response used as a fallback for providers that expose no raw stream +// (e.g. openai_compatible over HTTP, whose tool calls are already captured at the Router +// seam — emitting only a text block here avoids double-counting per FR-009). +// +// It is a no-op when there is no recorder or when neither path yields any block, so a +// provider with no output never produces an empty assistant message. 
+func (s *ExecutionService) emitTranscriptAgentResponse(ctx context.Context, ec *workflow.ExecutionContext, provider, rawOutput, text string) { + if s.recorderFor(ctx) == nil { + return + } + + var blocks []transcript.ContentBlock + if s.agentOutputNormalizer != nil && rawOutput != "" { + blocks = s.agentOutputNormalizer.Normalize(provider, []byte(rawOutput)) + } + if len(blocks) == 0 && text != "" { + // No raw stream (or it normalized to nothing): capture the extracted assistant + // text as a single agent-emitted block rather than losing the turn entirely. + blocks = []transcript.ContentBlock{{Type: transcript.BlockTypeText, Fidelity: transcript.FidelityAgentEmitted, Text: text}} + } + if len(blocks) == 0 { + return + } + + s.emitTranscriptEvent(ctx, transcript.ExchangeEvent{ + Type: transcript.EventTypeMessageAssistant, + RunID: ec.WorkflowID, + ParentRunID: ec.ParentRunID, + Path: ec.WorkflowName, + Timestamp: time.Now(), + Payload: &transcript.MessagePayload{Role: "assistant", Blocks: blocks}, + }) +} + +func (s *ExecutionService) emitTranscriptCallWorkflowStarted(ctx context.Context, ec *workflow.ExecutionContext, step *workflow.Step, childRunID string) { + if s.recorderFor(ctx) == nil { + return + } + s.emitTranscriptEvent(ctx, transcript.ExchangeEvent{ + Type: transcript.EventTypeStepCallWorkflowStarted, + RunID: ec.WorkflowID, + ParentRunID: ec.ParentRunID, + ChildRunID: childRunID, + Path: step.Name, + Timestamp: time.Now(), + Payload: &transcript.StepPayload{Name: step.Name, Kind: string(step.Type)}, + }) +} + +func (s *ExecutionService) emitTranscriptCallWorkflowCompleted(ctx context.Context, ec *workflow.ExecutionContext, step *workflow.Step, childRunID string, execErr error) { + if s.recorderFor(ctx) == nil { + return + } + payload := &transcript.StepPayload{Name: step.Name, Kind: string(step.Type)} + if execErr != nil { + payload.Error = execErr.Error() + } + s.emitTranscriptEvent(ctx, transcript.ExchangeEvent{ + Type: 
transcript.EventTypeStepCallWorkflowCompleted, + RunID: ec.WorkflowID, + ParentRunID: ec.ParentRunID, + ChildRunID: childRunID, + Path: step.Name, + Timestamp: time.Now(), + Payload: payload, + }) +} + +// maxTranscriptLoopDepth bounds how many nested loop levels are encoded into a transcript +// path. Nesting deeper than this is truncated at the outermost levels — the path stays a +// human-readable label, not a lossless address (the run_id + step name remain exact). Eight +// levels is far beyond any realistic workflow nesting. +const maxTranscriptLoopDepth = 8 + +func buildTranscriptPath(ec *workflow.ExecutionContext, step *workflow.Step) string { + if ec.CurrentLoop == nil { + return step.Name + } + + var stack [maxTranscriptLoopDepth]*workflow.LoopContext + depth := 0 + for loop := ec.CurrentLoop; loop != nil && depth < len(stack); loop = loop.Parent { + stack[depth] = loop + depth++ + } + + var b strings.Builder + for i := depth - 1; i >= 0; i-- { + if i < depth-1 { + b.WriteByte('/') + } + b.WriteString("loop:") + b.WriteString(strconv.Itoa(stack[i].Index)) + } + b.WriteByte('/') + b.WriteString(step.Name) + return b.String() +} diff --git a/internal/application/execution_service_transcript_assistant_test.go b/internal/application/execution_service_transcript_assistant_test.go new file mode 100644 index 00000000..fd3a2a5f --- /dev/null +++ b/internal/application/execution_service_transcript_assistant_test.go @@ -0,0 +1,144 @@ +package application + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/awf-project/cli/internal/domain/workflow" +) + +// fakeNormalizer is a test double for ports.AgentOutputNormalizer. 
+type fakeNormalizer struct { + blocks []transcript.ContentBlock + gotProvider string + gotRaw string + calls int +} + +func (f *fakeNormalizer) Normalize(provider string, rawOutput []byte) []transcript.ContentBlock { + f.calls++ + f.gotProvider = provider + f.gotRaw = string(rawOutput) + return f.blocks +} + +// TestEmitTranscriptAgentResponse_EmitsAssistantMessage verifies F106 US2: provider raw +// NDJSON output is normalized and emitted as a message.assistant event carrying the blocks. +func TestEmitTranscriptAgentResponse_EmitsAssistantMessage(t *testing.T) { + svc := newTestExecutionService() + rec := &fakeRecorder{} + svc.SetRecorder(rec) + norm := &fakeNormalizer{blocks: []transcript.ContentBlock{ + {Type: transcript.BlockTypeText, Fidelity: transcript.FidelityAgentEmitted, Text: "hi"}, + }} + svc.SetAgentOutputNormalizer(norm) + + ec := workflow.NewExecutionContext("wf-assistant", "test-workflow") + // rawOutput is the NDJSON stream; text is the extracted fallback (ignored when blocks exist). + svc.emitTranscriptAgentResponse(context.Background(), ec, "claude", "raw-ndjson-output", "extracted text") + + require.Len(t, rec.events, 1) + ev := rec.events[0] + assert.Equal(t, transcript.EventTypeMessageAssistant, ev.Type) + assert.Equal(t, "wf-assistant", ev.RunID) + + payload, ok := ev.Payload.(*transcript.MessagePayload) + require.True(t, ok) + assert.Equal(t, "assistant", payload.Role) + require.Len(t, payload.Blocks, 1) + assert.Equal(t, "hi", payload.Blocks[0].Text) + assert.Equal(t, transcript.FidelityAgentEmitted, payload.Blocks[0].Fidelity) + + // Normalizer received the resolved provider name and the RAW NDJSON output (not the + // extracted text) — this is the P-1 regression guard: feeding extracted text here + // would yield zero blocks in production. 
+ assert.Equal(t, "claude", norm.gotProvider) + assert.Equal(t, "raw-ndjson-output", norm.gotRaw) +} + +// TestEmitTranscriptAgentResponse_FallsBackToTextBlock verifies that when the normalizer +// yields no blocks (e.g. openai_compatible HTTP, which has no raw NDJSON stream, or a +// non-streaming provider) but an extracted assistant text is available, a single +// agent-emitted text block is captured so the assistant turn is never lost. +func TestEmitTranscriptAgentResponse_FallsBackToTextBlock(t *testing.T) { + svc := newTestExecutionService() + rec := &fakeRecorder{} + svc.SetRecorder(rec) + // Normalizer present but produces nothing from the (empty) raw stream. + svc.SetAgentOutputNormalizer(&fakeNormalizer{blocks: nil}) + + ec := workflow.NewExecutionContext("wf-fallback", "test-workflow") + svc.emitTranscriptAgentResponse(context.Background(), ec, "openai_compatible", "", "final answer") + + require.Len(t, rec.events, 1) + payload, ok := rec.events[0].Payload.(*transcript.MessagePayload) + require.True(t, ok) + assert.Equal(t, "assistant", payload.Role) + require.Len(t, payload.Blocks, 1) + assert.Equal(t, "final answer", payload.Blocks[0].Text) + assert.Equal(t, transcript.BlockTypeText, payload.Blocks[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, payload.Blocks[0].Fidelity) +} + +// TestEmitTranscriptAgentResponse_NoBlocksNoTextNoEvent verifies that output yielding no +// blocks and no fallback text does not produce an empty assistant message. 
+func TestEmitTranscriptAgentResponse_NoBlocksNoTextNoEvent(t *testing.T) { + svc := newTestExecutionService() + rec := &fakeRecorder{} + svc.SetRecorder(rec) + svc.SetAgentOutputNormalizer(&fakeNormalizer{blocks: nil}) + + ec := workflow.NewExecutionContext("wf-empty", "test-workflow") + svc.emitTranscriptAgentResponse(context.Background(), ec, "claude", "raw", "") + + assert.Empty(t, rec.events, "no blocks and no text must produce no assistant event") +} + +// TestEmitTranscriptAgentResponse_NilNormalizerStillFallsBack verifies that with no +// normalizer wired, the extracted assistant text is still captured as a text block. +func TestEmitTranscriptAgentResponse_NilNormalizerStillFallsBack(t *testing.T) { + svc := newTestExecutionService() + rec := &fakeRecorder{} + svc.SetRecorder(rec) + // no normalizer set + + ec := workflow.NewExecutionContext("wf-nonorm", "test-workflow") + svc.emitTranscriptAgentResponse(context.Background(), ec, "claude", "raw", "answer text") + + require.Len(t, rec.events, 1) + payload, ok := rec.events[0].Payload.(*transcript.MessagePayload) + require.True(t, ok) + require.Len(t, payload.Blocks, 1) + assert.Equal(t, "answer text", payload.Blocks[0].Text) +} + +// TestEmitTranscriptAgentResponse_NoRecorderIsNoOp verifies graceful degradation when no +// recorder is wired. +func TestEmitTranscriptAgentResponse_NoRecorderIsNoOp(t *testing.T) { + svc := newTestExecutionService() + svc.SetAgentOutputNormalizer(&fakeNormalizer{blocks: []transcript.ContentBlock{{Type: transcript.BlockTypeText}}}) + + ec := workflow.NewExecutionContext("wf-norec", "test-workflow") + // Must not panic and must be a no-op (no recorder). + svc.emitTranscriptAgentResponse(context.Background(), ec, "claude", "raw", "text") +} + +// TestEmitTranscriptAgentResponse_EmptyRawSkipsNormalizer verifies an empty raw stream +// short-circuits the normalizer (the text fallback path is exercised separately). 
+func TestEmitTranscriptAgentResponse_EmptyRawSkipsNormalizer(t *testing.T) { + svc := newTestExecutionService() + rec := &fakeRecorder{} + svc.SetRecorder(rec) + norm := &fakeNormalizer{blocks: []transcript.ContentBlock{{Type: transcript.BlockTypeText}}} + svc.SetAgentOutputNormalizer(norm) + + ec := workflow.NewExecutionContext("wf-emptyraw", "test-workflow") + svc.emitTranscriptAgentResponse(context.Background(), ec, "claude", "", "") + + assert.Empty(t, rec.events) + assert.Equal(t, 0, norm.calls, "empty raw output must short-circuit before the normalizer") +} diff --git a/internal/application/execution_service_transcript_callworkflow_test.go b/internal/application/execution_service_transcript_callworkflow_test.go new file mode 100644 index 00000000..da85334c --- /dev/null +++ b/internal/application/execution_service_transcript_callworkflow_test.go @@ -0,0 +1,145 @@ +package application + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/awf-project/cli/internal/domain/workflow" +) + +// mockRecorderWithClose tracks Close() calls via a counter. +type mockRecorderWithClose struct { + events []transcript.ExchangeEvent + closeCalls int + returnErr error + closeErr error +} + +func (m *mockRecorderWithClose) Record(_ context.Context, event transcript.ExchangeEvent) error { + if m.returnErr != nil { + return m.returnErr + } + m.events = append(m.events, event) + return nil +} + +func (m *mockRecorderWithClose) Subscribe() (<-chan transcript.ExchangeEvent, func()) { + ch := make(chan transcript.ExchangeEvent) + return ch, func() { close(ch) } +} + +func (m *mockRecorderWithClose) Close() error { + m.closeCalls++ + return m.closeErr +} + +// TestExecutionService_CallWorkflowLinkage verifies parent-child transcript linkage with child_run_id and parent_run_id. 
+func TestExecutionService_CallWorkflowLinkage(t *testing.T) { + ctx := context.Background() + parentRecorder := &fakeRecorder{} + + svc := newTestExecutionService() + svc.SetRecorder(parentRecorder) + + parentID := "parent-wf-123" + childID := "child-wf-456" + + // Parent execution context + parentExecCtx := workflow.NewExecutionContext(parentID, "parent-workflow") + + // Simulate parent call_workflow step with child ID + step := &workflow.Step{ + Name: "call-child", + Type: workflow.StepTypeCallWorkflow, + CallWorkflow: &workflow.CallWorkflowConfig{ + Workflow: "child-workflow", + }, + } + + // Emit call_workflow.started with child_run_id + svc.emitTranscriptCallWorkflowStarted(ctx, parentExecCtx, step, childID) + + // Emit call_workflow.completed with child_run_id + svc.emitTranscriptCallWorkflowCompleted(ctx, parentExecCtx, step, childID, nil) + + // Verify parent transcript has call_workflow events with child_run_id + require.Len(t, parentRecorder.events, 2) + + startEvent := parentRecorder.events[0] + assert.Equal(t, transcript.EventTypeStepCallWorkflowStarted, startEvent.Type) + assert.Equal(t, childID, startEvent.ChildRunID) + assert.Equal(t, parentID, startEvent.RunID) + + completeEvent := parentRecorder.events[1] + assert.Equal(t, transcript.EventTypeStepCallWorkflowCompleted, completeEvent.Type) + assert.Equal(t, childID, completeEvent.ChildRunID) + assert.Equal(t, parentID, completeEvent.RunID) + + // Verify child context can have ParentRunID set (will be used when creating child recorder) + childExecCtx := workflow.NewExecutionContext(childID, "child-workflow") + childExecCtx.ParentRunID = parentID + assert.Equal(t, parentID, childExecCtx.ParentRunID) +} + +// TestExecutionService_CallWorkflowDefersChildClose verifies child Recorder Close() is called via defer even on execution error. 
+func TestExecutionService_CallWorkflowDefersChildClose(t *testing.T) { + mockRecorder := &mockRecorderWithClose{ + returnErr: nil, + } + + // Test 1: Verify that mockRecorderWithClose tracks Close() calls + _ = mockRecorder.Record(context.Background(), transcript.ExchangeEvent{}) + _ = mockRecorder.Close() + + // Verify Close was called via defer tracking + require.Greater(t, mockRecorder.closeCalls, 0, "Close() should be called via defer even on error") + initialCloseCalls := mockRecorder.closeCalls + + // Test 2: Verify Close is idempotent and can be called multiple times + _ = mockRecorder.Close() + assert.Greater(t, mockRecorder.closeCalls, initialCloseCalls, "Close() should track each call") + + // Test 3: Verify mock recorder properly implements ports.Recorder interface + var _ ports.Recorder = mockRecorder +} + +// TestExecutionService_CallWorkflowChildRecorderLifecycle verifies complete child recorder lifecycle with defer Close. +func TestExecutionService_CallWorkflowChildRecorderLifecycle(t *testing.T) { + mockRecorder := &mockRecorderWithClose{} + + // Simulate the pattern used in executeCallWorkflowStep: + // 1. Create child recorder + // 2. Defer close + // 3. Record events + // 4. 
Even if error occurs, Close is still called + + func() { + // This simulates what happens inside executeCallWorkflowStep + if mockRecorder != nil { + defer mockRecorder.Close() + } + + // Record some events simulating child workflow execution + _ = mockRecorder.Record(context.Background(), transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + RunID: "child-123", + ParentRunID: "parent-456", + }) + + // Simulate an error during child execution + // even though we return early, defer still executes + }() + + // After function exit, defer should have called Close() + assert.Greater(t, mockRecorder.closeCalls, 0, "defer childRecorder.Close() should be executed even with early return") + assert.Len(t, mockRecorder.events, 1) + assert.Equal(t, "parent-456", mockRecorder.events[0].ParentRunID) +} + +// Ensure our mock implements ports.Recorder +var _ ports.Recorder = (*mockRecorderWithClose)(nil) diff --git a/internal/application/execution_service_transcript_loop_test.go b/internal/application/execution_service_transcript_loop_test.go new file mode 100644 index 00000000..8d239c1e --- /dev/null +++ b/internal/application/execution_service_transcript_loop_test.go @@ -0,0 +1,238 @@ +package application + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/awf-project/cli/internal/domain/workflow" +) + +// TestExecutionService_LoopIterationInPath verifies for_each iterations are tracked in transcript. 
+func TestExecutionService_LoopIterationInPath(t *testing.T) { + ctx := context.Background() + recorder := &fakeRecorder{} + + svc := newTestExecutionService() + svc.SetRecorder(recorder) + + wfID := "loop-foreach-123" + + execCtx := workflow.NewExecutionContext(wfID, "loop-foreach-workflow") + step := &workflow.Step{ + Name: "loop-step", + Type: workflow.StepTypeForEach, + Loop: &workflow.LoopConfig{ + Type: workflow.LoopTypeForEach, + Items: "{{inputs.items}}", + }, + } + + // Simulate 3 loop iterations: each should emit step events with iteration 0, 1, 2 + for i := range 3 { + // Set up loop context for this iteration + loopCtx := &workflow.LoopContext{ + Index: i, + Item: "item-" + string(rune('0'+i)), + } + execCtx.CurrentLoop = loopCtx + + // Emit step started event for this iteration + svc.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + + // Emit step completed event + svc.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepCompleted) + } + + // Verify we have 6 events (2 per iteration: started + completed) + require.Len(t, recorder.events, 6) + + // Verify each pair has correct iteration and path includes loop information + for i := range 3 { + startIdx := i * 2 + endIdx := i*2 + 1 + + startEvent := recorder.events[startIdx] + assert.Equal(t, transcript.EventTypeStepStarted, startEvent.Type) + assert.Equal(t, i, startEvent.Iteration) + // Path should include loop context: "loop:0/loop-step", "loop:1/loop-step", "loop:2/loop-step" + assert.Contains(t, startEvent.Path, "loop-step") + assert.Contains(t, startEvent.Path, "loop:"+string(rune('0'+i))) + + endEvent := recorder.events[endIdx] + assert.Equal(t, transcript.EventTypeStepCompleted, endEvent.Type) + assert.Equal(t, i, endEvent.Iteration) + assert.Contains(t, endEvent.Path, "loop-step") + assert.Contains(t, endEvent.Path, "loop:"+string(rune('0'+i))) + } +} + +// TestExecutionService_WhileLoopIterations verifies while loop iterations are tracked with incrementing 
iteration. +func TestExecutionService_WhileLoopIterations(t *testing.T) { + ctx := context.Background() + recorder := &fakeRecorder{} + + svc := newTestExecutionService() + svc.SetRecorder(recorder) + + wfID := "loop-while-123" + + execCtx := workflow.NewExecutionContext(wfID, "loop-while-workflow") + step := &workflow.Step{ + Name: "while-step", + Type: workflow.StepTypeWhile, + Loop: &workflow.LoopConfig{ + Type: workflow.LoopTypeWhile, + Condition: "{{states.counter.output}}", + }, + } + + // Simulate 5 while loop iterations (0-4) + const numIterations = 5 + for i := range numIterations { + loopCtx := &workflow.LoopContext{ + Index: i, + Item: nil, // while loops don't have items + } + execCtx.CurrentLoop = loopCtx + + // Emit step started event + svc.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + + // Emit step completed event + svc.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepCompleted) + } + + // Verify we have 10 events (2 per iteration) + require.Len(t, recorder.events, 10) + + // Verify iteration numbers increment from 0 to 4 + for i := range numIterations { + startIdx := i * 2 + endIdx := i*2 + 1 + + startEvent := recorder.events[startIdx] + assert.Equal(t, transcript.EventTypeStepStarted, startEvent.Type) + assert.Equal(t, i, startEvent.Iteration, "iteration should be %d for start event", i) + + endEvent := recorder.events[endIdx] + assert.Equal(t, transcript.EventTypeStepCompleted, endEvent.Type) + assert.Equal(t, i, endEvent.Iteration, "iteration should be %d for end event", i) + } +} + +// TestExecutionService_NestedLoopIterationPath verifies nested loop path building. 
+func TestExecutionService_NestedLoopIterationPath(t *testing.T) { + ctx := context.Background() + recorder := &fakeRecorder{} + + svc := newTestExecutionService() + svc.SetRecorder(recorder) + + wfID := "nested-loop-123" + + execCtx := workflow.NewExecutionContext(wfID, "nested-loop-workflow") + step := &workflow.Step{ + Name: "outer-step", + Type: workflow.StepTypeForEach, + } + + // Create nested loop context: outer index 1, inner index 2 + outerLoop := &workflow.LoopContext{ + Index: 1, + Item: "outer-item-1", + } + innerLoop := &workflow.LoopContext{ + Index: 2, + Item: "inner-item-2", + Parent: outerLoop, + } + execCtx.CurrentLoop = innerLoop + + // Emit event from nested loop + svc.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + + // Verify path contains both loop information + require.Len(t, recorder.events, 1) + event := recorder.events[0] + assert.Equal(t, 2, event.Iteration) +} + +// TestExecutionService_LoopIterationWithoutContext verifies iteration is 0 when no loop context. +func TestExecutionService_LoopIterationWithoutContext(t *testing.T) { + ctx := context.Background() + recorder := &fakeRecorder{} + + svc := newTestExecutionService() + svc.SetRecorder(recorder) + + wfID := "no-loop-123" + + execCtx := workflow.NewExecutionContext(wfID, "no-loop-workflow") + step := &workflow.Step{ + Name: "regular-step", + Type: workflow.StepTypeCommand, + } + + // No loop context set + execCtx.CurrentLoop = nil + + svc.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + + // Iteration should be 0 (default) + require.Len(t, recorder.events, 1) + event := recorder.events[0] + assert.Equal(t, 0, event.Iteration) + assert.Equal(t, step.Name, event.Path) +} + +// TestExecutionService_MultipleLoopItemsPathStructure verifies path structure across items. 
+func TestExecutionService_MultipleLoopItemsPathStructure(t *testing.T) { + tests := []struct { + name string + items []string + expectLen int + }{ + { + name: "single item", + items: []string{"item-0"}, + expectLen: 2, // start + complete + }, + { + name: "three items", + items: []string{"a", "b", "c"}, + expectLen: 6, // 3 * (start + complete) + }, + { + name: "five items", + items: []string{"1", "2", "3", "4", "5"}, + expectLen: 10, // 5 * (start + complete) + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + recorder := &fakeRecorder{} + + svc := newTestExecutionService() + svc.SetRecorder(recorder) + + execCtx := workflow.NewExecutionContext("wf-path-test", "test-workflow") + step := &workflow.Step{Name: "test-step", Type: workflow.StepTypeForEach} + + for i := range len(tt.items) { + loopCtx := &workflow.LoopContext{Index: i, Item: tt.items[i]} + execCtx.CurrentLoop = loopCtx + + svc.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + svc.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepCompleted) + } + + assert.Len(t, recorder.events, tt.expectLen) + }) + } +} diff --git a/internal/application/execution_service_transcript_test.go b/internal/application/execution_service_transcript_test.go new file mode 100644 index 00000000..29f52718 --- /dev/null +++ b/internal/application/execution_service_transcript_test.go @@ -0,0 +1,406 @@ +package application + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/awf-project/cli/internal/domain/workflow" +) + +// fakeRecorder captures recorded events for testing. 
+type fakeRecorder struct { + events []transcript.ExchangeEvent + err error // if set, Record returns this error +} + +func (f *fakeRecorder) Record(_ context.Context, event transcript.ExchangeEvent) error { + if f.err != nil { + return f.err + } + f.events = append(f.events, event) + return nil +} + +func (f *fakeRecorder) Subscribe() (<-chan transcript.ExchangeEvent, func()) { + ch := make(chan transcript.ExchangeEvent) + return ch, func() { close(ch) } +} + +func (f *fakeRecorder) Close() error { + return nil +} + +// TestExecutionService_SetRecorder_StoresRecorder verifies SetRecorder stores the recorder. +func TestExecutionService_SetRecorder_StoresRecorder(t *testing.T) { + svc := newTestExecutionService() + recorder := &fakeRecorder{} + + svc.SetRecorder(recorder) + + // Verify the recorder was stored by attempting to use it + ctx := context.Background() + execCtx := workflow.NewExecutionContext("wf-verify", "test-workflow") + step := &workflow.Step{ + Name: "verify-step", + Type: "command", + } + + // Call emitter which will use the stored recorder + svc.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + + // Verify event was recorded (only happens if recorder was stored) + assert.Len(t, recorder.events, 1) +} + +// TestExecutionService_SetRecorder_AllowsNil verifies nil recorder is accepted without validation. +func TestExecutionService_SetRecorder_AllowsNil(t *testing.T) { + svc := newTestExecutionService() + + // Should not panic or error + svc.SetRecorder(nil) + + assert.NotNil(t, svc) +} + +// TestExecutionService_NilRecorderIsNoOp verifies minimal allocations when recorder is nil. 
+func TestExecutionService_NilRecorderIsNoOp(t *testing.T) { + svc := newTestExecutionService() + svc.SetRecorder(nil) + + ctx := context.Background() + execCtx := workflow.NewExecutionContext("wf-alloc", "test-workflow") + step := &workflow.Step{ + Name: "alloc-step", + Type: "command", + } + + // Measure baseline (no allocations expected with nil recorder — just a nil check and return) + allocsWithNil := uint64(testing.AllocsPerRun(100, func() { + svc.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepStarted) + svc.emitTranscriptStep(ctx, execCtx, step, transcript.EventTypeStepCompleted) + })) + + // With nil recorder, emitters should do nothing (only check s.recorder == nil, then return) + // Zero allocations expected on hot path per NFR-003 + assert.Equal(t, uint64(0), allocsWithNil) +} + +// TestExecutionService_BuildTranscriptPath_SingleStep verifies path construction for a single step. +func TestExecutionService_BuildTranscriptPath_SingleStep(t *testing.T) { + execCtx := workflow.NewExecutionContext("wf-123", "test-workflow") + step := &workflow.Step{ + Name: "my-step", + Type: "command", + } + + // buildTranscriptPath is called internally; this test verifies the path format + // when no loop context exists + path := buildTranscriptPathHelper(execCtx, step) + + assert.Equal(t, "my-step", path) +} + +// TestExecutionService_BuildTranscriptPath_WithLoop verifies path construction with loop context. +func TestExecutionService_BuildTranscriptPath_WithLoop(t *testing.T) { + execCtx := workflow.NewExecutionContext("wf-123", "test-workflow") + execCtx.CurrentLoop = &workflow.LoopContext{ + Index: 2, + Item: "item-2", + } + step := &workflow.Step{ + Name: "loop-step", + Type: "command", + } + + path := buildTranscriptPathHelper(execCtx, step) + + // Should include loop index + assert.Contains(t, path, "loop-step") + assert.Contains(t, path, "2") +} + +// TestExecutionService_BuildTranscriptPath_NestedLoops verifies path construction with nested loops. 
+func TestExecutionService_BuildTranscriptPath_NestedLoops(t *testing.T) { + execCtx := workflow.NewExecutionContext("wf-123", "test-workflow") + + // Create nested loop structure: parent loop with index 1, child loop with index 3 + parentLoop := &workflow.LoopContext{ + Index: 1, + Item: "parent-item", + } + childLoop := &workflow.LoopContext{ + Index: 3, + Item: "child-item", + Parent: parentLoop, + } + execCtx.CurrentLoop = childLoop + + step := &workflow.Step{ + Name: "nested-step", + Type: "command", + } + + path := buildTranscriptPathHelper(execCtx, step) + + // Should walk parent chain and include both indices + assert.Contains(t, path, "nested-step") + assert.Contains(t, path, "1") + assert.Contains(t, path, "3") +} + +// TestExecutionService_EmitTranscriptStepStarted_ConstructsEvent verifies step.started event structure. +func TestExecutionService_EmitTranscriptStepStarted_ConstructsEvent(t *testing.T) { + svc := newTestExecutionService() + recorder := &fakeRecorder{} + svc.SetRecorder(recorder) + + ctx := context.Background() + execCtx := workflow.NewExecutionContext("wf-456", "test-workflow") + step := &workflow.Step{ + Name: "my-step", + Type: "agent", + } + + emitTranscriptStepStartedHelper(svc, ctx, execCtx, step) + + // Verify event was recorded + require.Len(t, recorder.events, 1) + event := recorder.events[0] + + assert.Equal(t, transcript.EventTypeStepStarted, event.Type) + assert.Equal(t, "wf-456", event.RunID) + assert.Equal(t, "my-step", event.Path) + assert.NotZero(t, event.Timestamp) + + // Verify payload contains step name and kind + payload, ok := event.Payload.(*transcript.StepPayload) + require.True(t, ok) + assert.Equal(t, "my-step", payload.Name) + assert.Equal(t, "agent", payload.Kind) +} + +// TestExecutionService_EmitTranscriptStepCompleted_ConstructsEvent verifies step.completed event. 
+func TestExecutionService_EmitTranscriptStepCompleted_ConstructsEvent(t *testing.T) { + svc := newTestExecutionService() + recorder := &fakeRecorder{} + svc.SetRecorder(recorder) + + ctx := context.Background() + execCtx := workflow.NewExecutionContext("wf-789", "test-workflow") + step := &workflow.Step{ + Name: "finished-step", + Type: "command", + } + + emitTranscriptStepCompletedHelper(svc, ctx, execCtx, step) + + // Verify event was recorded + require.Len(t, recorder.events, 1) + event := recorder.events[0] + + assert.Equal(t, transcript.EventTypeStepCompleted, event.Type) + assert.Equal(t, "wf-789", event.RunID) +} + +// TestExecutionService_EmitTranscriptAgentMessage_CarriesPromptAndSystemPrompt verifies message.user event. +func TestExecutionService_EmitTranscriptAgentMessage_CarriesPromptAndSystemPrompt(t *testing.T) { + svc := newTestExecutionService() + recorder := &fakeRecorder{} + svc.SetRecorder(recorder) + + ctx := context.Background() + execCtx := workflow.NewExecutionContext("wf-abc", "test-workflow") + + userPrompt := "Generate a summary" + systemPrompt := "You are a helpful assistant" + + emitTranscriptAgentMessageHelper(svc, ctx, execCtx, userPrompt, systemPrompt) + + // Verify message.user event + require.Len(t, recorder.events, 1) + event := recorder.events[0] + + assert.Equal(t, transcript.EventTypeMessageUser, event.Type) + assert.Equal(t, "wf-abc", event.RunID) + + // Verify MessagePayload with both prompt and system_prompt as blocks + payload, ok := event.Payload.(*transcript.MessagePayload) + require.True(t, ok) + assert.Equal(t, "user", payload.Role) + require.Len(t, payload.Blocks, 2) + + // First block: user prompt + assert.Equal(t, transcript.BlockTypeText, payload.Blocks[0].Type) + assert.Equal(t, userPrompt, payload.Blocks[0].Text) + + // Second block: system prompt + assert.Equal(t, transcript.BlockTypeText, payload.Blocks[1].Type) + assert.Equal(t, systemPrompt, payload.Blocks[1].Text) +} + +// 
TestExecutionService_EmitTranscriptStep_PropagatesParentRunID verifies P-3 / FR-007: +// events emitted within a sub-workflow run carry parent_run_id sourced from the child +// ExecutionContext, so a child transcript is navigable back to its parent. This guards the +// emitter itself (not a hand-built event), which is what was missing before. +func TestExecutionService_EmitTranscriptStep_PropagatesParentRunID(t *testing.T) { + svc := newTestExecutionService() + recorder := &fakeRecorder{} + svc.SetRecorder(recorder) + + execCtx := workflow.NewExecutionContext("child-run", "child-workflow") + execCtx.ParentRunID = "parent-run" + step := &workflow.Step{Name: "child-step", Type: workflow.StepTypeCommand} + + emitTranscriptStepStartedHelper(svc, context.Background(), execCtx, step) + + require.Len(t, recorder.events, 1) + ev := recorder.events[0] + assert.Equal(t, "child-run", ev.RunID) + assert.Equal(t, "parent-run", ev.ParentRunID, "child step event must carry parent_run_id (FR-007)") +} + +// TestExecutionService_EmitTranscriptStep_NoParentRunIDForTopLevel verifies a top-level run +// (no parent) emits events with an empty parent_run_id, which omitempty drops from JSON. +func TestExecutionService_EmitTranscriptStep_NoParentRunIDForTopLevel(t *testing.T) { + svc := newTestExecutionService() + recorder := &fakeRecorder{} + svc.SetRecorder(recorder) + + execCtx := workflow.NewExecutionContext("top-run", "top-workflow") + step := &workflow.Step{Name: "top-step", Type: workflow.StepTypeCommand} + + emitTranscriptStepStartedHelper(svc, context.Background(), execCtx, step) + + require.Len(t, recorder.events, 1) + assert.Empty(t, recorder.events[0].ParentRunID) +} + +// TestExecutionService_RecorderErrorLoggedNotPropagated verifies errors are logged as WARN. 
+func TestExecutionService_RecorderErrorLoggedNotPropagated(t *testing.T) { + // Create logger mock to capture WARN calls + loggedWarnings := make([]string, 0) + logger := &fakeLogger{ + warnFunc: func(msg string, keyVals ...any) { + loggedWarnings = append(loggedWarnings, msg) + }, + } + + svc := &ExecutionService{ + outputLimiter: NewOutputLimiter(workflow.DefaultOutputLimits()), + logger: logger, + } + + // Configure recorder to fail + failingRecorder := &fakeRecorder{ + err: ports.ErrInvalidEvent, + } + svc.SetRecorder(failingRecorder) + + ctx := context.Background() + execCtx := workflow.NewExecutionContext("wf-jkl", "test-workflow") + step := &workflow.Step{ + Name: "test-step", + Type: "command", + } + + // Emit should not panic despite recorder error + emitTranscriptStepStartedHelper(svc, ctx, execCtx, step) + + // Should have logged a WARN + require.Greater(t, len(loggedWarnings), 0) + assert.Contains(t, loggedWarnings[0], "warn") // Logger should have logged a warning +} + +// TestExecutionService_StepLifecycleEmitsSequence verifies step.started and step.completed are emitted. +func TestExecutionService_StepLifecycleEmitsSequence(t *testing.T) { + svc := newTestExecutionService() + recorder := &fakeRecorder{} + svc.SetRecorder(recorder) + + ctx := context.Background() + execCtx := workflow.NewExecutionContext("wf-mno", "test-workflow") + step := &workflow.Step{ + Name: "lifecycle-step", + Type: "command", + } + + // Emit step started + emitTranscriptStepStartedHelper(svc, ctx, execCtx, step) + + // Emit step completed + emitTranscriptStepCompletedHelper(svc, ctx, execCtx, step) + + // Verify event sequence + require.Len(t, recorder.events, 2) + assert.Equal(t, transcript.EventTypeStepStarted, recorder.events[0].Type) + assert.Equal(t, transcript.EventTypeStepCompleted, recorder.events[1].Type) +} + +// TestExecutionService_NilRecorderAllowsNormalFlow verifies workflow continues when recorder is nil. 
+func TestExecutionService_NilRecorderAllowsNormalFlow(t *testing.T) { + svc := newTestExecutionService() + svc.SetRecorder(nil) // Explicitly set to nil + + ctx := context.Background() + execCtx := workflow.NewExecutionContext("wf-pqr", "test-workflow") + step := &workflow.Step{ + Name: "normal-step", + Type: "command", + } + + // Should not panic or error with nil recorder + emitTranscriptStepStartedHelper(svc, ctx, execCtx, step) + emitTranscriptAgentMessageHelper(svc, ctx, execCtx, "prompt", "system") + emitTranscriptStepCompletedHelper(svc, ctx, execCtx, step) + + assert.NotNil(t, svc) +} + +// fakeLogger captures log messages for testing. +type fakeLogger struct { + warnFunc func(msg string, keyVals ...any) + debugFunc func(msg string, keyVals ...any) +} + +func (f *fakeLogger) Debug(msg string, keyVals ...any) { + if f.debugFunc != nil { + f.debugFunc(msg, keyVals...) + } +} + +func (f *fakeLogger) Info(msg string, keyVals ...any) {} + +func (f *fakeLogger) Warn(msg string, keyVals ...any) { + if f.warnFunc != nil { + f.warnFunc(msg, keyVals...) 
+ } +} + +func (f *fakeLogger) Error(msg string, keyVals ...any) {} + +func (f *fakeLogger) WithContext(ctx map[string]any) ports.Logger { + return f +} + +// Helper functions that call the actual emitter methods + +func buildTranscriptPathHelper(ec *workflow.ExecutionContext, step *workflow.Step) string { + return buildTranscriptPath(ec, step) +} + +func emitTranscriptStepStartedHelper(svc *ExecutionService, ctx context.Context, ec *workflow.ExecutionContext, step *workflow.Step) { + svc.emitTranscriptStep(ctx, ec, step, transcript.EventTypeStepStarted) +} + +func emitTranscriptStepCompletedHelper(svc *ExecutionService, ctx context.Context, ec *workflow.ExecutionContext, step *workflow.Step) { + svc.emitTranscriptStep(ctx, ec, step, transcript.EventTypeStepCompleted) +} + +func emitTranscriptAgentMessageHelper(svc *ExecutionService, ctx context.Context, ec *workflow.ExecutionContext, prompt, systemPrompt string) { + svc.emitTranscriptAgentMessage(ctx, ec, prompt, systemPrompt) +} diff --git a/internal/application/execution_setup.go b/internal/application/execution_setup.go index 50c5fb3a..8e7662ab 100644 --- a/internal/application/execution_setup.go +++ b/internal/application/execution_setup.go @@ -98,6 +98,9 @@ type setupConfig struct { pluginProviders PluginProviders tracer ports.Tracer auditWriter ports.AuditTrailWriter + recorder ports.Recorder + recorderFactory ports.RecorderFactory + transcriptDir string packName string packResolver PackWorkflowLoader outputWriters *OutputWriterPair @@ -136,6 +139,24 @@ func WithAuditWriter(w ports.AuditTrailWriter) SetupOption { return func(c *setupConfig) { c.auditWriter = w } } +// WithRecorder configures a transcript recorder for F106 canonical exchange format. +func WithRecorder(r ports.Recorder) SetupOption { + return func(c *setupConfig) { c.recorder = r } +} + +// WithRecorderFactory configures the factory used to create per-sub-run child +// recorders for F106 sub-workflow transcript linkage. 
Without it, executeCallWorkflowStep +// cannot open child transcript files and sub-workflow linkage is disabled. +func WithRecorderFactory(f ports.RecorderFactory) SetupOption { + return func(c *setupConfig) { c.recorderFactory = f } +} + +// WithTranscriptDir configures the directory where child transcript files are written +// for F106 sub-workflow linkage. Paired with WithRecorderFactory. +func WithTranscriptDir(dir string) SetupOption { + return func(c *setupConfig) { c.transcriptDir = dir } +} + // WithPackContext sets the pack name and its associated workflow loader. // When packName is non-empty, pack-scoped XDG paths are used instead of the global paths. func WithPackContext(name string, resolver PackWorkflowLoader) SetupOption { @@ -322,6 +343,11 @@ func (s *ExecutionSetup) Build(_ context.Context) (*SetupResult, error) { execSvc.SetAuditTrailWriter(cfg.auditWriter) } + // F106 US2: normalize provider agent output into transcript message.assistant blocks. + execSvc.SetAgentOutputNormalizer(agents.NewContentBlockNormalizer()) + + wireTranscriptRecording(execSvc, cfg) + if cfg.tracer != nil { execSvc.SetTracer(cfg.tracer) } @@ -378,6 +404,22 @@ func (s *ExecutionSetup) Build(_ context.Context) (*SetupResult, error) { }, nil } +// wireTranscriptRecording attaches the F106 transcript recorder, the per-sub-run +// recorder factory, and the transcript directory to the execution service when +// each is configured. Without the factory and directory, executeCallWorkflowStep's +// child-recorder block is unreachable and sub-workflow linkage is disabled. 
+func wireTranscriptRecording(execSvc *ExecutionService, cfg *setupConfig) { + if cfg.recorder != nil { + execSvc.SetRecorder(cfg.recorder) + } + if cfg.recorderFactory != nil { + execSvc.SetRecorderFactory(cfg.recorderFactory) + } + if cfg.transcriptDir != "" { + execSvc.SetTranscriptDir(cfg.transcriptDir) + } +} + // buildProviders assembles the composite operation provider from built-in and plugin-supplied // providers, honoring the PluginStateChecker gate for each built-in. // diff --git a/internal/application/execution_setup_recorder_factory_test.go b/internal/application/execution_setup_recorder_factory_test.go new file mode 100644 index 00000000..e7c0f0c5 --- /dev/null +++ b/internal/application/execution_setup_recorder_factory_test.go @@ -0,0 +1,49 @@ +package application + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/ports" + testmocks "github.com/awf-project/cli/internal/testutil/mocks" +) + +// TestBuild_WiresRecorderFactoryAndTranscriptDir verifies that WithRecorderFactory +// and WithTranscriptDir propagate through ExecutionSetup.Build onto the resulting +// ExecutionService. Without this wiring the executeCallWorkflowStep child-recorder +// block (if s.recorderFactory != nil) is permanently unreachable in production and +// F106 sub-workflow transcript linkage is dead code. 
+func TestBuild_WiresRecorderFactoryAndTranscriptDir(t *testing.T) { + repo := testmocks.NewMockWorkflowRepository() + store := testmocks.NewMockStateStore() + executor := testmocks.NewMockCommandExecutor() + logger := testmocks.NewMockLogger() + + var factoryCalledWith string + factory := func(path string) (ports.Recorder, error) { + factoryCalledWith = path + return &fakeRecorder{}, nil + } + + setup := NewExecutionSetup(repo, store, executor, logger, + WithRecorderFactory(factory), + WithTranscriptDir("/tmp/awf-transcripts"), + ) + + result, err := setup.Build(context.Background()) + require.NoError(t, err) + require.NotNil(t, result.ExecService) + + // transcriptDir must be wired through to the service base dir. + assert.Equal(t, "/tmp/awf-transcripts", result.ExecService.transcriptBaseDir()) + + // recorderFactory must be wired and callable so child recorders can be created. + require.NotNil(t, result.ExecService.recorderFactory, "recorderFactory must be wired by Build") + rec, err := result.ExecService.recorderFactory("/tmp/awf-transcripts/child.jsonl") + require.NoError(t, err) + assert.NotNil(t, rec) + assert.Equal(t, "/tmp/awf-transcripts/child.jsonl", factoryCalledWith) +} diff --git a/internal/application/execution_setup_test.go b/internal/application/execution_setup_test.go index 5231fa21..5cfbf041 100644 --- a/internal/application/execution_setup_test.go +++ b/internal/application/execution_setup_test.go @@ -9,6 +9,7 @@ import ( "github.com/awf-project/cli/internal/application" "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" "github.com/awf-project/cli/internal/domain/workflow" testmocks "github.com/awf-project/cli/internal/testutil/mocks" "github.com/awf-project/cli/pkg/display" @@ -268,6 +269,16 @@ func TestBuild_WithAuditWriter(t *testing.T) { assert.NotNil(t, result.ExecService, "ExecService must be non-nil when an audit writer is provided") } +func TestBuild_WithRecorder(t *testing.T) { 
+ recorder := &mockRecorder{} + setup := buildMinimalSetup(application.WithRecorder(recorder)) + + result, err := setup.Build(context.Background()) + + require.NoError(t, err) + assert.NotNil(t, result.ExecService, "ExecService must be non-nil when a recorder is provided") +} + func TestBuild_PluginGating_DisabledProvider(t *testing.T) { // Only notify is enabled; github and http are disabled. checker := newStubPluginChecker(map[string]bool{ @@ -385,3 +396,19 @@ func TestWithDisplayRendererFactory_Wired(t *testing.T) { type nopWriter struct{} func (n *nopWriter) Write(p []byte) (int, error) { return len(p), nil } + +// mockRecorder is a minimal test stub for ports.Recorder. +type mockRecorder struct{} + +func (m *mockRecorder) Record(ctx context.Context, event transcript.ExchangeEvent) error { + return nil +} + +func (m *mockRecorder) Subscribe() (<-chan transcript.ExchangeEvent, func()) { + ch := make(chan transcript.ExchangeEvent) + return ch, func() { close(ch) } +} + +func (m *mockRecorder) Close() error { + return nil +} diff --git a/internal/application/execution_tool_proxy.go b/internal/application/execution_tool_proxy.go index 2dffa625..4d133b71 100644 --- a/internal/application/execution_tool_proxy.go +++ b/internal/application/execution_tool_proxy.go @@ -33,8 +33,12 @@ func (s *ExecutionService) startToolProxy( opts map[string]any, resolvedProvider string, provider ports.AgentProvider, + execCtx *workflow.ExecutionContext, ) (cleanup func() error, err error) { - return startToolProxyImpl(ctx, s.toolProxy, s.logger, step, opts, resolvedProvider, provider) + // F106 FR-008: capture in-process (HTTP router) tool calls with fidelity:"router", + // correlated to this run. recorderFor(ctx) routes sub-workflow tool calls to the + // child transcript; execCtx.WorkflowID is the unified run id. 
+ return startToolProxyImpl(ctx, s.toolProxy, s.logger, step, opts, resolvedProvider, provider, s.recorderFor(ctx), execCtx.WorkflowID) } // startConversationToolProxy starts the MCP tool proxy for a conversation step. It is @@ -48,13 +52,16 @@ func startConversationToolProxy( opts map[string]any, resolvedProvider string, provider ports.AgentProvider, + recorder ports.Recorder, + runID string, ) (cleanup func() error, err error) { - return startToolProxyImpl(ctx, proxy, logger, step, opts, resolvedProvider, provider) + return startToolProxyImpl(ctx, proxy, logger, step, opts, resolvedProvider, provider, recorder, runID) } // startToolProxyImpl contains the actual start logic shared by single-turn and // conversation entry points. Splitting it out keeps the call sites readable and ensures // any policy change (e.g., HTTP vs stdio path selection) lands in exactly one place. +// recorder/runID wire the in-process ToolRouter for F106 router-fidelity tool capture. func startToolProxyImpl( ctx context.Context, proxy *tools.ProxyService, @@ -63,6 +70,8 @@ func startToolProxyImpl( opts map[string]any, resolvedProvider string, provider ports.AgentProvider, + recorder ports.Recorder, + runID string, ) (func() error, error) { if proxy == nil || step.MCPProxy == nil || !step.MCPProxy.Enable { return func() error { return nil }, nil @@ -78,6 +87,9 @@ func startToolProxyImpl( return func() error { return nil }, fmt.Errorf("start tool proxy (http): %w", startErr) } if router != nil { + // F106 FR-008: capture tool.call/tool.result at the router seam. 
+ router.SetRecorder(recorder) + router.SetRunID(runID) if setter, ok := provider.(toolRouterSetter); ok { setter.SetToolRouter(router) } else { diff --git a/internal/application/execution_tool_proxy_recorder_test.go b/internal/application/execution_tool_proxy_recorder_test.go new file mode 100644 index 00000000..bfd8c1c2 --- /dev/null +++ b/internal/application/execution_tool_proxy_recorder_test.go @@ -0,0 +1,77 @@ +package application + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/application/tools" + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/awf-project/cli/internal/domain/workflow" + "github.com/awf-project/cli/internal/testutil/mocks" +) + +// fakeToolProvider is a minimal ports.ToolProvider exposing one tool for router tests. +type fakeToolProvider struct{} + +func (fakeToolProvider) ListTools(context.Context) ([]ports.ToolDefinition, error) { + return []ports.ToolDefinition{{Name: "ping", Source: "test"}}, nil +} + +func (fakeToolProvider) CallTool(context.Context, string, map[string]any) (*ports.ToolResult, error) { + return &ports.ToolResult{}, nil +} + +func (fakeToolProvider) Close(context.Context) error { return nil } + +// routerCapturingProvider satisfies ports.AgentProvider (via the nil embedded interface — +// startToolProxyImpl only calls SetToolRouter on the openai_compatible path) and captures +// the in-process router handed to it. 
+type routerCapturingProvider struct { + ports.AgentProvider + captured ports.ToolRouter +} + +func (p *routerCapturingProvider) SetToolRouter(r ports.ToolRouter) { p.captured = r } + +// TestStartToolProxy_WiresRecorderAndRunIDIntoRouter verifies F106 FR-008: the in-process +// HTTP ToolRouter created for openai_compatible is wired with the run's recorder and run id, +// so tool.call/tool.result are captured with fidelity:"router" and correlated to the run. +func TestStartToolProxy_WiresRecorderAndRunIDIntoRouter(t *testing.T) { + rec := &fakeRecorder{} + proxy := tools.NewProxyService( + mocks.NewMockCLIExecutor(), + mocks.NewMockTracer(), + mocks.NewMockLogger(), + func(tools.ProxyConfig) ([]ports.ToolProvider, error) { + return []ports.ToolProvider{fakeToolProvider{}}, nil + }, + ) + prov := &routerCapturingProvider{} + step := &workflow.Step{Name: "s", MCPProxy: &workflow.MCPProxyConfig{Enable: true, InterceptBuiltins: true}} + + cleanup, err := startToolProxyImpl( + context.Background(), proxy, mocks.NewMockLogger(), step, map[string]any{}, + "openai_compatible", prov, rec, "run-D", + ) + require.NoError(t, err) + defer func() { _ = cleanup() }() + + require.NotNil(t, prov.captured, "the router must be injected into the provider") + + _, err = prov.captured.CallTool(context.Background(), "ping", map[string]any{"x": 1}) + require.NoError(t, err) + + require.Len(t, rec.events, 2, "router must emit tool.call and tool.result") + assert.Equal(t, transcript.EventTypeToolCall, rec.events[0].Type) + assert.Equal(t, "run-D", rec.events[0].RunID) + assert.Equal(t, transcript.EventTypeToolResult, rec.events[1].Type) + assert.Equal(t, "run-D", rec.events[1].RunID) + + callPayload, ok := rec.events[0].Payload.(*transcript.ToolPayload) + require.True(t, ok) + assert.Equal(t, transcript.FidelityRouter, callPayload.Fidelity) +} diff --git a/internal/application/execution_tool_proxy_test.go b/internal/application/execution_tool_proxy_test.go index 38865b93..2496994e 100644 
--- a/internal/application/execution_tool_proxy_test.go +++ b/internal/application/execution_tool_proxy_test.go @@ -116,7 +116,7 @@ func TestStartToolProxyImpl_NoopWhenProxyNil(t *testing.T) { opts := map[string]any{} step := &workflow.Step{MCPProxy: &workflow.MCPProxyConfig{Enable: true, InterceptBuiltins: true}} - cleanup, err := startToolProxyImpl(context.Background(), nil, mocks.NewMockLogger(), step, opts, "claude", nil) + cleanup, err := startToolProxyImpl(context.Background(), nil, mocks.NewMockLogger(), step, opts, "claude", nil, nil, "") require.NoError(t, err) require.NotNil(t, cleanup) @@ -136,7 +136,7 @@ func TestStartToolProxyImpl_NoopWhenDisabled(t *testing.T) { opts := map[string]any{} step := &workflow.Step{MCPProxy: &workflow.MCPProxyConfig{Enable: false, InterceptBuiltins: true}} - cleanup, err := startToolProxyImpl(context.Background(), proxy, mocks.NewMockLogger(), step, opts, "claude", nil) + cleanup, err := startToolProxyImpl(context.Background(), proxy, mocks.NewMockLogger(), step, opts, "claude", nil, nil, "") require.NoError(t, err) assert.NoError(t, cleanup()) @@ -156,7 +156,7 @@ func TestStartToolProxyImpl_OpenAICompatibleUsesHTTPPath(t *testing.T) { opts := map[string]any{} step := &workflow.Step{MCPProxy: &workflow.MCPProxyConfig{Enable: true, InterceptBuiltins: true}} - cleanup, err := startToolProxyImpl(context.Background(), proxy, mocks.NewMockLogger(), step, opts, "openai_compatible", nil) + cleanup, err := startToolProxyImpl(context.Background(), proxy, mocks.NewMockLogger(), step, opts, "openai_compatible", nil, nil, "") require.NoError(t, err) assert.NoError(t, cleanup()) diff --git a/internal/application/subworkflow_executor.go b/internal/application/subworkflow_executor.go index 221ae0e8..a9fda360 100644 --- a/internal/application/subworkflow_executor.go +++ b/internal/application/subworkflow_executor.go @@ -4,9 +4,13 @@ import ( "context" "errors" "fmt" + "path/filepath" "strings" "time" + "github.com/google/uuid" + + 
"github.com/awf-project/cli/internal/domain/ports" "github.com/awf-project/cli/internal/domain/workflow" ) @@ -101,6 +105,28 @@ func (s *ExecutionService) executeCallWorkflowStep( } } + // T050: construct child recorder for transcript linkage (one Recorder per sub-run) + childRunID := uuid.New().String() + var childRecorder ports.Recorder + if s.recorderFactory != nil { + var recErr error + childRecorder, recErr = s.recorderFactory( + filepath.Join(s.transcriptBaseDir(), childRunID+".jsonl"), + ) + if recErr != nil && s.logger != nil { + s.logger.Warn("child recorder creation failed", "error", recErr, "run_id", childRunID) + } + } + if childRecorder != nil { + defer func() { + // Close flushes the child transcript's final buffered line; surface a failure + // as a warning rather than silently dropping the tail of the sub-run's file. + if cerr := childRecorder.Close(); cerr != nil && s.logger != nil { + s.logger.Warn("child recorder close failed", "error", cerr, "run_id", childRunID) + } + }() + } + // 4. Push current workflow to call stack execCtx.PushCallStack(wf.Name) defer execCtx.PopCallStack() @@ -134,9 +160,15 @@ func (s *ExecutionService) executeCallWorkflowStep( s.logger.Warn("pre-hook failed", "step", step.Name, "error", err) } - // 7. Execute sub-workflow with parent call stack for circular detection + // 7. Execute sub-workflow with parent call stack for circular detection. + // Route the child run's transcript emission to its own Recorder (one file per + // sub-run, F106 US5) via the context, and thread the child/parent run identity so + // the child's events carry RunID=childRunID and ParentRunID=parent's WorkflowID. 
s.logger.Info("executing sub-workflow", "step", step.Name, "workflow", config.Workflow) - subResult, execErr := s.runWithCallStack(subCtx, config.Workflow, subInputs, execCtx.CallStack) + s.emitTranscriptCallWorkflowStarted(ctx, execCtx, step, childRunID) + subCtx = withRecorder(subCtx, childRecorder) + subResult, execErr := s.runWithCallStack(subCtx, config.Workflow, subInputs, execCtx.CallStack, childRunID, execCtx.WorkflowID) + s.emitTranscriptCallWorkflowCompleted(ctx, execCtx, step, childRunID, execErr) // Create sub-workflow result for tracking result := workflow.NewSubWorkflowResult(config.Workflow) diff --git a/internal/application/tools/router.go b/internal/application/tools/router.go index d5d6d2e9..aec281aa 100644 --- a/internal/application/tools/router.go +++ b/internal/application/tools/router.go @@ -7,8 +7,11 @@ import ( "sync" "time" + "github.com/google/uuid" + domerrors "github.com/awf-project/cli/internal/domain/errors" "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" ) var _ ports.ToolRouter = (*Router)(nil) @@ -26,6 +29,19 @@ type Router struct { providers []ports.ToolProvider tracer ports.Tracer logger ports.Logger + recorder ports.Recorder + runID string +} + +// SetRecorder attaches an optional transcript recorder for capturing tool.call / tool.result events. +func (r *Router) SetRecorder(rec ports.Recorder) { + r.recorder = rec +} + +// SetRunID sets the workflow run identifier stamped onto emitted tool.call / tool.result +// events so tool exchanges can be correlated to their originating workflow run. 
+func (r *Router) SetRunID(id string) { + r.runID = id } func NewRouter(tracer ports.Tracer, logger ports.Logger) *Router { @@ -100,6 +116,20 @@ func (r *Router) CallTool(ctx context.Context, name string, args map[string]any) span.SetAttribute("tool.source", entry.definition.Source) + var callID string + if r.recorder != nil { + callID = uuid.New().String() + callEvent := transcript.ExchangeEvent{ + Type: transcript.EventTypeToolCall, + RunID: r.runID, + Timestamp: time.Now(), + Payload: &transcript.ToolPayload{Name: name, CallID: callID, Input: args, Fidelity: transcript.FidelityRouter}, + } + if recErr := r.recorder.Record(ctx, callEvent); recErr != nil { + r.logger.Warn("transcript record warning", "error", recErr, "event", transcript.EventTypeToolCall) + } + } + result, err := entry.provider.CallTool(ctx, name, args) durationMs := time.Since(start).Milliseconds() @@ -109,6 +139,22 @@ func (r *Router) CallTool(ctx context.Context, name string, args map[string]any) span.RecordError(err) } + if r.recorder != nil { + resultPayload := &transcript.ToolPayload{Name: name, CallID: callID, Input: args, Fidelity: transcript.FidelityRouter, Output: result} + if err != nil { + resultPayload.Error = err.Error() + } + resultEvent := transcript.ExchangeEvent{ + Type: transcript.EventTypeToolResult, + RunID: r.runID, + Timestamp: time.Now(), + Payload: resultPayload, + } + if recErr := r.recorder.Record(ctx, resultEvent); recErr != nil { + r.logger.Warn("transcript record warning", "error", recErr, "event", transcript.EventTypeToolResult) + } + } + fields := []any{ "tool", name, "source", entry.definition.Source, diff --git a/internal/application/tools/router_test.go b/internal/application/tools/router_test.go index f58c944f..8561de27 100644 --- a/internal/application/tools/router_test.go +++ b/internal/application/tools/router_test.go @@ -77,6 +77,7 @@ func (t *mockTracer) Start(ctx context.Context, spanName string) (context.Contex type mockLogger struct { mu sync.Mutex 
infoLogs []string + warnLogs []string errorLogs []string fields []map[string]any } @@ -90,7 +91,13 @@ func (l *mockLogger) Info(msg string, fields ...any) { l.fields = append(l.fields, parseFields(fields)) } } -func (l *mockLogger) Warn(msg string, fields ...any) {} + +func (l *mockLogger) Warn(msg string, fields ...any) { + l.mu.Lock() + defer l.mu.Unlock() + l.warnLogs = append(l.warnLogs, msg) +} + func (l *mockLogger) Error(msg string, fields ...any) { l.mu.Lock() defer l.mu.Unlock() diff --git a/internal/application/tools/router_transcript_test.go b/internal/application/tools/router_transcript_test.go new file mode 100644 index 00000000..6a8d892f --- /dev/null +++ b/internal/application/tools/router_transcript_test.go @@ -0,0 +1,349 @@ +package tools + +import ( + "context" + "errors" + "sync" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" +) + +// mockRecorder captures Record calls for test verification. +type mockRecorder struct { + mu sync.Mutex + events []transcript.ExchangeEvent + recordErr error + recordCalls int +} + +func (m *mockRecorder) Record(ctx context.Context, event transcript.ExchangeEvent) error { + m.mu.Lock() + defer m.mu.Unlock() + m.recordCalls++ + m.events = append(m.events, event) + return m.recordErr +} + +func (m *mockRecorder) Subscribe() (<-chan transcript.ExchangeEvent, func()) { + ch := make(chan transcript.ExchangeEvent) + return ch, func() { close(ch) } +} + +func (m *mockRecorder) Close() error { + return nil +} + +func (m *mockRecorder) getEvents() []transcript.ExchangeEvent { + m.mu.Lock() + defer m.mu.Unlock() + result := make([]transcript.ExchangeEvent, len(m.events)) + copy(result, m.events) + return result +} + +// TestRouter_SetRecorder stores the recorder in the router field. 
+func TestRouter_SetRecorder(t *testing.T) { + tracer := &mockTracer{} + logger := &mockLogger{} + router := NewRouter(tracer, logger) + + rec := &mockRecorder{} + router.SetRecorder(rec) + + assert.NotNil(t, router.recorder) + assert.Equal(t, rec, router.recorder) +} + +// TestRouter_SetRunID_PropagatesToEmittedEvents verifies the run ID set via SetRunID +// is stamped onto both the tool.call and tool.result events so tool exchanges can be +// correlated to their originating workflow run. +func TestRouter_SetRunID_PropagatesToEmittedEvents(t *testing.T) { + tracer := &mockTracer{} + logger := &mockLogger{} + router := NewRouter(tracer, logger) + + rec := &mockRecorder{} + router.SetRecorder(rec) + router.SetRunID("run-1234") + + provider := &mockToolProvider{ + tools: []ports.ToolDefinition{ + {Name: "tool1", Source: "test"}, + }, + } + require.NoError(t, router.Register(context.Background(), provider)) + + _, err := router.CallTool(context.Background(), "tool1", map[string]any{}) + require.NoError(t, err) + + events := rec.getEvents() + require.Len(t, events, 2) + assert.Equal(t, "run-1234", events[0].RunID, "tool.call must carry the run ID") + assert.Equal(t, "run-1234", events[1].RunID, "tool.result must carry the run ID") +} + +// TestRouter_NilRecorderIsNoOp verifies no transcript emission when recorder unset. +func TestRouter_NilRecorderIsNoOp(t *testing.T) { + tracer := &mockTracer{} + logger := &mockLogger{} + router := NewRouter(tracer, logger) + + provider := &mockToolProvider{ + tools: []ports.ToolDefinition{ + {Name: "tool1", Source: "test"}, + }, + } + router.Register(context.Background(), provider) + + result, err := router.CallTool(context.Background(), "tool1", map[string]any{}) + + assert.NoError(t, err) + assert.NotNil(t, result) +} + +// TestRouter_BuiltinToolEmitsRouterFidelityPair verifies tool.call + tool.result events +// with Fidelity:"router" when recorder is set. 
+func TestRouter_BuiltinToolEmitsRouterFidelityPair(t *testing.T) { + tracer := &mockTracer{} + logger := &mockLogger{} + router := NewRouter(tracer, logger) + + rec := &mockRecorder{} + router.SetRecorder(rec) + + provider := &mockToolProvider{ + tools: []ports.ToolDefinition{ + {Name: "builtintool", Source: "builtin"}, + }, + } + router.Register(context.Background(), provider) + + result, err := router.CallTool(context.Background(), "builtintool", map[string]any{"key": "value"}) + + require.NoError(t, err) + assert.NotNil(t, result) + + events := rec.getEvents() + require.Len(t, events, 2, "must emit exactly one tool.call + one tool.result event") + + callEvent := events[0] + resultEvent := events[1] + + assert.Equal(t, transcript.EventTypeToolCall, callEvent.Type) + assert.Equal(t, transcript.EventTypeToolResult, resultEvent.Type) + + require.IsType(t, (*transcript.ToolPayload)(nil), callEvent.Payload) + require.IsType(t, (*transcript.ToolPayload)(nil), resultEvent.Payload) + + callPayload := callEvent.Payload.(*transcript.ToolPayload) + resultPayload := resultEvent.Payload.(*transcript.ToolPayload) + + assert.Equal(t, "builtintool", callPayload.Name) + assert.Equal(t, transcript.FidelityRouter, callPayload.Fidelity, "tool.call must have Fidelity:router") + assert.Equal(t, map[string]any{"key": "value"}, callPayload.Input) + assert.NotEmpty(t, callPayload.CallID, "tool.call must have CallID") + + assert.Equal(t, "builtintool", resultPayload.Name) + assert.Equal(t, transcript.FidelityRouter, resultPayload.Fidelity, "tool.result must have Fidelity:router") + assert.Equal(t, callPayload.CallID, resultPayload.CallID, "both events must carry same CallID") + assert.NotNil(t, resultPayload.Output, "tool.result must have Output") +} + +// TestRouter_ToolCallAndResultCarrySameCallID verifies CallID consistency. 
+func TestRouter_ToolCallAndResultCarrySameCallID(t *testing.T) { + tracer := &mockTracer{} + logger := &mockLogger{} + router := NewRouter(tracer, logger) + + rec := &mockRecorder{} + router.SetRecorder(rec) + + provider := &mockToolProvider{ + tools: []ports.ToolDefinition{ + {Name: "tool1", Source: "src"}, + }, + } + router.Register(context.Background(), provider) + + router.CallTool(context.Background(), "tool1", map[string]any{}) + + events := rec.getEvents() + require.Len(t, events, 2) + + callID1 := events[0].Payload.(*transcript.ToolPayload).CallID + callID2 := events[1].Payload.(*transcript.ToolPayload).CallID + + assert.Equal(t, callID1, callID2, "tool.call and tool.result must carry same CallID") +} + +// TestRouter_RecorderErrorsLoggedAsWarn verifies Record errors don't propagate. +func TestRouter_RecorderErrorsLoggedAsWarn(t *testing.T) { + tracer := &mockTracer{} + logger := &mockLogger{} + router := NewRouter(tracer, logger) + + recErr := errors.New("recorder failed") + rec := &mockRecorder{recordErr: recErr} + router.SetRecorder(rec) + + provider := &mockToolProvider{ + tools: []ports.ToolDefinition{ + {Name: "tool1", Source: "src"}, + }, + } + router.Register(context.Background(), provider) + + result, err := router.CallTool(context.Background(), "tool1", map[string]any{}) + + assert.NoError(t, err, "CallTool must not propagate recorder errors") + assert.NotNil(t, result, "CallTool must return result despite recorder error") + + logger.mu.Lock() + warnLogs := logger.warnLogs + logger.mu.Unlock() + assert.NotEmpty(t, warnLogs, "recorder error must be logged at WARN level (T049 convention)") +} + +// TestRouter_ToolResultPayloadIncludesOutput verifies tool.result contains output. 
+func TestRouter_ToolResultPayloadIncludesOutput(t *testing.T) { + tracer := &mockTracer{} + logger := &mockLogger{} + router := NewRouter(tracer, logger) + + rec := &mockRecorder{} + router.SetRecorder(rec) + + provider := &mockToolProvider{ + tools: []ports.ToolDefinition{ + {Name: "tool1", Source: "src"}, + }, + } + router.Register(context.Background(), provider) + + result, err := router.CallTool(context.Background(), "tool1", map[string]any{}) + + require.NoError(t, err) + require.NotNil(t, result) + + events := rec.getEvents() + require.Len(t, events, 2) + + resultPayload := events[1].Payload.(*transcript.ToolPayload) + assert.NotNil(t, resultPayload.Output, "tool.result payload must include Output field") + assert.Equal(t, result, resultPayload.Output, "tool.result Output must match CallTool return value") +} + +// TestRouter_ToolResultPayloadIncludesError verifies tool.result captures error string on failure. +func TestRouter_ToolResultPayloadIncludesError(t *testing.T) { + tracer := &mockTracer{} + logger := &mockLogger{} + router := NewRouter(tracer, logger) + + rec := &mockRecorder{} + router.SetRecorder(rec) + + testErr := errors.New("tool execution failed") + provider := &mockToolProvider{ + tools: []ports.ToolDefinition{ + {Name: "tool1", Source: "src"}, + }, + callToolErr: testErr, + } + router.Register(context.Background(), provider) + + result, err := router.CallTool(context.Background(), "tool1", map[string]any{}) + + assert.Error(t, err) + assert.Nil(t, result) + + events := rec.getEvents() + require.Len(t, events, 2) + + resultPayload := events[1].Payload.(*transcript.ToolPayload) + assert.Equal(t, "tool execution failed", resultPayload.Error) +} + +// TestRouter_FidelityDistinctFromAgentEmitted verifies router Fidelity is distinct from agent_emitted. 
+func TestRouter_FidelityDistinctFromAgentEmitted(t *testing.T) { + tracer := &mockTracer{} + logger := &mockLogger{} + router := NewRouter(tracer, logger) + + rec := &mockRecorder{} + router.SetRecorder(rec) + + provider := &mockToolProvider{ + tools: []ports.ToolDefinition{ + {Name: "tool1", Source: "src"}, + }, + } + router.Register(context.Background(), provider) + + router.CallTool(context.Background(), "tool1", map[string]any{}) + + events := rec.getEvents() + require.Len(t, events, 2) + + callPayload := events[0].Payload.(*transcript.ToolPayload) + resultPayload := events[1].Payload.(*transcript.ToolPayload) + + assert.Equal(t, transcript.FidelityRouter, callPayload.Fidelity) + assert.Equal(t, transcript.FidelityRouter, resultPayload.Fidelity) + assert.NotEqual(t, transcript.FidelityAgentEmitted, callPayload.Fidelity) + assert.NotEqual(t, transcript.FidelityAgentEmitted, resultPayload.Fidelity) +} + +// BenchmarkRouterCallTool_WithRecorder measures overhead of transcript recording. +// Should be <5% overhead compared to baseline (no recorder). +func BenchmarkRouterCallTool_WithRecorder(b *testing.B) { + tracer := &mockTracer{} + logger := &mockLogger{} + router := NewRouter(tracer, logger) + + rec := &mockRecorder{} + router.SetRecorder(rec) + + provider := &mockToolProvider{ + tools: []ports.ToolDefinition{ + {Name: "benchedtool", Source: "bench"}, + }, + } + router.Register(context.Background(), provider) + + args := map[string]any{"key": "value"} + + b.ReportAllocs() + b.ResetTimer() + + for range b.N { + router.CallTool(context.Background(), "benchedtool", args) + } +} + +// BenchmarkRouterCallTool_WithoutRecorder measures baseline overhead without recorder. 
+func BenchmarkRouterCallTool_WithoutRecorder(b *testing.B) { + tracer := &mockTracer{} + logger := &mockLogger{} + router := NewRouter(tracer, logger) + + provider := &mockToolProvider{ + tools: []ports.ToolDefinition{ + {Name: "benchedtool", Source: "bench"}, + }, + } + router.Register(context.Background(), provider) + + args := map[string]any{"key": "value"} + + b.ReportAllocs() + b.ResetTimer() + + for range b.N { + router.CallTool(context.Background(), "benchedtool", args) + } +} diff --git a/internal/domain/ports/agent_output_normalizer.go b/internal/domain/ports/agent_output_normalizer.go new file mode 100644 index 00000000..54ec6070 --- /dev/null +++ b/internal/domain/ports/agent_output_normalizer.go @@ -0,0 +1,16 @@ +package ports + +import "github.com/awf-project/cli/internal/domain/transcript" + +// AgentOutputNormalizer converts a provider's raw CLI output (NDJSON) into the canonical +// transcript ContentBlock vocabulary for F106 message.assistant events. It is the seam +// that absorbs per-provider output divergence (Claude, Codex, Gemini, Copilot, OpenAI +// compatible) in a single place (SC-002), implemented in the infrastructure agents layer. +// +// Implementations must be total and panic-free: an unknown provider, empty output, or +// unparseable lines yield no blocks rather than an error. +type AgentOutputNormalizer interface { + // Normalize maps the provider's raw output into ordered ContentBlocks. The provider + // argument is the resolved provider name (matching the agent registry key). 
+ Normalize(provider string, rawOutput []byte) []transcript.ContentBlock +} diff --git a/internal/domain/ports/recorder.go b/internal/domain/ports/recorder.go new file mode 100644 index 00000000..87e9b388 --- /dev/null +++ b/internal/domain/ports/recorder.go @@ -0,0 +1,30 @@ +package ports + +import ( + "context" + "errors" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +// ErrInvalidEvent is returned when a Record call is made with a zero-Type event. +var ErrInvalidEvent = errors.New("invalid event: Type must not be empty") + +// RecorderFactory creates a new Recorder that writes to path. +// The parent directory of path must already exist. +type RecorderFactory func(path string) (Recorder, error) + +// Recorder is the port for writing and broadcasting transcript exchange events. +type Recorder interface { + // Record appends the event to the transcript. Returns ErrInvalidEvent when + // event.Type is empty. Honors ctx cancellation. + Record(ctx context.Context, event transcript.ExchangeEvent) error + + // Subscribe returns a channel that receives every subsequent recorded event + // and a cancel closure that unregisters the subscriber and closes the channel. + // Calling cancel more than once is a no-op. + Subscribe() (ch <-chan transcript.ExchangeEvent, cancel func()) + + // Close flushes and releases resources. Idempotent: second call returns nil. + Close() error +} diff --git a/internal/domain/ports/recorder_contract_test.go b/internal/domain/ports/recorder_contract_test.go new file mode 100644 index 00000000..61fb862d --- /dev/null +++ b/internal/domain/ports/recorder_contract_test.go @@ -0,0 +1,103 @@ +package ports_test + +import ( + "context" + "sync" + "testing" + + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakeRecorder is a minimal in-memory Recorder for contract verification. 
+type fakeRecorder struct { + mu sync.Mutex + closed bool + subscribers []chan transcript.ExchangeEvent +} + +func (f *fakeRecorder) Record(ctx context.Context, event transcript.ExchangeEvent) error { + if event.Type == "" { + return ports.ErrInvalidEvent + } + if err := ctx.Err(); err != nil { + return err + } + f.mu.Lock() + defer f.mu.Unlock() + for _, ch := range f.subscribers { + ch <- event + } + return nil +} + +func (f *fakeRecorder) Subscribe() (<-chan transcript.ExchangeEvent, func()) { + ch := make(chan transcript.ExchangeEvent, 16) + f.mu.Lock() + f.subscribers = append(f.subscribers, ch) + f.mu.Unlock() + + var once sync.Once + cancel := func() { + once.Do(func() { + f.mu.Lock() + defer f.mu.Unlock() + for i, s := range f.subscribers { + if s == ch { + f.subscribers = append(f.subscribers[:i], f.subscribers[i+1:]...) + close(ch) + break + } + } + }) + } + return ch, cancel +} + +func (f *fakeRecorder) Close() error { + f.mu.Lock() + defer f.mu.Unlock() + if f.closed { + return nil + } + f.closed = true + return nil +} + +func TestRecorderContract_FakeIdempotentClose(t *testing.T) { + rec := &fakeRecorder{} + require.NoError(t, rec.Close()) + assert.NoError(t, rec.Close()) +} + +func TestRecorderContract_SubscribeCancelIdempotent(t *testing.T) { + rec := &fakeRecorder{} + _, cancel := rec.Subscribe() + cancel() + assert.NotPanics(t, func() { cancel() }) +} + +func TestRecorderContract_RecordNilEventReturnsError(t *testing.T) { + rec := &fakeRecorder{} + err := rec.Record(context.Background(), transcript.ExchangeEvent{}) + assert.ErrorIs(t, err, ports.ErrInvalidEvent) +} + +// TestRecorderContract_RecordRespectsCancelation verifies that Record respects +// context cancellation and returns the context error. 
+func TestRecorderContract_RecordRespectsCancelation(t *testing.T) { + rec := &fakeRecorder{} + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + event := transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + } + + err := rec.Record(ctx, event) + + assert.ErrorIs(t, err, context.Canceled) +} diff --git a/internal/domain/transcript/architecture_test.go b/internal/domain/transcript/architecture_test.go new file mode 100644 index 00000000..9f5e51e3 --- /dev/null +++ b/internal/domain/transcript/architecture_test.go @@ -0,0 +1,77 @@ +package transcript_test + +import ( + "go/parser" + "go/token" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestArchitecture_DomainTranscript_NoForbiddenImports(t *testing.T) { + fset := token.NewFileSet() + + filterNonTest := func(info os.FileInfo) bool { + return !strings.HasSuffix(info.Name(), "_test.go") + } + + //nolint:staticcheck // SA1019: ParseDir suffices for an import-only AST scan; build-tag precision is unnecessary here. + pkgs, err := parser.ParseDir(fset, ".", filterNonTest, parser.ImportsOnly) + if err != nil { + t.Fatalf("failed to parse package directory: %v", err) + } + + if len(pkgs) == 0 { + t.Fatal("no Go files found in package directory") + } + + allowedImports := map[string]struct{}{ + "encoding/json": {}, + "errors": {}, + "fmt": {}, + "time": {}, + } + + for _, pkg := range pkgs { + for name, file := range pkg.Files { + for _, imp := range file.Imports { + path := strings.Trim(imp.Path.Value, `"`) + if _, ok := allowedImports[path]; !ok { + t.Errorf("disallowed import %q in %s", path, filepath.Base(name)) + } + } + } + } +} + +func TestArchitecture_DomainTranscript_NoTestImportsLeak(t *testing.T) { + fset := token.NewFileSet() + + filterTestOnly := func(info os.FileInfo) bool { + return strings.HasSuffix(info.Name(), "_test.go") + } + + //nolint:staticcheck // SA1019: ParseDir suffices for an import-only AST scan; build-tag precision is unnecessary here. 
+ pkgs, err := parser.ParseDir(fset, ".", filterTestOnly, parser.ImportsOnly) + if err != nil { + t.Fatalf("failed to parse test files: %v", err) + } + + forbiddenPrefixes := []string{ + "github.com/awf-project/cli/internal/infrastructure", + } + + for _, pkg := range pkgs { + for name, file := range pkg.Files { + for _, imp := range file.Imports { + path := strings.Trim(imp.Path.Value, `"`) + for _, prefix := range forbiddenPrefixes { + if path == prefix || strings.HasPrefix(path, prefix+"/") { + t.Errorf("test file %s imports infrastructure package %q; domain tests must not depend on infrastructure", filepath.Base(name), path) + } + } + } + } + } +} diff --git a/internal/domain/transcript/content.go b/internal/domain/transcript/content.go new file mode 100644 index 00000000..7dcc26a7 --- /dev/null +++ b/internal/domain/transcript/content.go @@ -0,0 +1,95 @@ +package transcript + +import ( + "encoding/json" + "errors" + "fmt" +) + +type BlockType string + +const ( + BlockTypeText BlockType = "text" + BlockTypeThinking BlockType = "thinking" + BlockTypeToolUse BlockType = "tool_use" + BlockTypeToolResult BlockType = "tool_result" + BlockTypeCommand BlockType = "command" + BlockTypeStream BlockType = "stream" +) + +type Fidelity string + +const ( + FidelityRouter Fidelity = "router" + FidelityAgentEmitted Fidelity = "agent_emitted" +) + +var ErrUnknownBlockType = errors.New("unknown block type") + +type ContentBlock struct { + Type BlockType `json:"type"` + Fidelity Fidelity `json:"fidelity"` + + Text string `json:"text,omitempty"` + Thinking string `json:"thinking,omitempty"` + + ToolName string `json:"tool_name,omitempty"` + ToolID string `json:"tool_id,omitempty"` + ToolInput any `json:"tool_input,omitempty"` + ToolContent any `json:"tool_content,omitempty"` + + Command string `json:"command,omitempty"` + Chunk string `json:"chunk,omitempty"` +} + +func (b ContentBlock) MarshalJSON() ([]byte, error) { //nolint:gocritic // hugeParam: value receiver required so 
json.Marshal(block) invokes custom marshaler + type wire struct { + Type BlockType `json:"type"` + Fidelity Fidelity `json:"fidelity"` + Text string `json:"text,omitempty"` + Thinking string `json:"thinking,omitempty"` + ToolName string `json:"tool_name,omitempty"` + ToolID string `json:"tool_id,omitempty"` + ToolInput any `json:"tool_input,omitempty"` + ToolContent any `json:"tool_content,omitempty"` + Command string `json:"command,omitempty"` + Chunk string `json:"chunk,omitempty"` + } + w := wire(b) //nolint:govet // wire has identical field layout; conversion is safe + data, err := json.Marshal(w) + if err != nil { + return nil, fmt.Errorf("marshaling content block: %w", err) + } + return data, nil +} + +func (b *ContentBlock) UnmarshalJSON(data []byte) error { + type rawBlock ContentBlock + var raw rawBlock + if err := json.Unmarshal(data, &raw); err != nil { + return fmt.Errorf("decoding content block: %w", err) + } + if !ValidBlockType(raw.Type) { + return fmt.Errorf("%w: %s", ErrUnknownBlockType, raw.Type) + } + *b = ContentBlock(raw) + return nil +} + +func ValidBlockType(bt BlockType) bool { + switch bt { + case BlockTypeText, BlockTypeThinking, BlockTypeToolUse, BlockTypeToolResult, BlockTypeCommand, BlockTypeStream: + return true + default: + return false + } +} + +func ValidFidelity(f Fidelity) bool { + switch f { + case FidelityRouter, FidelityAgentEmitted: + return true + default: + return false + } +} diff --git a/internal/domain/transcript/content_test.go b/internal/domain/transcript/content_test.go new file mode 100644 index 00000000..f2aa2846 --- /dev/null +++ b/internal/domain/transcript/content_test.go @@ -0,0 +1,443 @@ +package transcript_test + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +// TestBlockTypeEnumCoverage verifies all BlockType constants are defined with correct values +func 
TestBlockTypeEnumCoverage(t *testing.T) { + assert.Equal(t, transcript.BlockType("text"), transcript.BlockTypeText) + assert.Equal(t, transcript.BlockType("thinking"), transcript.BlockTypeThinking) + assert.Equal(t, transcript.BlockType("tool_use"), transcript.BlockTypeToolUse) + assert.Equal(t, transcript.BlockType("tool_result"), transcript.BlockTypeToolResult) + assert.Equal(t, transcript.BlockType("command"), transcript.BlockTypeCommand) + assert.Equal(t, transcript.BlockType("stream"), transcript.BlockTypeStream) +} + +// TestFidelityEnumCoverage verifies all Fidelity constants are defined with correct values +func TestFidelityEnumCoverage(t *testing.T) { + assert.Equal(t, transcript.Fidelity("router"), transcript.FidelityRouter) + assert.Equal(t, transcript.Fidelity("agent_emitted"), transcript.FidelityAgentEmitted) +} + +// TestValidBlockType_AllValidTypes verifies ValidBlockType returns true for all valid BlockType values +func TestValidBlockType_AllValidTypes(t *testing.T) { + tests := []struct { + name string + blockType transcript.BlockType + }{ + {name: "text", blockType: transcript.BlockTypeText}, + {name: "thinking", blockType: transcript.BlockTypeThinking}, + {name: "tool_use", blockType: transcript.BlockTypeToolUse}, + {name: "tool_result", blockType: transcript.BlockTypeToolResult}, + {name: "command", blockType: transcript.BlockTypeCommand}, + {name: "stream", blockType: transcript.BlockTypeStream}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.True(t, transcript.ValidBlockType(tt.blockType)) + }) + } +} + +// TestValidBlockType_InvalidType verifies ValidBlockType returns false for unknown BlockType +func TestValidBlockType_InvalidType(t *testing.T) { + assert.False(t, transcript.ValidBlockType(transcript.BlockType("invalid"))) + assert.False(t, transcript.ValidBlockType(transcript.BlockType(""))) + assert.False(t, transcript.ValidBlockType(transcript.BlockType("unknown.type"))) +} + +// 
TestValidFidelity_AllValidTypes verifies ValidFidelity returns true for all valid Fidelity values +func TestValidFidelity_AllValidTypes(t *testing.T) { + tests := []struct { + name string + fidelity transcript.Fidelity + }{ + {name: "router", fidelity: transcript.FidelityRouter}, + {name: "agent_emitted", fidelity: transcript.FidelityAgentEmitted}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.True(t, transcript.ValidFidelity(tt.fidelity)) + }) + } +} + +// TestValidFidelity_InvalidType verifies ValidFidelity returns false for unknown Fidelity +func TestValidFidelity_InvalidType(t *testing.T) { + assert.False(t, transcript.ValidFidelity(transcript.Fidelity("invalid"))) + assert.False(t, transcript.ValidFidelity(transcript.Fidelity(""))) + assert.False(t, transcript.ValidFidelity(transcript.Fidelity("unknown.fidelity"))) +} + +// TestContentBlockFields verifies all ContentBlock fields are accessible +func TestContentBlockFields(t *testing.T) { + block := transcript.ContentBlock{ + Type: transcript.BlockTypeToolUse, + Fidelity: transcript.FidelityRouter, + Text: "sample text", + Thinking: "sample thinking", + ToolName: "bash", + ToolID: "tool-123", + ToolInput: map[string]string{"cmd": "ls"}, + ToolContent: "output", + Command: "bash -c 'ls'", + Chunk: "chunk-1", + } + + assert.Equal(t, transcript.BlockTypeToolUse, block.Type) + assert.Equal(t, transcript.FidelityRouter, block.Fidelity) + assert.Equal(t, "sample text", block.Text) + assert.Equal(t, "sample thinking", block.Thinking) + assert.Equal(t, "bash", block.ToolName) + assert.Equal(t, "tool-123", block.ToolID) + assert.Equal(t, map[string]string{"cmd": "ls"}, block.ToolInput) + assert.Equal(t, "output", block.ToolContent) + assert.Equal(t, "bash -c 'ls'", block.Command) + assert.Equal(t, "chunk-1", block.Chunk) +} + +// TestContentBlockMarshalJSON_DeterministicFieldOrder verifies ContentBlock.MarshalJSON emits type,fidelity first +func 
TestContentBlockMarshalJSON_DeterministicFieldOrder(t *testing.T) { + block := transcript.ContentBlock{ + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityRouter, + Text: "hello world", + } + + data, err := json.Marshal(block) + require.NoError(t, err) + + jsonStr := string(data) + typePos := indexOfKey(jsonStr, "type") + fidelityPos := indexOfKey(jsonStr, "fidelity") + textPos := indexOfKey(jsonStr, "text") + + assert.True(t, typePos < fidelityPos, "type should come before fidelity") + assert.True(t, fidelityPos < textPos, "fidelity should come before text") +} + +// TestContentBlockMarshalJSON_AllBlockTypes verifies ContentBlock marshals all BlockType variants +func TestContentBlockMarshalJSON_AllBlockTypes(t *testing.T) { + tests := []struct { + name string + block transcript.ContentBlock + }{ + { + name: "text block", + block: transcript.ContentBlock{ + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityRouter, + Text: "sample text", + }, + }, + { + name: "thinking block", + block: transcript.ContentBlock{ + Type: transcript.BlockTypeThinking, + Fidelity: transcript.FidelityAgentEmitted, + Thinking: "internal reasoning", + }, + }, + { + name: "tool_use block", + block: transcript.ContentBlock{ + Type: transcript.BlockTypeToolUse, + Fidelity: transcript.FidelityRouter, + ToolName: "bash", + ToolID: "call-1", + ToolInput: map[string]string{"cmd": "echo test"}, + }, + }, + { + name: "tool_result block", + block: transcript.ContentBlock{ + Type: transcript.BlockTypeToolResult, + Fidelity: transcript.FidelityRouter, + ToolContent: "result data", + }, + }, + { + name: "command block", + block: transcript.ContentBlock{ + Type: transcript.BlockTypeCommand, + Fidelity: transcript.FidelityRouter, + Command: "ls -la", + }, + }, + { + name: "stream block", + block: transcript.ContentBlock{ + Type: transcript.BlockTypeStream, + Fidelity: transcript.FidelityAgentEmitted, + Chunk: "streaming data", + }, + }, + } + + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + data, err := json.Marshal(tt.block) + require.NoError(t, err) + assert.NotNil(t, data) + }) + } +} + +// TestContentBlockRoundTrip verifies json.Marshal → json.Unmarshal recovers equality +func TestContentBlockRoundTrip(t *testing.T) { + tests := []struct { + name string + block transcript.ContentBlock + }{ + { + name: "text block round-trip", + block: transcript.ContentBlock{ + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityRouter, + Text: "hello", + }, + }, + { + name: "tool_use block round-trip", + block: transcript.ContentBlock{ + Type: transcript.BlockTypeToolUse, + Fidelity: transcript.FidelityAgentEmitted, + ToolName: "bash", + ToolID: "id-123", + ToolInput: map[string]interface{}{"arg": "value"}, + }, + }, + { + name: "minimal block round-trip", + block: transcript.ContentBlock{ + Type: transcript.BlockTypeThinking, + Fidelity: transcript.FidelityRouter, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + data, err := json.Marshal(tt.block) + require.NoError(t, err) + + var recovered transcript.ContentBlock + err = json.Unmarshal(data, &recovered) + require.NoError(t, err) + + assert.Equal(t, tt.block, recovered) + }) + } +} + +// TestNULBytePreservation verifies NUL-byte survives JSON round-trip via standard escape +func TestNULBytePreservation(t *testing.T) { + nulText := "hello" + string(byte(0)) + "world" + block := transcript.ContentBlock{ + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityRouter, + Text: nulText, + } + + data, err := json.Marshal(block) + require.NoError(t, err) + + var recovered transcript.ContentBlock + err = json.Unmarshal(data, &recovered) + require.NoError(t, err) + + assert.Equal(t, nulText, recovered.Text) +} + +// TestMessagePayloadFields verifies all MessagePayload fields are accessible +func TestMessagePayloadFields(t *testing.T) { + blocks := []transcript.ContentBlock{ + { + Type: transcript.BlockTypeText, + Fidelity: 
transcript.FidelityRouter, + Text: "message text", + }, + } + payload := transcript.MessagePayload{ + Role: "user", + Blocks: blocks, + } + + assert.Equal(t, "user", payload.Role) + assert.Equal(t, blocks, payload.Blocks) +} + +// TestMessagePayloadMarshalUnmarshal verifies MessagePayload round-trip +func TestMessagePayloadMarshalUnmarshal(t *testing.T) { + payload := transcript.MessagePayload{ + Role: "assistant", + Blocks: []transcript.ContentBlock{ + { + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityAgentEmitted, + Text: "response", + }, + }, + } + + data, err := json.Marshal(payload) + require.NoError(t, err) + + var recovered transcript.MessagePayload + err = json.Unmarshal(data, &recovered) + require.NoError(t, err) + + assert.Equal(t, payload.Role, recovered.Role) + assert.Len(t, recovered.Blocks, len(payload.Blocks)) +} + +// TestStepPayloadFields verifies all StepPayload fields are accessible +func TestStepPayloadFields(t *testing.T) { + payload := transcript.StepPayload{ + Name: "step-1", + Kind: "shell", + Error: "failed", + Result: "error output", + } + + assert.Equal(t, "step-1", payload.Name) + assert.Equal(t, "shell", payload.Kind) + assert.Equal(t, "failed", payload.Error) + assert.Equal(t, "error output", payload.Result) +} + +// TestStepPayloadMarshalUnmarshal verifies StepPayload round-trip with optional fields +func TestStepPayloadMarshalUnmarshal(t *testing.T) { + tests := []struct { + name string + payload transcript.StepPayload + }{ + { + name: "with error and result", + payload: transcript.StepPayload{ + Name: "failing-step", + Kind: "command", + Error: "exit code 1", + Result: "failed", + }, + }, + { + name: "without optional fields", + payload: transcript.StepPayload{ + Name: "successful-step", + Kind: "script", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + data, err := json.Marshal(tt.payload) + require.NoError(t, err) + + var recovered transcript.StepPayload + err = 
json.Unmarshal(data, &recovered) + require.NoError(t, err) + + assert.Equal(t, tt.payload, recovered) + }) + } +} + +// TestToolPayloadFields verifies all ToolPayload fields are accessible +func TestToolPayloadFields(t *testing.T) { + payload := transcript.ToolPayload{ + Name: "bash", + CallID: "call-123", + Input: map[string]string{"cmd": "echo"}, + Output: "result", + Fidelity: transcript.FidelityRouter, + } + + assert.Equal(t, "bash", payload.Name) + assert.Equal(t, "call-123", payload.CallID) + assert.Equal(t, map[string]string{"cmd": "echo"}, payload.Input) + assert.Equal(t, "result", payload.Output) + assert.Equal(t, transcript.FidelityRouter, payload.Fidelity) +} + +// TestToolPayloadMarshalUnmarshal verifies ToolPayload round-trip +func TestToolPayloadMarshalUnmarshal(t *testing.T) { + payload := transcript.ToolPayload{ + Name: "grep", + CallID: "call-456", + Input: map[string]interface{}{"pattern": "error", "file": "app.log"}, + Output: []string{"line 1", "line 2"}, + Fidelity: transcript.FidelityAgentEmitted, + } + + data, err := json.Marshal(payload) + require.NoError(t, err) + + var recovered transcript.ToolPayload + err = json.Unmarshal(data, &recovered) + require.NoError(t, err) + + assert.Equal(t, payload.Name, recovered.Name) + assert.Equal(t, payload.CallID, recovered.CallID) + assert.Equal(t, payload.Fidelity, recovered.Fidelity) +} + +// TestContentBlockUnmarshalJSON_UnknownBlockType verifies unknown BlockType returns ErrUnknownBlockType +func TestContentBlockUnmarshalJSON_UnknownBlockType(t *testing.T) { + data := []byte(`{ + "type": "unknown.block", + "fidelity": "router" + }`) + + var block transcript.ContentBlock + err := json.Unmarshal(data, &block) + assert.ErrorIs(t, err, transcript.ErrUnknownBlockType) +} + +// TestMessagePayloadWithMultipleBlocks verifies MessagePayload with multiple ContentBlocks +func TestMessagePayloadWithMultipleBlocks(t *testing.T) { + payload := transcript.MessagePayload{ + Role: "assistant", + Blocks: 
[]transcript.ContentBlock{ + { + Type: transcript.BlockTypeThinking, + Fidelity: transcript.FidelityAgentEmitted, + Thinking: "let me think", + }, + { + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityAgentEmitted, + Text: "here is my response", + }, + { + Type: transcript.BlockTypeToolUse, + Fidelity: transcript.FidelityAgentEmitted, + ToolName: "bash", + ToolID: "id-1", + ToolInput: map[string]string{"cmd": "ls"}, + }, + }, + } + + data, err := json.Marshal(payload) + require.NoError(t, err) + + var recovered transcript.MessagePayload + err = json.Unmarshal(data, &recovered) + require.NoError(t, err) + + assert.Len(t, recovered.Blocks, 3) + assert.Equal(t, transcript.BlockTypeThinking, recovered.Blocks[0].Type) + assert.Equal(t, transcript.BlockTypeText, recovered.Blocks[1].Type) + assert.Equal(t, transcript.BlockTypeToolUse, recovered.Blocks[2].Type) +} diff --git a/internal/domain/transcript/doc.go b/internal/domain/transcript/doc.go new file mode 100644 index 00000000..a2938348 --- /dev/null +++ b/internal/domain/transcript/doc.go @@ -0,0 +1,182 @@ +// Package transcript defines the canonical in-memory model for agent exchange +// transcripts produced by AWF workflow executions. Every interaction between AWF +// and an AI agent — user prompts, assistant replies, tool invocations, and workflow +// lifecycle events — is represented as an ExchangeEvent carrying a typed Payload. +// The model is serialization-first: all types implement or participate in custom +// JSON marshaling to produce a stable, forward-compatible wire format as required +// by FR-014, D2, and NFR-005. +// +// # Purpose +// +// This package occupies the domain layer of the hexagonal architecture. It defines +// the shared vocabulary used by the application layer to record and replay agent +// interactions, and by the infrastructure layer to persist transcripts as JSON lines. 
+// The package has no external dependencies beyond the Go standard library; it must +// remain importable without pulling in any third-party module so that consumers +// (transport adapters, test helpers, analytics tooling) can use it independently. +// +// An agent exchange transcript is an ordered sequence of ExchangeEvent values +// emitted during a workflow run. Each event carries a monotonically increasing Seq +// counter, a RunID identifying the workflow instance, optional ParentRunID and +// ChildRunID for nested call_workflow steps, an EventType discriminant, a Path +// marking the current step, an Iteration counter for retry loops, a UTC Timestamp, +// and an opaque Payload whose concrete type is determined by EventType. +// +// The closed vocabulary of EventType values documented below is exhaustive. Decoders +// that encounter an unrecognized EventType must return ErrUnknownEventType and +// reject the event rather than silently skipping it, so callers retain the option +// to surface unknown events as warnings. The same forward-compat contract (D2) +// applies to BlockType: decoders return ErrUnknownBlockType on unrecognized values. +// +// # Public Surface +// +// event.go — ExchangeEvent and the EventType closed vocabulary: +// +// - EventType — defined string type discriminating the ten recognized event kinds. 
+// The complete closed vocabulary (exhaustive; no other values are valid): +// EventTypeRunStarted = "run.started" +// EventTypeRunCompleted = "run.completed" +// EventTypeStepStarted = "step.started" +// EventTypeStepCompleted = "step.completed" +// EventTypeStepCallWorkflowStarted = "step.call_workflow.started" +// EventTypeStepCallWorkflowCompleted = "step.call_workflow.completed" +// EventTypeMessageUser = "message.user" +// EventTypeMessageAssistant = "message.assistant" +// EventTypeToolCall = "tool.call" +// EventTypeToolResult = "tool.result" +// +// - ErrUnknownEventType — sentinel returned by decoders for unrecognized EventType +// strings. Callers must use errors.Is(err, ErrUnknownEventType) to distinguish +// forward-compat unknown-type events from structural parse failures. +// +// - ExchangeEvent — the top-level record type for one event in the transcript. +// Fields: Seq (uint64, monotone counter), RunID (string, UUID of the workflow +// run), ParentRunID (string, optional UUID of the enclosing parent run for nested +// workflows), ChildRunID (string, optional UUID of a spawned child run), +// Type (EventType), Path (string, dot-separated step path within the workflow +// definition), Iteration (int, zero-based retry counter), Timestamp (time.Time, +// UTC), Payload (any, typed by Type — see payload.go for the concrete types). +// ExchangeEvent.MarshalJSON encodes the Payload through encoding/json using +// whatever concrete value the field holds; it does not switch on Type. +// ExchangeEvent.UnmarshalJSON returns ErrUnknownEventType if the Type is not in +// the closed vocabulary; otherwise the decoder picks the concrete type for an object +// payload by probing its JSON keys ("role", "call_id", "kind"), not by Type. +// +// content.go — ContentBlock, BlockType, and Fidelity: +// +// - BlockType — string alias discriminating the six recognized content block kinds. 
+// The complete closed vocabulary (exhaustive): +// BlockTypeText = "text" +// BlockTypeThinking = "thinking" +// BlockTypeToolUse = "tool_use" +// BlockTypeToolResult = "tool_result" +// BlockTypeCommand = "command" +// BlockTypeStream = "stream" +// +// - Fidelity — string alias indicating the provenance of a ContentBlock. Exactly +// two values are defined: +// FidelityRouter = "router" — block synthesized by the AWF router +// FidelityAgentEmitted = "agent_emitted" — block emitted directly by the AI agent +// +// - ErrUnknownBlockType — sentinel returned by validators and decoders for +// unrecognized BlockType strings. Usage mirrors ErrUnknownEventType exactly. +// +// - ContentBlock — a single typed content element within a message or tool payload. +// Fields: Type (BlockType), Fidelity (Fidelity), Text (string, for text and +// stream blocks), Thinking (string, for thinking blocks), ToolName (string), +// ToolID (string, opaque correlation token), ToolInput (any), ToolContent (any), +// Command (string, for command blocks), Chunk (string, for stream blocks). +// ContentBlock.MarshalJSON routes to the appropriate field subset based on Type; +// fields irrelevant to the block's Type are omitted via omitempty JSON tags. +// +// - ValidBlockType(bt BlockType) bool — returns true if bt is one of the six +// recognized BlockType values. Used by decoders before construction to gate +// ErrUnknownBlockType. Does not allocate; pure closed-set membership test. +// +// - ValidFidelity(f Fidelity) bool — returns true if f is FidelityRouter or +// FidelityAgentEmitted. Guards ContentBlock construction in strict-mode decoders. +// Does not allocate. +// +// payload.go — concrete Payload types keyed by EventType: +// +// - MessagePayload — payload for EventTypeMessageUser and EventTypeMessageAssistant. +// Fields: Role (string, "user" or "assistant"), Blocks ([]ContentBlock, ordered +// content sequence; nil and empty slice are semantically equivalent). 
+// +// - StepPayload — payload for EventTypeStepStarted, EventTypeStepCompleted, +// EventTypeStepCallWorkflowStarted, and EventTypeStepCallWorkflowCompleted. +// Fields: Name (string, step identifier matching the workflow YAML key), +// Kind (string, "step" or "parallel"), Error (string, omitempty), +// Result (any, omitempty, serialized step output on completion). +// +// - ToolPayload — payload for EventTypeToolCall and EventTypeToolResult. +// Fields: Name (string, tool name), CallID (string, opaque correlation ID +// shared between the call and result events), Input (any, decoded tool arguments), +// Output (any, decoded tool result; nil for EventTypeToolCall events), +// Error (string, omitempty), Fidelity (Fidelity, source attribution for the tool interaction). +// +// # Threat Model +// +// The transcript domain operates entirely within process memory during a workflow +// run. The primary threat surface is malformed or adversarially crafted JSON +// consumed during transcript replay or import from external files. +// +// - Unknown EventType / BlockType values (D2, NFR-005): Decoders MUST return +// ErrUnknownEventType or ErrUnknownBlockType rather than silently skipping or +// defaulting unrecognized values. This contract ensures forward-compat events +// surface as explicit errors rather than silently corrupting aggregation or replay. +// +// - Arbitrary Payload bytes: ExchangeEvent.UnmarshalJSON must not pass raw +// attacker-controlled bytes to any execution primitive. The Payload any field +// is decoded only into plain data (payload.go types, []ContentBlock, or generic JSON +// values); no eval, no dynamic dispatch beyond the decoder's fixed key probes. +// +// - Nil MessagePayload.Blocks slice: MessagePayload.Blocks may be nil for empty +// messages. Consumers must treat nil and empty slice identically (len == 0). +// Range over a nil slice is safe in Go; no nil-guard required at call sites. +// +// - Oversized payloads: This package sets no size limit on ContentBlock.Text or +// ToolPayload.Input. 
Callers operating over untrusted sources (file import, HTTP +// upload) must impose their own size caps before passing bytes to UnmarshalJSON. +// +// # Error Taxonomy +// +// This package exposes two sentinel errors: +// +// - ErrUnknownEventType: Returned by ExchangeEvent.UnmarshalJSON when the "type" +// JSON field contains a string not in the EventType closed vocabulary. Callers +// implementing permissive forward-compat decoding should check +// errors.Is(err, ErrUnknownEventType) and treat it as a warning rather than fatal, +// allowing the transcript stream to continue past unknown future event kinds. +// +// - ErrUnknownBlockType: Returned by ValidBlockType-gated code paths when a +// ContentBlock.Type value is not in the BlockType closed vocabulary. Identical +// forward-compat handling applies. Both errors wrap no inner error; callers that +// need the specific unknown string value should capture it before calling this +// package. +// +// Both sentinels are plain values (no wrapped context). Use errors.Is for matching. +// +// # Dependency Contract +// +// This package is permitted to import only the following standard library packages: +// +// - encoding/json — for MarshalJSON / UnmarshalJSON implementations. +// - errors — for ErrUnknownEventType and ErrUnknownBlockType sentinel definitions. +// - fmt — for error message formatting in UnmarshalJSON dispatch. +// - time — for ExchangeEvent.Timestamp (time.Time, always UTC). +// +// It MUST NOT import: +// +// - internal/application — hexagonal rule: domain must not depend on application. +// - internal/infrastructure — domain must not depend on infrastructure adapters. +// - internal/interfaces — domain must not depend on the interface/CLI layer. +// - Any third-party module — the domain layer must remain dependency-free to +// prevent transitive version conflicts and to enable use in lightweight analysis +// tooling outside the full AWF module graph. 
+// +// The test TestArchitecture_DomainTranscript_NoForbiddenImports (architecture_test.go) +// enforces this contract via AST import scanning at every test run. It asserts that +// across all non-test Go files in this package the only imports are the four stdlib +// packages listed above; any other import triggers a test failure. +package transcript diff --git a/internal/domain/transcript/doc_test.go b/internal/domain/transcript/doc_test.go new file mode 100644 index 00000000..aa99a3be --- /dev/null +++ b/internal/domain/transcript/doc_test.go @@ -0,0 +1,110 @@ +package transcript_test + +import ( + "go/ast" + "go/parser" + "go/token" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func loadTranscriptDocFile(t *testing.T) string { + t.Helper() + content, err := os.ReadFile("doc.go") + require.NoError(t, err, "failed to read doc.go") + return string(content) +} + +func TestDocDrift_CommentLineCount(t *testing.T) { + doc := loadTranscriptDocFile(t) + lines := strings.Split(doc, "\n") + + commentLines := 0 + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if strings.HasPrefix(trimmed, "//") { + commentLines++ + } + } + + assert.GreaterOrEqual(t, commentLines, 100, + "doc.go should have at least 100 non-blank comment lines (got %d)", commentLines) +} + +func TestDocDrift_RequiredSections(t *testing.T) { + doc := loadTranscriptDocFile(t) + + sections := []string{ + "# Purpose", + "# Public Surface", + "# Threat Model", + "# Error Taxonomy", + "# Dependency Contract", + } + + for _, section := range sections { + assert.Contains(t, doc, section, "should have %s section", section) + } +} + +func TestDocDrift_ExportedSymbols(t *testing.T) { + doc := loadTranscriptDocFile(t) + declared := transcriptExportedDecls(t) + + for name := range declared { + assert.Contains(t, doc, name, + "doc.go should document exported symbol %q", name) + } +} + +func transcriptExportedDecls(t *testing.T) 
map[string]struct{} { + t.Helper() + fset := token.NewFileSet() + //nolint:staticcheck // SA1019: ParseDir suffices for a declaration-name scan; build-tag precision is unnecessary here. + pkgs, err := parser.ParseDir(fset, ".", func(info os.FileInfo) bool { + return !strings.HasSuffix(info.Name(), "_test.go") + }, 0) + require.NoError(t, err, "failed to parse package directory") + + declared := make(map[string]struct{}) + for _, pkg := range pkgs { + for _, file := range pkg.Files { + for _, decl := range file.Decls { + transcriptCollectExportedDecl(decl, declared) + } + } + } + return declared +} + +func transcriptCollectExportedDecl(decl ast.Decl, out map[string]struct{}) { + switch d := decl.(type) { + case *ast.FuncDecl: + if d.Recv == nil && d.Name.IsExported() { + out[d.Name.Name] = struct{}{} + } + case *ast.GenDecl: + for _, spec := range d.Specs { + transcriptCollectExportedSpec(spec, out) + } + } +} + +func transcriptCollectExportedSpec(spec ast.Spec, out map[string]struct{}) { + switch s := spec.(type) { + case *ast.TypeSpec: + if s.Name.IsExported() { + out[s.Name.Name] = struct{}{} + } + case *ast.ValueSpec: + for _, n := range s.Names { + if n.IsExported() { + out[n.Name] = struct{}{} + } + } + } +} diff --git a/internal/domain/transcript/event.go b/internal/domain/transcript/event.go new file mode 100644 index 00000000..d2555352 --- /dev/null +++ b/internal/domain/transcript/event.go @@ -0,0 +1,168 @@ +package transcript + +import ( + "encoding/json" + "errors" + "fmt" + "time" +) + +type EventType string + +const ( + EventTypeRunStarted EventType = "run.started" + EventTypeRunCompleted EventType = "run.completed" + EventTypeStepStarted EventType = "step.started" + EventTypeStepCompleted EventType = "step.completed" + EventTypeStepCallWorkflowStarted EventType = "step.call_workflow.started" + EventTypeStepCallWorkflowCompleted EventType = "step.call_workflow.completed" + EventTypeMessageUser EventType = "message.user" + EventTypeMessageAssistant 
EventType = "message.assistant" + EventTypeToolCall EventType = "tool.call" + EventTypeToolResult EventType = "tool.result" +) + +var ErrUnknownEventType = errors.New("unknown event type") + +type ExchangeEvent struct { + Seq uint64 `json:"seq"` + RunID string `json:"run_id"` + ParentRunID string `json:"parent_run_id,omitempty"` + ChildRunID string `json:"child_run_id,omitempty"` + Type EventType `json:"type"` + Path string `json:"path"` + Iteration int `json:"iteration"` + Timestamp time.Time `json:"timestamp"` + Payload any `json:"payload"` +} + +func (e ExchangeEvent) MarshalJSON() ([]byte, error) { //nolint:gocritic // hugeParam: value receiver required so json.Marshal(event) invokes custom marshaler + type wire struct { + Seq uint64 `json:"seq"` + RunID string `json:"run_id"` + ParentRunID string `json:"parent_run_id,omitempty"` + ChildRunID string `json:"child_run_id,omitempty"` + Type EventType `json:"type"` + Path string `json:"path"` + Iteration int `json:"iteration"` + Timestamp time.Time `json:"timestamp"` + Payload any `json:"payload"` + } + w := wire(e) //nolint:govet // wire has identical field layout; conversion is safe + data, err := json.Marshal(w) + if err != nil { + return nil, fmt.Errorf("marshaling exchange event: %w", err) + } + return data, nil +} + +func (e *ExchangeEvent) UnmarshalJSON(data []byte) error { + type rawEvent struct { + Seq uint64 `json:"seq"` + RunID string `json:"run_id"` + ParentRunID string `json:"parent_run_id,omitempty"` + ChildRunID string `json:"child_run_id,omitempty"` + Type EventType `json:"type"` + Path string `json:"path"` + Iteration int `json:"iteration"` + Timestamp time.Time `json:"timestamp"` + Payload json.RawMessage `json:"payload"` + } + + var raw rawEvent + if err := json.Unmarshal(data, &raw); err != nil { + return fmt.Errorf("decoding exchange event: %w", err) + } + + if !validEventType(raw.Type) { + return fmt.Errorf("%w: %s", ErrUnknownEventType, raw.Type) + } + + e.Seq = raw.Seq + e.RunID = raw.RunID 
+ e.ParentRunID = raw.ParentRunID + e.ChildRunID = raw.ChildRunID + e.Type = raw.Type + e.Path = raw.Path + e.Iteration = raw.Iteration + e.Timestamp = raw.Timestamp + + if len(raw.Payload) == 0 || string(raw.Payload) == "null" { + e.Payload = nil + return nil + } + + payload, err := dispatchPayload(raw.Payload) + if err != nil { + return fmt.Errorf("decoding payload: %w", err) + } + e.Payload = payload + return nil +} + +func validEventType(et EventType) bool { + switch et { + case EventTypeRunStarted, + EventTypeRunCompleted, + EventTypeStepStarted, + EventTypeStepCompleted, + EventTypeStepCallWorkflowStarted, + EventTypeStepCallWorkflowCompleted, + EventTypeMessageUser, + EventTypeMessageAssistant, + EventTypeToolCall, + EventTypeToolResult: + return true + default: + return false + } +} + +func dispatchPayload(raw json.RawMessage) (any, error) { + if len(raw) > 0 && raw[0] == '[' { + var blocks []ContentBlock + if err := json.Unmarshal(raw, &blocks); err != nil { + return nil, fmt.Errorf("decoding content block array: %w", err) + } + return blocks, nil + } + + var probe map[string]json.RawMessage + if err := json.Unmarshal(raw, &probe); err != nil { + var v any + if err2 := json.Unmarshal(raw, &v); err2 != nil { + return nil, fmt.Errorf("decoding payload value: %w", err2) + } + return v, nil + } + + if probe["role"] != nil { + var p MessagePayload + if err := json.Unmarshal(raw, &p); err != nil { + return nil, fmt.Errorf("decoding message payload: %w", err) + } + return &p, nil + } + + if probe["call_id"] != nil { + var p ToolPayload + if err := json.Unmarshal(raw, &p); err != nil { + return nil, fmt.Errorf("decoding tool payload: %w", err) + } + return &p, nil + } + + if probe["kind"] != nil { + var p StepPayload + if err := json.Unmarshal(raw, &p); err != nil { + return nil, fmt.Errorf("decoding step payload: %w", err) + } + return &p, nil + } + + var v any + if err := json.Unmarshal(raw, &v); err != nil { + return nil, fmt.Errorf("decoding generic payload: 
%w", err) + } + return v, nil +} diff --git a/internal/domain/transcript/event_test.go b/internal/domain/transcript/event_test.go new file mode 100644 index 00000000..b00a9ddf --- /dev/null +++ b/internal/domain/transcript/event_test.go @@ -0,0 +1,326 @@ +package transcript_test + +import ( + "encoding/json" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +// TestExchangeEventMarshalJSON_DeterministicFieldOrder verifies that ExchangeEvent.MarshalJSON +// emits fields in exact order: seq,run_id,parent_run_id,child_run_id,type,path,iteration,timestamp,payload +func TestExchangeEventMarshalJSON_DeterministicFieldOrder(t *testing.T) { + now := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) + event := transcript.ExchangeEvent{ + Seq: 42, + RunID: "run-123", + ParentRunID: "parent-456", + ChildRunID: "child-789", + Type: transcript.EventTypeRunStarted, + Path: "step[0]", + Iteration: 1, + Timestamp: now, + Payload: &transcript.StepPayload{Name: "test-step"}, + } + + data, err := json.Marshal(event) + require.NoError(t, err) + + // Verify field order in JSON output + jsonStr := string(data) + seqPos := indexOfKey(jsonStr, "seq") + runIDPos := indexOfKey(jsonStr, "run_id") + parentRunIDPos := indexOfKey(jsonStr, "parent_run_id") + childRunIDPos := indexOfKey(jsonStr, "child_run_id") + typePos := indexOfKey(jsonStr, "type") + pathPos := indexOfKey(jsonStr, "path") + iterationPos := indexOfKey(jsonStr, "iteration") + timestampPos := indexOfKey(jsonStr, "timestamp") + payloadPos := indexOfKey(jsonStr, "payload") + + assert.True(t, seqPos < runIDPos, "seq should come before run_id") + assert.True(t, runIDPos < parentRunIDPos, "run_id should come before parent_run_id") + assert.True(t, parentRunIDPos < childRunIDPos, "parent_run_id should come before child_run_id") + assert.True(t, childRunIDPos < typePos, "child_run_id should come before type") + 
assert.True(t, typePos < pathPos, "type should come before path") + assert.True(t, pathPos < iterationPos, "path should come before iteration") + assert.True(t, iterationPos < timestampPos, "iteration should come before timestamp") + assert.True(t, timestampPos < payloadPos, "timestamp should come before payload") +} + +// TestExchangeEventMarshalJSON_OmitEmptyParentRunID verifies that empty ParentRunID is omitted +func TestExchangeEventMarshalJSON_OmitEmptyParentRunID(t *testing.T) { + now := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) + event := transcript.ExchangeEvent{ + Seq: 1, + RunID: "run-123", + ParentRunID: "", + ChildRunID: "", + Type: transcript.EventTypeRunStarted, + Path: "step[0]", + Iteration: 0, + Timestamp: now, + Payload: nil, + } + + data, err := json.Marshal(event) + require.NoError(t, err) + + jsonStr := string(data) + assert.NotContains(t, jsonStr, "parent_run_id", "empty parent_run_id should be omitted") + assert.NotContains(t, jsonStr, "child_run_id", "empty child_run_id should be omitted") +} + +// TestExchangeEventMarshalJSON_IncludeNonEmptyParentRunID verifies that non-empty ParentRunID is included +func TestExchangeEventMarshalJSON_IncludeNonEmptyParentRunID(t *testing.T) { + now := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) + event := transcript.ExchangeEvent{ + Seq: 1, + RunID: "run-123", + ParentRunID: "parent-456", + ChildRunID: "", + Type: transcript.EventTypeRunStarted, + Path: "step[0]", + Iteration: 0, + Timestamp: now, + Payload: nil, + } + + data, err := json.Marshal(event) + require.NoError(t, err) + + jsonStr := string(data) + assert.Contains(t, jsonStr, "parent_run_id", "non-empty parent_run_id should be included") + assert.NotContains(t, jsonStr, "child_run_id", "empty child_run_id should be omitted") +} + +// TestExchangeEventRoundTrip verifies that json.Marshal → json.Unmarshal recovers struct equality +func TestExchangeEventRoundTrip(t *testing.T) { + now := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) + + tests := 
[]struct { + name string + event transcript.ExchangeEvent + }{ + { + name: "with message payload", + event: transcript.ExchangeEvent{ + Seq: 1, + RunID: "run-123", + Type: transcript.EventTypeMessageUser, + Path: "step[0]", + Iteration: 0, + Timestamp: now, + Payload: &transcript.MessagePayload{ + Role: "user", + Blocks: []transcript.ContentBlock{ + { + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityRouter, + Text: "hello", + }, + }, + }, + }, + }, + { + name: "with step payload", + event: transcript.ExchangeEvent{ + Seq: 2, + RunID: "run-456", + Type: transcript.EventTypeStepCompleted, + Path: "step[1]", + Iteration: 1, + Timestamp: now, + Payload: &transcript.StepPayload{ + Name: "test-step", + Kind: "shell", + Result: "success", + }, + }, + }, + { + name: "with parent and child run ids", + event: transcript.ExchangeEvent{ + Seq: 3, + RunID: "run-789", + ParentRunID: "parent-123", + ChildRunID: "child-456", + Type: transcript.EventTypeStepCallWorkflowStarted, + Path: "step[2]", + Iteration: 0, + Timestamp: now, + Payload: &transcript.ToolPayload{Name: "workflow"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + data, err := json.Marshal(tt.event) + require.NoError(t, err) + + var recovered transcript.ExchangeEvent + err = json.Unmarshal(data, &recovered) + require.NoError(t, err) + + assert.Equal(t, tt.event, recovered) + }) + } +} + +// TestExchangeEventUnmarshalJSON_UnknownEventType verifies that unknown EventType returns ErrUnknownEventType +func TestExchangeEventUnmarshalJSON_UnknownEventType(t *testing.T) { + data := []byte(`{ + "seq": 1, + "run_id": "run-123", + "type": "unknown.event.type", + "path": "step[0]", + "iteration": 0, + "timestamp": "2024-01-01T12:00:00Z", + "payload": null + }`) + + var event transcript.ExchangeEvent + err := json.Unmarshal(data, &event) + assert.ErrorIs(t, err, transcript.ErrUnknownEventType) +} + +// TestExchangeEventUnmarshalJSON_AllEventTypes verifies unmarshaling all 
valid EventType values +func TestExchangeEventUnmarshalJSON_AllEventTypes(t *testing.T) { + tests := []struct { + name string + eventType transcript.EventType + }{ + {name: "run.started", eventType: transcript.EventTypeRunStarted}, + {name: "run.completed", eventType: transcript.EventTypeRunCompleted}, + {name: "step.started", eventType: transcript.EventTypeStepStarted}, + {name: "step.completed", eventType: transcript.EventTypeStepCompleted}, + {name: "step.call_workflow.started", eventType: transcript.EventTypeStepCallWorkflowStarted}, + {name: "step.call_workflow.completed", eventType: transcript.EventTypeStepCallWorkflowCompleted}, + {name: "message.user", eventType: transcript.EventTypeMessageUser}, + {name: "message.assistant", eventType: transcript.EventTypeMessageAssistant}, + {name: "tool.call", eventType: transcript.EventTypeToolCall}, + {name: "tool.result", eventType: transcript.EventTypeToolResult}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + jsonData := []byte(`{ + "seq": 1, + "run_id": "run-123", + "type": "` + string(tt.eventType) + `", + "path": "step[0]", + "iteration": 0, + "timestamp": "2024-01-01T12:00:00Z", + "payload": null + }`) + + var event transcript.ExchangeEvent + err := json.Unmarshal(jsonData, &event) + require.NoError(t, err) + assert.Equal(t, tt.eventType, event.Type) + }) + } +} + +// TestExchangeEventMarshalJSON_AllPayloadVariants verifies marshaling all payload types +func TestExchangeEventMarshalJSON_AllPayloadVariants(t *testing.T) { + now := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) + + tests := []struct { + name string + payload any + }{ + { + name: "message payload", + payload: &transcript.MessagePayload{Role: "user"}, + }, + { + name: "step payload", + payload: &transcript.StepPayload{Name: "step-1"}, + }, + { + name: "tool payload", + payload: &transcript.ToolPayload{Name: "bash"}, + }, + { + name: "content block array", + payload: []transcript.ContentBlock{{Type: 
transcript.BlockTypeText}}, + }, + { + name: "nil payload", + payload: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + event := transcript.ExchangeEvent{ + Seq: 1, + RunID: "run-123", + Type: transcript.EventTypeRunStarted, + Path: "step[0]", + Iteration: 0, + Timestamp: now, + Payload: tt.payload, + } + + data, err := json.Marshal(event) + require.NoError(t, err) + assert.NotNil(t, data) + }) + } +} + +// TestExchangeEventFields verifies all ExchangeEvent fields are accessible +func TestExchangeEventFields(t *testing.T) { + now := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) + event := transcript.ExchangeEvent{ + Seq: 42, + RunID: "run-123", + ParentRunID: "parent-456", + ChildRunID: "child-789", + Type: transcript.EventTypeRunStarted, + Path: "step[0]", + Iteration: 5, + Timestamp: now, + Payload: nil, + } + + assert.Equal(t, uint64(42), event.Seq) + assert.Equal(t, "run-123", event.RunID) + assert.Equal(t, "parent-456", event.ParentRunID) + assert.Equal(t, "child-789", event.ChildRunID) + assert.Equal(t, transcript.EventTypeRunStarted, event.Type) + assert.Equal(t, "step[0]", event.Path) + assert.Equal(t, 5, event.Iteration) + assert.Equal(t, now, event.Timestamp) + assert.Nil(t, event.Payload) +} + +// TestEventTypeEnumCoverage verifies all EventType constants are defined +func TestEventTypeEnumCoverage(t *testing.T) { + assert.Equal(t, transcript.EventType("run.started"), transcript.EventTypeRunStarted) + assert.Equal(t, transcript.EventType("run.completed"), transcript.EventTypeRunCompleted) + assert.Equal(t, transcript.EventType("step.started"), transcript.EventTypeStepStarted) + assert.Equal(t, transcript.EventType("step.completed"), transcript.EventTypeStepCompleted) + assert.Equal(t, transcript.EventType("step.call_workflow.started"), transcript.EventTypeStepCallWorkflowStarted) + assert.Equal(t, transcript.EventType("step.call_workflow.completed"), transcript.EventTypeStepCallWorkflowCompleted) + assert.Equal(t, 
// indexOfKey returns the byte offset of the first place where key appears as a
// JSON object key in jsonStr, or -1 when it does not appear. The search pattern
// is the quoted key immediately followed by a colon, which tells a key apart
// from a string value with the same text (e.g. key "text" vs value "text").
// It expects compact encoding: whitespace between the closing quote and the
// colon is not matched.
func indexOfKey(jsonStr, key string) int {
	needle := `"` + key + `":`
	return strings.Index(jsonStr, needle)
}
type AgentResult struct { Provider string // provider name used - Output string // raw output from agent CLI + Output string // extracted text response from the agent (NDJSON/lifecycle envelope stripped) + RawOutput string // unmodified provider stream (NDJSON for CLI providers); source for transcript normalization (F106 US2). Empty for providers with no raw stream (e.g. openai_compatible HTTP). DisplayOutput string // filtered human-readable output for display (empty when output_format=json or no parser) Response map[string]any // parsed JSON response (if applicable) Tokens int // token usage (if reported by provider) diff --git a/internal/domain/workflow/context.go b/internal/domain/workflow/context.go index 97bbfea1..bf09a71d 100644 --- a/internal/domain/workflow/context.go +++ b/internal/domain/workflow/context.go @@ -68,6 +68,7 @@ type ExecutionContext struct { mu sync.RWMutex // protects concurrent map access WorkflowID string WorkflowName string + ParentRunID string // non-empty when this run was spawned by a call_workflow step Status ExecutionStatus CurrentStep string ExitCode int // process exit code propagated from terminal steps (FR-004) diff --git a/internal/domain/workflow/conversation.go b/internal/domain/workflow/conversation.go index a8d9d0bc..c678bc99 100644 --- a/internal/domain/workflow/conversation.go +++ b/internal/domain/workflow/conversation.go @@ -155,6 +155,7 @@ type ConversationResult struct { Provider string // provider name used State *ConversationState // final conversation state Output string // final assistant response (last turn) + RawOutput string // unmodified provider stream for the last turn (NDJSON for CLI providers); source for transcript normalization (F106 US2) DisplayOutput string // filtered human-readable output for display (empty when output_format=json or no parser) Response map[string]any // parsed JSON response from last turn (if applicable) TokensInput int // total input tokens across all turns diff --git 
a/internal/infrastructure/agents/base_cli_provider.go b/internal/infrastructure/agents/base_cli_provider.go index 59eda1ef..20d530e4 100644 --- a/internal/infrastructure/agents/base_cli_provider.go +++ b/internal/infrastructure/agents/base_cli_provider.go @@ -241,6 +241,7 @@ func (b *baseCLIProvider) execute(ctx context.Context, prompt string, options ma result := &workflow.AgentResult{ Provider: b.name, Output: outputStr, + RawOutput: rawOutput, DisplayOutput: displayOutput, StartedAt: startedAt, CompletedAt: completedAt, @@ -381,6 +382,7 @@ func (b *baseCLIProvider) executeConversation(ctx context.Context, state *workfl Provider: b.name, State: workingState, Output: outputStr, + RawOutput: rawOutput, DisplayOutput: displayOutput, TokensInput: inputTokens, TokensOutput: assistantTurn.Tokens, diff --git a/internal/infrastructure/agents/claude_to_content_blocks.go b/internal/infrastructure/agents/claude_to_content_blocks.go new file mode 100644 index 00000000..cdba0389 --- /dev/null +++ b/internal/infrastructure/agents/claude_to_content_blocks.go @@ -0,0 +1,62 @@ +package agents + +import ( + "encoding/json" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +type claudeContentElement struct { + Type string `json:"type"` + Text string `json:"text"` + Thinking string `json:"thinking"` + ID string `json:"id"` + Name string `json:"name"` + Input map[string]any `json:"input"` +} + +type claudeNDJSONLine struct { + Type string `json:"type"` + Message *struct { + Content []claudeContentElement `json:"content"` + } `json:"message"` +} + +func ClaudeToContentBlocks(line []byte) []transcript.ContentBlock { + var evt claudeNDJSONLine + if err := json.Unmarshal(line, &evt); err != nil { + return []transcript.ContentBlock{} + } + if evt.Type != "assistant" || evt.Message == nil { + return []transcript.ContentBlock{} + } + blocks := make([]transcript.ContentBlock, 0, len(evt.Message.Content)) + for _, el := range evt.Message.Content { + switch el.Type { + case 
"text": + if el.Text == "" { + continue + } + blocks = append(blocks, transcript.ContentBlock{ + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityAgentEmitted, + Text: el.Text, + }) + case "thinking": + blocks = append(blocks, transcript.ContentBlock{ + Type: transcript.BlockTypeThinking, + Fidelity: transcript.FidelityAgentEmitted, + Thinking: el.Thinking, + }) + case "tool_use": + blocks = append(blocks, transcript.ContentBlock{ + Type: transcript.BlockTypeToolUse, + Fidelity: transcript.FidelityAgentEmitted, + ToolName: el.Name, + ToolID: el.ID, + ToolInput: el.Input, + }) + } + } + return blocks +} diff --git a/internal/infrastructure/agents/claude_to_content_blocks_test.go b/internal/infrastructure/agents/claude_to_content_blocks_test.go new file mode 100644 index 00000000..22071ab2 --- /dev/null +++ b/internal/infrastructure/agents/claude_to_content_blocks_test.go @@ -0,0 +1,107 @@ +package agents + +import ( + "testing" + + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestClaudeToContentBlocks_TextThinkingToolUse(t *testing.T) { + line := []byte(`{"type":"assistant","message":{"content":[{"type":"text","text":"a"},{"type":"thinking","thinking":"t"},{"type":"tool_use","id":"x","name":"n","input":{}}]}}`) + + got := ClaudeToContentBlocks(line) + + require.Len(t, got, 3) + + assert.Equal(t, transcript.BlockTypeText, got[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, got[0].Fidelity) + assert.Equal(t, "a", got[0].Text) + + assert.Equal(t, transcript.BlockTypeThinking, got[1].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, got[1].Fidelity) + assert.Equal(t, "t", got[1].Thinking) + + assert.Equal(t, transcript.BlockTypeToolUse, got[2].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, got[2].Fidelity) + assert.Equal(t, "n", got[2].ToolName) + assert.Equal(t, "x", got[2].ToolID) +} + +func 
TestClaudeToContentBlocks_DanglingToolUseWithoutResult(t *testing.T) { + line := []byte(`{"type":"assistant","message":{"content":[{"type":"tool_use","id":"dangling","name":"bash","input":{"cmd":"ls"}}]}}`) + + got := ClaudeToContentBlocks(line) + + require.Len(t, got, 1) + assert.Equal(t, transcript.BlockTypeToolUse, got[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, got[0].Fidelity) + assert.Equal(t, "dangling", got[0].ToolID) + assert.Equal(t, "bash", got[0].ToolName) +} + +func TestClaudeToContentBlocks_NonAssistantLineReturnsEmpty(t *testing.T) { + tests := []struct { + name string + line []byte + }{ + {"user type", []byte(`{"type":"user","message":{"content":[{"type":"text","text":"hi"}]}}`)}, + {"missing message", []byte(`{"type":"assistant"}`)}, + {"system type", []byte(`{"type":"system","session_id":"abc"}`)}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := ClaudeToContentBlocks(tc.line) + assert.Equal(t, []transcript.ContentBlock{}, got) + }) + } +} + +func TestClaudeToContentBlocks_MalformedJSON(t *testing.T) { + got := ClaudeToContentBlocks([]byte(`{not valid json}`)) + assert.Equal(t, []transcript.ContentBlock{}, got) +} + +func TestClaudeToContentBlocks_AllFidelitiesAreAgentEmitted(t *testing.T) { + line := []byte(`{"type":"assistant","message":{"content":[{"type":"text","text":"hello"},{"type":"thinking","thinking":"..."},{"type":"tool_use","id":"1","name":"grep","input":{}}]}}`) + + got := ClaudeToContentBlocks(line) + + require.Len(t, got, 3) + for _, block := range got { + assert.Equal(t, transcript.FidelityAgentEmitted, block.Fidelity) + } +} + +func TestClaudeToContentBlocks_UnknownContentTypeSkipped(t *testing.T) { + line := []byte(`{"type":"assistant","message":{"content":[{"type":"unknown_future_type"},{"type":"text","text":"keep"}]}}`) + + got := ClaudeToContentBlocks(line) + + require.Len(t, got, 1) + assert.Equal(t, transcript.BlockTypeText, got[0].Type) + assert.Equal(t, "keep", got[0].Text) 
+} + +func TestClaudeToContentBlocks_EmptyContentArrayReturnsEmpty(t *testing.T) { + line := []byte(`{"type":"assistant","message":{"content":[]}}`) + + got := ClaudeToContentBlocks(line) + + assert.Equal(t, []transcript.ContentBlock{}, got) +} + +func TestClaudeToContentBlocks_NeverReturnsNil(t *testing.T) { + cases := [][]byte{ + []byte(`{}`), + []byte(`{"type":"assistant","message":{"content":[]}}`), + []byte(`{bad json`), + nil, + } + for _, line := range cases { + got := ClaudeToContentBlocks(line) + assert.NotNil(t, got) + } +} diff --git a/internal/infrastructure/agents/codex_to_content_blocks.go b/internal/infrastructure/agents/codex_to_content_blocks.go new file mode 100644 index 00000000..65635e21 --- /dev/null +++ b/internal/infrastructure/agents/codex_to_content_blocks.go @@ -0,0 +1,59 @@ +package agents + +import ( + "bytes" + "encoding/json" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +type codexItem struct { + ItemType string `json:"item_type"` + Kind string `json:"type"` + Text string `json:"text"` + Name string `json:"name"` + Arguments string `json:"arguments"` +} + +type codexNDJSONLine struct { + Type string `json:"type"` + Item *codexItem `json:"item"` +} + +// CodexToContentBlocks maps a single Codex NDJSON line to transcript ContentBlocks. +// Raw NUL bytes (0x00) are escaped to the six-byte JSON unicode sequence before +// unmarshal, matching the behavior in codex_provider.go:343. 
+func CodexToContentBlocks(line []byte) []transcript.ContentBlock { + sanitized := bytes.ReplaceAll(line, []byte{0x00}, []byte{0x5c, 0x75, 0x30, 0x30, 0x30, 0x30}) + + var evt codexNDJSONLine + if err := json.Unmarshal(sanitized, &evt); err != nil { + return []transcript.ContentBlock{} + } + if evt.Type != "item.completed" || evt.Item == nil { + return []transcript.ContentBlock{} + } + + itemKind := evt.Item.ItemType + if itemKind == "" { + itemKind = evt.Item.Kind + } + + switch itemKind { + case "assistant_message", "agent_message": + return []transcript.ContentBlock{{ + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityAgentEmitted, + Text: evt.Item.Text, + }} + case "function_call", "command_execution": + return []transcript.ContentBlock{{ + Type: transcript.BlockTypeToolUse, + Fidelity: transcript.FidelityAgentEmitted, + ToolName: evt.Item.Name, + ToolID: "", + ToolInput: evt.Item.Arguments, + }} + } + return []transcript.ContentBlock{} +} diff --git a/internal/infrastructure/agents/codex_to_content_blocks_test.go b/internal/infrastructure/agents/codex_to_content_blocks_test.go new file mode 100644 index 00000000..b0b5409b --- /dev/null +++ b/internal/infrastructure/agents/codex_to_content_blocks_test.go @@ -0,0 +1,149 @@ +package agents + +import ( + "encoding/json" + "testing" + + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCodexToContentBlocks_HappyPathTextOnly(t *testing.T) { + line := []byte(`{"type":"item.completed","item":{"item_type":"assistant_message","text":"hello"}}`) + + got := CodexToContentBlocks(line) + + require.Len(t, got, 1) + assert.Equal(t, transcript.BlockTypeText, got[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, got[0].Fidelity) + assert.Equal(t, "hello", got[0].Text) +} + +func TestCodexToContentBlocks_AllItemKinds(t *testing.T) { + tests := []struct { + name string + line []byte + wantType 
transcript.BlockType + wantText string + wantTool string + wantInput string + }{ + { + name: "agent_message via item_type", + line: []byte(`{"type":"item.completed","item":{"item_type":"agent_message","text":"resp"}}`), + wantType: transcript.BlockTypeText, + wantText: "resp", + }, + { + name: "assistant_message via type field", + line: []byte(`{"type":"item.completed","item":{"type":"assistant_message","text":"out"}}`), + wantType: transcript.BlockTypeText, + wantText: "out", + }, + { + name: "function_call via item_type", + line: []byte(`{"type":"item.completed","item":{"item_type":"function_call","name":"bash","arguments":"{\"cmd\":\"ls\"}"}}`), + wantType: transcript.BlockTypeToolUse, + wantTool: "bash", + wantInput: `{"cmd":"ls"}`, + }, + { + name: "command_execution via type field", + line: []byte(`{"type":"item.completed","item":{"type":"command_execution","name":"grep","arguments":"{}"}}`), + wantType: transcript.BlockTypeToolUse, + wantTool: "grep", + wantInput: "{}", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := CodexToContentBlocks(tc.line) + + require.Len(t, got, 1) + assert.Equal(t, tc.wantType, got[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, got[0].Fidelity) + if tc.wantType == transcript.BlockTypeText { + assert.Equal(t, tc.wantText, got[0].Text) + } else { + assert.Equal(t, tc.wantTool, got[0].ToolName) + assert.Equal(t, "", got[0].ToolID) // Codex emits no tool-call ids + assert.Equal(t, tc.wantInput, got[0].ToolInput) + } + }) + } +} + +func TestCodexToContentBlocks_EmbeddedNUL(t *testing.T) { + // Build a JSON line with a raw NUL byte inside the text value; raw NUL is + // not valid JSON so the mapper must escape it to \u0000 before unmarshal. + prefix := []byte(`{"type":"item.completed","item":{"item_type":"assistant_message","text":"hello`) + suffix := []byte(`world"}}`) + line := make([]byte, 0, len(prefix)+1+len(suffix)) + line = append(line, prefix...) 
+ line = append(line, 0x00) + line = append(line, suffix...) + + got := CodexToContentBlocks(line) + + require.Len(t, got, 1) + assert.Equal(t, transcript.BlockTypeText, got[0].Type) + assert.Equal(t, "hello\x00world", got[0].Text) + + marshaled, err := got[0].MarshalJSON() + require.NoError(t, err) + assert.Contains(t, string(marshaled), "\\u0000") + + var recovered transcript.ContentBlock + require.NoError(t, json.Unmarshal(marshaled, &recovered)) + assert.Equal(t, "hello\x00world", recovered.Text) +} + +func TestCodexToContentBlocks_DanglingToolUse(t *testing.T) { + line := []byte(`{"type":"item.completed","item":{"item_type":"function_call","name":"shell","arguments":"{\"cmd\":\"rm -rf /tmp/x\"}"}}`) + + got := CodexToContentBlocks(line) + + require.Len(t, got, 1) + assert.Equal(t, transcript.BlockTypeToolUse, got[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, got[0].Fidelity) + assert.Equal(t, "shell", got[0].ToolName) + assert.Equal(t, "", got[0].ToolID) +} + +func TestCodexToContentBlocks_MalformedJSON(t *testing.T) { + got := CodexToContentBlocks([]byte(`{not valid json}`)) + assert.Equal(t, []transcript.ContentBlock{}, got) +} + +func TestCodexToContentBlocks_WrongDiscriminator(t *testing.T) { + tests := []struct { + name string + line []byte + }{ + {"wrong type field", []byte(`{"type":"item.created","item":{"item_type":"assistant_message","text":"hi"}}`)}, + {"nil item", []byte(`{"type":"item.completed"}`)}, + {"empty object", []byte(`{}`)}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := CodexToContentBlocks(tc.line) + assert.Equal(t, []transcript.ContentBlock{}, got) + }) + } +} + +func TestCodexToContentBlocks_NeverReturnsNil(t *testing.T) { + cases := [][]byte{ + []byte(`{}`), + []byte(`{"type":"item.completed"}`), + []byte(`{bad json`), + nil, + } + for _, line := range cases { + got := CodexToContentBlocks(line) + assert.NotNil(t, got) + } +} diff --git 
a/internal/infrastructure/agents/content_block_normalizer.go b/internal/infrastructure/agents/content_block_normalizer.go new file mode 100644 index 00000000..b12a2e55 --- /dev/null +++ b/internal/infrastructure/agents/content_block_normalizer.go @@ -0,0 +1,71 @@ +package agents + +import ( + "bufio" + "bytes" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +// maxNormalizerLineBytes bounds a single NDJSON line during normalization. Agent +// responses (especially Codex/Claude with large text blocks) routinely exceed +// bufio.Scanner's default 64KB token, which would otherwise silently drop the line. +const maxNormalizerLineBytes = 10 << 20 // 10 MiB + +// lineNormalizer maps one provider NDJSON line to canonical ContentBlocks. +type lineNormalizer func(line []byte) []transcript.ContentBlock + +// ContentBlockNormalizer is the infrastructure implementation of +// ports.AgentOutputNormalizer. It dispatches a provider's raw CLI output to the +// matching per-provider line normalizer (the *ToContentBlocks functions), absorbing +// provider divergence in this single layer (F106 SC-002). +type ContentBlockNormalizer struct{} + +// NewContentBlockNormalizer returns a ready-to-use normalizer. It is stateless, so the +// zero value works too; the constructor exists for wiring symmetry. +func NewContentBlockNormalizer() ContentBlockNormalizer { + return ContentBlockNormalizer{} +} + +// Normalize scans the provider's raw NDJSON output line-by-line and concatenates the +// blocks produced for each line. It returns nil for an unknown provider, empty output, +// or output that yields no blocks. It never panics on malformed input — each line +// normalizer tolerates unparseable lines by returning no blocks. 
+func (ContentBlockNormalizer) Normalize(provider string, rawOutput []byte) []transcript.ContentBlock { + fn := lineNormalizerFor(provider) + if fn == nil || len(rawOutput) == 0 { + return nil + } + + var blocks []transcript.ContentBlock + scanner := bufio.NewScanner(bytes.NewReader(rawOutput)) + scanner.Buffer(make([]byte, 0, 64*1024), maxNormalizerLineBytes) + for scanner.Scan() { + line := scanner.Bytes() + if len(bytes.TrimSpace(line)) == 0 { + continue + } + blocks = append(blocks, fn(line)...) + } + return blocks +} + +// lineNormalizerFor resolves the per-line normalizer for a resolved provider name. +// The names match the agent registry keys (provider.Name()). Providers without a +// transcript normalizer (e.g. opencode, reserved for a future feature) return nil. +func lineNormalizerFor(provider string) lineNormalizer { + switch provider { + case "claude": + return ClaudeToContentBlocks + case "codex": + return CodexToContentBlocks + case "gemini": + return GeminiToContentBlocks + case "github_copilot": + return CopilotToContentBlocks + case "openai_compatible": + return OpenAICompatibleToContentBlocks + default: + return nil + } +} diff --git a/internal/infrastructure/agents/content_block_normalizer_test.go b/internal/infrastructure/agents/content_block_normalizer_test.go new file mode 100644 index 00000000..bbaf88bf --- /dev/null +++ b/internal/infrastructure/agents/content_block_normalizer_test.go @@ -0,0 +1,67 @@ +package agents + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +// TestContentBlockNormalizer_DispatchesByProvider verifies the registry maps each +// supported provider name to its per-line normalizer and produces agent_emitted blocks. 
+func TestContentBlockNormalizer_DispatchesByProvider(t *testing.T) { + n := NewContentBlockNormalizer() + + raw := `{"type":"assistant","message":{"content":[{"type":"text","text":"hello"}]}}` + "\n" + blocks := n.Normalize("claude", []byte(raw)) + + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeText, blocks[0].Type) + assert.Equal(t, "hello", blocks[0].Text) + assert.Equal(t, transcript.FidelityAgentEmitted, blocks[0].Fidelity) +} + +// TestContentBlockNormalizer_AccumulatesAcrossLines verifies multi-line NDJSON output +// is scanned line-by-line and the resulting blocks are concatenated in order. +func TestContentBlockNormalizer_AccumulatesAcrossLines(t *testing.T) { + n := NewContentBlockNormalizer() + + raw := strings.Join([]string{ + `{"type":"assistant","message":{"content":[{"type":"thinking","thinking":"hmm"}]}}`, + `{"type":"assistant","message":{"content":[{"type":"text","text":"answer"}]}}`, + "", // blank line must be skipped, not panic + }, "\n") + + blocks := n.Normalize("claude", []byte(raw)) + + require.Len(t, blocks, 2) + assert.Equal(t, transcript.BlockTypeThinking, blocks[0].Type) + assert.Equal(t, transcript.BlockTypeText, blocks[1].Type) +} + +// TestContentBlockNormalizer_UnknownProviderYieldsNil verifies unknown providers and +// empty output produce no blocks (and never panic). +func TestContentBlockNormalizer_UnknownProviderYieldsNil(t *testing.T) { + n := NewContentBlockNormalizer() + + assert.Nil(t, n.Normalize("unknown_provider", []byte(`{"type":"assistant"}`))) + assert.Nil(t, n.Normalize("claude", nil)) + assert.Nil(t, n.Normalize("claude", []byte(""))) +} + +// TestContentBlockNormalizer_HandlesLongLines verifies a line larger than bufio.Scanner's +// default 64KB token is not silently dropped (agent responses can be large). 
+func TestContentBlockNormalizer_HandlesLongLines(t *testing.T) { + n := NewContentBlockNormalizer() + + big := strings.Repeat("x", 200*1024) // 200KB > default 64KB scanner token + raw := `{"type":"assistant","message":{"content":[{"type":"text","text":"` + big + `"}]}}` + + blocks := n.Normalize("claude", []byte(raw)) + + require.Len(t, blocks, 1) + assert.Equal(t, big, blocks[0].Text) +} diff --git a/internal/infrastructure/agents/copilot_to_content_blocks.go b/internal/infrastructure/agents/copilot_to_content_blocks.go new file mode 100644 index 00000000..d987fdf0 --- /dev/null +++ b/internal/infrastructure/agents/copilot_to_content_blocks.go @@ -0,0 +1,51 @@ +package agents + +import ( + "encoding/json" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +type copilotDisplayLine struct { + Type string `json:"type"` + Data map[string]any `json:"data"` +} + +// CopilotToContentBlocks maps a single Copilot NDJSON line to transcript ContentBlocks. +// Mirrors the event-typed shape consumed by parseCopilotDisplayEvents in copilot_provider.go. 
+func CopilotToContentBlocks(line []byte) []transcript.ContentBlock { + if len(line) == 0 { + return []transcript.ContentBlock{} + } + var event copilotDisplayLine + if err := json.Unmarshal(line, &event); err != nil { + return []transcript.ContentBlock{} + } + switch event.Type { + case "assistant.message": + var content string + if s, ok := event.Data["content"].(string); ok { + content = s + } + if content == "" { + return []transcript.ContentBlock{} + } + return []transcript.ContentBlock{{ + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityAgentEmitted, + Text: content, + }} + case "tool.execution_start": + var toolName string + if s, ok := event.Data["toolName"].(string); ok { + toolName = s + } + return []transcript.ContentBlock{{ + Type: transcript.BlockTypeToolUse, + Fidelity: transcript.FidelityAgentEmitted, + ToolName: toolName, + }} + default: + return []transcript.ContentBlock{} + } +} diff --git a/internal/infrastructure/agents/copilot_to_content_blocks_test.go b/internal/infrastructure/agents/copilot_to_content_blocks_test.go new file mode 100644 index 00000000..cf9bb58c --- /dev/null +++ b/internal/infrastructure/agents/copilot_to_content_blocks_test.go @@ -0,0 +1,157 @@ +package agents + +import ( + "testing" + + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCopilotToContentBlocks(t *testing.T) { + tests := []struct { + name string + line []byte + wantCount int + validate func(t *testing.T, blocks []transcript.ContentBlock) + }{ + { + name: "assistant message with content", + line: []byte(`{"type":"assistant.message","data":{"content":"hi"}}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeText, blocks[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, blocks[0].Fidelity) + assert.Equal(t, "hi", blocks[0].Text) + }, + }, + { + name: 
"assistant message with multiline content", + line: []byte(`{"type":"assistant.message","data":{"content":"line1\nline2"}}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 1) + assert.Equal(t, "line1\nline2", blocks[0].Text) + }, + }, + { + name: "assistant message with empty content", + line: []byte(`{"type":"assistant.message","data":{"content":""}}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "assistant message missing content field", + line: []byte(`{"type":"assistant.message","data":{}}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "tool execution start with tool name", + line: []byte(`{"type":"tool.execution_start","data":{"toolName":"bash"}}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeToolUse, blocks[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, blocks[0].Fidelity) + assert.Equal(t, "bash", blocks[0].ToolName) + assert.Empty(t, blocks[0].ToolID) + }, + }, + { + name: "tool execution start with empty tool name", + line: []byte(`{"type":"tool.execution_start","data":{"toolName":""}}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeToolUse, blocks[0].Type) + assert.Empty(t, blocks[0].ToolName) + }, + }, + { + name: "tool execution start missing tool name", + line: []byte(`{"type":"tool.execution_start","data":{}}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeToolUse, blocks[0].Type) + assert.Empty(t, blocks[0].ToolName) + }, + }, + { + name: "unknown event type", + line: []byte(`{"type":"unknown.event","data":{"content":"data"}}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) 
{ + assert.Empty(t, blocks) + }, + }, + { + name: "malformed json invalid utf8", + line: []byte{0xff, 0xfe}, + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "malformed json truncated", + line: []byte(`{"type":"assistant.message","data":`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "malformed json invalid structure", + line: []byte(`not json at all`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "empty input", + line: []byte(``), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + blocks := CopilotToContentBlocks(tt.line) + tt.validate(t, blocks) + }) + } +} + +func TestCopilotToContentBlocks_AssistantMessage(t *testing.T) { + line := []byte(`{"type":"assistant.message","data":{"content":"hi"}}`) + blocks := CopilotToContentBlocks(line) + + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeText, blocks[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, blocks[0].Fidelity) + assert.Equal(t, "hi", blocks[0].Text) +} + +func TestCopilotToContentBlocks_ToolExecutionStart(t *testing.T) { + line := []byte(`{"type":"tool.execution_start","data":{"toolName":"bash"}}`) + blocks := CopilotToContentBlocks(line) + + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeToolUse, blocks[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, blocks[0].Fidelity) + assert.Equal(t, "bash", blocks[0].ToolName) +} + +func TestCopilotToContentBlocks_UnknownType(t *testing.T) { + line := []byte(`{"type":"x"}`) + blocks := CopilotToContentBlocks(line) + + assert.Empty(t, blocks) +} + +func TestCopilotToContentBlocks_MalformedJSON(t *testing.T) { + line := []byte(`{invalid}`) + blocks := CopilotToContentBlocks(line) + + 
assert.Empty(t, blocks) + // ensure no panic occurred +} diff --git a/internal/infrastructure/agents/gemini_to_content_blocks.go b/internal/infrastructure/agents/gemini_to_content_blocks.go new file mode 100644 index 00000000..0270d03c --- /dev/null +++ b/internal/infrastructure/agents/gemini_to_content_blocks.go @@ -0,0 +1,46 @@ +package agents + +import ( + "encoding/json" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +type geminiToolCall struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments"` +} + +type geminiNDJSONLine struct { + Type string `json:"type"` + Role string `json:"role"` + Content string `json:"content"` + ToolCalls []geminiToolCall `json:"toolCalls"` +} + +func GeminiToContentBlocks(line []byte) []transcript.ContentBlock { + var evt geminiNDJSONLine + if err := json.Unmarshal(line, &evt); err != nil { + return []transcript.ContentBlock{} + } + if evt.Type != "message" || evt.Role != "assistant" { + return []transcript.ContentBlock{} + } + blocks := make([]transcript.ContentBlock, 0, 1+len(evt.ToolCalls)) + if evt.Content != "" { + blocks = append(blocks, transcript.ContentBlock{ + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityAgentEmitted, + Text: evt.Content, + }) + } + for _, tc := range evt.ToolCalls { + blocks = append(blocks, transcript.ContentBlock{ + Type: transcript.BlockTypeToolUse, + Fidelity: transcript.FidelityAgentEmitted, + ToolName: tc.Name, + ToolInput: tc.Arguments, + }) + } + return blocks +} diff --git a/internal/infrastructure/agents/gemini_to_content_blocks_test.go b/internal/infrastructure/agents/gemini_to_content_blocks_test.go new file mode 100644 index 00000000..375f23d0 --- /dev/null +++ b/internal/infrastructure/agents/gemini_to_content_blocks_test.go @@ -0,0 +1,92 @@ +package agents + +import ( + "testing" + + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func 
TestGeminiToContentBlocks_TextAndToolCall(t *testing.T) { + line := []byte(`{"type":"message","role":"assistant","content":"hi","toolCalls":[{"name":"n","arguments":{"k":1}}]}`) + + got := GeminiToContentBlocks(line) + + require.Len(t, got, 2) + + assert.Equal(t, transcript.BlockTypeText, got[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, got[0].Fidelity) + assert.Equal(t, "hi", got[0].Text) + + assert.Equal(t, transcript.BlockTypeToolUse, got[1].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, got[1].Fidelity) + assert.Equal(t, "n", got[1].ToolName) + assert.Equal(t, "", got[1].ToolID) + assert.Equal(t, map[string]any{"k": float64(1)}, got[1].ToolInput) +} + +func TestGeminiToContentBlocks_WrongDiscriminator(t *testing.T) { + tests := []struct { + name string + line []byte + }{ + {"user role", []byte(`{"type":"message","role":"user","content":"hi"}`)}, + {"wrong type", []byte(`{"type":"system","role":"assistant","content":"hi"}`)}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := GeminiToContentBlocks(tc.line) + assert.Equal(t, []transcript.ContentBlock{}, got) + }) + } +} + +func TestGeminiToContentBlocks_EmptyContent(t *testing.T) { + line := []byte(`{"type":"message","role":"assistant","content":"","toolCalls":[]}`) + + got := GeminiToContentBlocks(line) + + assert.Equal(t, []transcript.ContentBlock{}, got) +} + +func TestGeminiToContentBlocks_MalformedJSON(t *testing.T) { + got := GeminiToContentBlocks([]byte(`{not valid json}`)) + assert.Equal(t, []transcript.ContentBlock{}, got) +} + +func TestGeminiToContentBlocks_AllFidelitiesAreAgentEmitted(t *testing.T) { + line := []byte(`{"type":"message","role":"assistant","content":"text","toolCalls":[{"name":"tool","arguments":{}}]}`) + + got := GeminiToContentBlocks(line) + + require.Len(t, got, 2) + for _, block := range got { + assert.Equal(t, transcript.FidelityAgentEmitted, block.Fidelity) + } +} + +func TestGeminiToContentBlocks_NeverReturnsNil(t 
*testing.T) { + cases := [][]byte{ + []byte(`{}`), + []byte(`{"type":"message","role":"assistant","content":"","toolCalls":[]}`), + []byte(`{bad json`), + nil, + } + for _, line := range cases { + got := GeminiToContentBlocks(line) + assert.NotNil(t, got) + } +} + +func TestGeminiToContentBlocks_OnlyToolCalls(t *testing.T) { + line := []byte(`{"type":"message","role":"assistant","content":"","toolCalls":[{"name":"bash","arguments":{"cmd":"ls"}}]}`) + + got := GeminiToContentBlocks(line) + + require.Len(t, got, 1) + assert.Equal(t, transcript.BlockTypeToolUse, got[0].Type) + assert.Equal(t, "bash", got[0].ToolName) + assert.Equal(t, "", got[0].ToolID) +} diff --git a/internal/infrastructure/agents/golden_transcript_test.go b/internal/infrastructure/agents/golden_transcript_test.go new file mode 100644 index 00000000..df88a66a --- /dev/null +++ b/internal/infrastructure/agents/golden_transcript_test.go @@ -0,0 +1,93 @@ +package agents + +import ( + "bufio" + "bytes" + "encoding/json" + "os" + "testing" + + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/stretchr/testify/require" +) + +func TestGoldenTranscript_PerProvider(t *testing.T) { + tests := []struct { + provider string + inputPath string + goldenPath string + fn func([]byte) []transcript.ContentBlock + }{ + { + provider: "claude", + inputPath: "testdata/transcript/claude/input.jsonl", + goldenPath: "testdata/transcript/golden/claude.json", + fn: ClaudeToContentBlocks, + }, + { + provider: "codex", + inputPath: "testdata/transcript/codex/input.jsonl", + goldenPath: "testdata/transcript/golden/codex.json", + fn: CodexToContentBlocks, + }, + { + provider: "gemini", + inputPath: "testdata/transcript/gemini/input.jsonl", + goldenPath: "testdata/transcript/golden/gemini.json", + fn: GeminiToContentBlocks, + }, + { + provider: "copilot", + inputPath: "testdata/transcript/copilot/input.jsonl", + goldenPath: "testdata/transcript/golden/copilot.json", + fn: CopilotToContentBlocks, + }, + { + 
provider: "openaicompatible", + inputPath: "testdata/transcript/openaicompatible/input.jsonl", + goldenPath: "testdata/transcript/golden/openaicompatible.json", + fn: OpenAICompatibleToContentBlocks, + }, + } + + for _, tt := range tests { + t.Run(tt.provider, func(t *testing.T) { + inputData, err := os.ReadFile(tt.inputPath) + require.NoError(t, err, "reading input fixture") + + var blocks []transcript.ContentBlock + scanner := bufio.NewScanner(bytes.NewReader(inputData)) + for scanner.Scan() { + line := scanner.Bytes() + if len(bytes.TrimSpace(line)) == 0 { + continue + } + blocks = append(blocks, tt.fn(line)...) + } + require.NoError(t, scanner.Err(), "scanning input fixture") + + if blocks == nil { + blocks = []transcript.ContentBlock{} + } + + actual, err := json.MarshalIndent(blocks, "", " ") + require.NoError(t, err, "marshaling content blocks") + actual = append(actual, '\n') + + if os.Getenv("UPDATE_GOLDEN") != "" { + err = os.MkdirAll("testdata/transcript/golden", 0o755) + require.NoError(t, err, "creating golden directory") + err = os.WriteFile(tt.goldenPath, actual, 0o644) + require.NoError(t, err, "writing golden file") + return + } + + expected, err := os.ReadFile(tt.goldenPath) + require.NoErrorf(t, err, "golden file %q missing — run with UPDATE_GOLDEN=1 to generate it", tt.goldenPath) + + require.Equal(t, string(expected), string(actual), + "golden mismatch for provider %q — if intentional, re-run with UPDATE_GOLDEN=1 to refresh the golden file", + tt.provider) + }) + } +} diff --git a/internal/infrastructure/agents/openai_compatible_to_content_blocks.go b/internal/infrastructure/agents/openai_compatible_to_content_blocks.go new file mode 100644 index 00000000..ed095cae --- /dev/null +++ b/internal/infrastructure/agents/openai_compatible_to_content_blocks.go @@ -0,0 +1,75 @@ +package agents + +import ( + "encoding/json" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +type openAICompatibleToolCall struct { + ID string `json:"id"` + 
Function struct { + Name string `json:"name"` + Arguments string `json:"arguments"` + } `json:"function"` +} + +type openAICompatibleDeltaOrMessage struct { + Content string `json:"content"` + ToolCalls []openAICompatibleToolCall `json:"tool_calls"` +} + +type openAICompatibleChunk struct { + Object string `json:"object"` + Choices []struct { + Delta openAICompatibleDeltaOrMessage `json:"delta"` + Message openAICompatibleDeltaOrMessage `json:"message"` + } `json:"choices"` +} + +// OpenAICompatibleToContentBlocks maps a single OpenAI-compatible NDJSON line to transcript ContentBlocks. +// Mirrors the delta/message shape consumed by translateOpenAICompatibleDisplayEvents in openai_compatible_provider.go. +func OpenAICompatibleToContentBlocks(line []byte) []transcript.ContentBlock { + if len(line) == 0 { + return []transcript.ContentBlock{} + } + var chunk openAICompatibleChunk + if err := json.Unmarshal(line, &chunk); err != nil { + return []transcript.ContentBlock{} + } + if chunk.Object == "" || len(chunk.Choices) == 0 { + return []transcript.ContentBlock{} + } + + choice := chunk.Choices[0] + src := choice.Delta + if src.Content == "" && len(src.ToolCalls) == 0 { + src = choice.Message + } + + var blocks []transcript.ContentBlock + if src.Content != "" { + blocks = append(blocks, transcript.ContentBlock{ + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityAgentEmitted, + Text: src.Content, + }) + } + for _, tc := range src.ToolCalls { + block := transcript.ContentBlock{ + Type: transcript.BlockTypeToolUse, + Fidelity: transcript.FidelityAgentEmitted, + ToolName: tc.Function.Name, + ToolID: tc.ID, + } + if tc.Function.Arguments != "" { + block.ToolInput = tc.Function.Arguments + } + blocks = append(blocks, block) + } + + if blocks == nil { + return []transcript.ContentBlock{} + } + return blocks +} diff --git a/internal/infrastructure/agents/openai_compatible_to_content_blocks_test.go 
b/internal/infrastructure/agents/openai_compatible_to_content_blocks_test.go new file mode 100644 index 00000000..a0018b8a --- /dev/null +++ b/internal/infrastructure/agents/openai_compatible_to_content_blocks_test.go @@ -0,0 +1,247 @@ +package agents + +import ( + "testing" + + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestOpenAICompatibleToContentBlocks(t *testing.T) { + tests := []struct { + name string + line []byte + validate func(t *testing.T, blocks []transcript.ContentBlock) + }{ + { + name: "delta with content only", + line: []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{"content":"hello"}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeText, blocks[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, blocks[0].Fidelity) + assert.Equal(t, "hello", blocks[0].Text) + }, + }, + { + name: "delta with tool call only", + line: []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{"tool_calls":[{"id":"t1","function":{"name":"bash","arguments":"{}"}}]}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeToolUse, blocks[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, blocks[0].Fidelity) + assert.Equal(t, "bash", blocks[0].ToolName) + assert.Equal(t, "t1", blocks[0].ToolID) + }, + }, + { + name: "delta with content and single tool call", + line: []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{"content":"hi","tool_calls":[{"id":"t1","function":{"name":"n","arguments":"{}"}}]}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 2) + assert.Equal(t, transcript.BlockTypeText, blocks[0].Type) + assert.Equal(t, "hi", blocks[0].Text) + assert.Equal(t, transcript.BlockTypeToolUse, 
blocks[1].Type) + assert.Equal(t, "n", blocks[1].ToolName) + assert.Equal(t, "t1", blocks[1].ToolID) + }, + }, + { + name: "delta with content and multiple tool calls", + line: []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{"content":"text","tool_calls":[{"id":"t1","function":{"name":"tool1","arguments":"{}"}},{"id":"t2","function":{"name":"tool2","arguments":"{\"key\":\"value\"}"}}]}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 3) + assert.Equal(t, transcript.BlockTypeText, blocks[0].Type) + assert.Equal(t, transcript.BlockTypeToolUse, blocks[1].Type) + assert.Equal(t, "tool1", blocks[1].ToolName) + assert.Equal(t, "t1", blocks[1].ToolID) + assert.Equal(t, transcript.BlockTypeToolUse, blocks[2].Type) + assert.Equal(t, "tool2", blocks[2].ToolName) + assert.Equal(t, "t2", blocks[2].ToolID) + }, + }, + { + name: "message fallback when delta is empty", + line: []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{},"message":{"content":"fallback"}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeText, blocks[0].Type) + assert.Equal(t, "fallback", blocks[0].Text) + }, + }, + { + name: "message fallback with tool call", + line: []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{},"message":{"tool_calls":[{"id":"t3","function":{"name":"fallback_tool","arguments":"{}"}}]}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeToolUse, blocks[0].Type) + assert.Equal(t, "fallback_tool", blocks[0].ToolName) + assert.Equal(t, "t3", blocks[0].ToolID) + }, + }, + { + name: "delta preferred over message", + line: []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{"content":"delta_content"},"message":{"content":"message_content"}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + 
require.Len(t, blocks, 1) + assert.Equal(t, "delta_content", blocks[0].Text) + }, + }, + { + name: "empty object", + line: []byte(`{"object":"","choices":[{"delta":{"content":"text"}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "empty choices array", + line: []byte(`{"object":"chat.completion.chunk","choices":[]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "missing object field", + line: []byte(`{"choices":[{"delta":{"content":"text"}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "missing choices field", + line: []byte(`{"object":"chat.completion.chunk"}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "delta and message both empty", + line: []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{},"message":{}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "empty content string", + line: []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{"content":""}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "tool call with empty name", + line: []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{"tool_calls":[{"id":"t1","function":{"name":"","arguments":"{}"}}]}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeToolUse, blocks[0].Type) + assert.Empty(t, blocks[0].ToolName) + assert.Equal(t, "t1", blocks[0].ToolID) + }, + }, + { + name: "tool call with complex arguments", + line: 
[]byte(`{"object":"chat.completion.chunk","choices":[{"delta":{"tool_calls":[{"id":"t1","function":{"name":"tool","arguments":"{\"nested\":{\"key\":\"value\"}}"}}]}}]}`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + require.Len(t, blocks, 1) + assert.Equal(t, "tool", blocks[0].ToolName) + assert.Equal(t, "t1", blocks[0].ToolID) + }, + }, + { + name: "malformed json invalid utf8", + line: []byte{0xff, 0xfe}, + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "malformed json truncated", + line: []byte(`{"object":"chat.completion.chunk","choices":`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "malformed json not json", + line: []byte(`not json`), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + { + name: "empty input", + line: []byte(``), + validate: func(t *testing.T, blocks []transcript.ContentBlock) { + assert.Empty(t, blocks) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + blocks := OpenAICompatibleToContentBlocks(tt.line) + tt.validate(t, blocks) + }) + } +} + +func TestOpenAICompatibleToContentBlocks_DeltaContentAndToolCall(t *testing.T) { + line := []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{"content":"hi","tool_calls":[{"id":"t1","function":{"name":"n","arguments":"{}"}}]}}]}`) + blocks := OpenAICompatibleToContentBlocks(line) + + require.Len(t, blocks, 2) + assert.Equal(t, transcript.BlockTypeText, blocks[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, blocks[0].Fidelity) + assert.Equal(t, "hi", blocks[0].Text) + + assert.Equal(t, transcript.BlockTypeToolUse, blocks[1].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, blocks[1].Fidelity) + assert.Equal(t, "n", blocks[1].ToolName) + assert.Equal(t, "t1", blocks[1].ToolID) +} + +func 
TestOpenAICompatibleToContentBlocks_MessageFallback(t *testing.T) { + line := []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{},"message":{"content":"fallback"}}]}`) + blocks := OpenAICompatibleToContentBlocks(line) + + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeText, blocks[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, blocks[0].Fidelity) + assert.Equal(t, "fallback", blocks[0].Text) +} + +func TestOpenAICompatibleToContentBlocks_DanglingToolUse(t *testing.T) { + line := []byte(`{"object":"chat.completion.chunk","choices":[{"delta":{"tool_calls":[{"id":"t1","function":{"name":"bash","arguments":"{}"}}]}}]}`) + blocks := OpenAICompatibleToContentBlocks(line) + + require.Len(t, blocks, 1) + assert.Equal(t, transcript.BlockTypeToolUse, blocks[0].Type) + assert.Equal(t, transcript.FidelityAgentEmitted, blocks[0].Fidelity) + assert.Equal(t, "bash", blocks[0].ToolName) + assert.Equal(t, "t1", blocks[0].ToolID) +} + +func TestOpenAICompatibleToContentBlocks_NoObjectOrChoices(t *testing.T) { + testCases := []struct { + name string + line []byte + }{ + { + name: "empty object", + line: []byte(`{"object":"","choices":[{"delta":{"content":"text"}}]}`), + }, + { + name: "empty choices", + line: []byte(`{"object":"chat.completion.chunk","choices":[]}`), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + blocks := OpenAICompatibleToContentBlocks(tc.line) + assert.Empty(t, blocks) + }) + } +} diff --git a/internal/infrastructure/agents/testdata/transcript/claude/input.jsonl b/internal/infrastructure/agents/testdata/transcript/claude/input.jsonl new file mode 100644 index 00000000..72eb97db --- /dev/null +++ b/internal/infrastructure/agents/testdata/transcript/claude/input.jsonl @@ -0,0 +1,4 @@ +{"type":"user","message":{"content":[{"type":"text","text":"Hello, can you help?"}]}} +{"type":"assistant","message":{"content":[{"type":"thinking","thinking":"Let me think about this 
carefully."},{"type":"text","text":"Hello, I can help with that."},{"type":"tool_use","id":"toolu_01ABC","name":"bash","input":{"command":"echo hello"}}]}} +{"type":"assistant","message":{"content":[{"type":"text","text":"The command ran successfully."}]}} +{"type":"assistant","message":{"content":[{"type":"text","text":""},{"type":"thinking","thinking":"Should I say more?"}]}} diff --git a/internal/infrastructure/agents/testdata/transcript/codex/input.jsonl b/internal/infrastructure/agents/testdata/transcript/codex/input.jsonl new file mode 100644 index 00000000..b2aa1cb1 Binary files /dev/null and b/internal/infrastructure/agents/testdata/transcript/codex/input.jsonl differ diff --git a/internal/infrastructure/agents/testdata/transcript/copilot/input.jsonl b/internal/infrastructure/agents/testdata/transcript/copilot/input.jsonl new file mode 100644 index 00000000..f69605e6 --- /dev/null +++ b/internal/infrastructure/agents/testdata/transcript/copilot/input.jsonl @@ -0,0 +1,4 @@ +{"type":"system.ready","data":{}} +{"type":"assistant.message","data":{"content":"Hello, I can assist you with that."}} +{"type":"tool.execution_start","data":{"toolName":"bash"}} +{"type":"assistant.message","data":{"content":"I've completed the task."}} diff --git a/internal/infrastructure/agents/testdata/transcript/gemini/input.jsonl b/internal/infrastructure/agents/testdata/transcript/gemini/input.jsonl new file mode 100644 index 00000000..bd0f2201 --- /dev/null +++ b/internal/infrastructure/agents/testdata/transcript/gemini/input.jsonl @@ -0,0 +1,4 @@ +{"type":"user","role":"user","content":"Hello, can you help?"} +{"type":"message","role":"assistant","content":"Hello! 
I'd be happy to help."} +{"type":"message","role":"assistant","content":"Let me run a command for you.","toolCalls":[{"name":"bash","arguments":{"command":"ls -la"}}]} +{"type":"message","role":"assistant","content":"","toolCalls":[{"name":"write_file","arguments":{"content":"done","path":"/tmp/out.txt"}}]} diff --git a/internal/infrastructure/agents/testdata/transcript/golden/claude.json b/internal/infrastructure/agents/testdata/transcript/golden/claude.json new file mode 100644 index 00000000..025364c3 --- /dev/null +++ b/internal/infrastructure/agents/testdata/transcript/golden/claude.json @@ -0,0 +1,31 @@ +[ + { + "type": "thinking", + "fidelity": "agent_emitted", + "thinking": "Let me think about this carefully." + }, + { + "type": "text", + "fidelity": "agent_emitted", + "text": "Hello, I can help with that." + }, + { + "type": "tool_use", + "fidelity": "agent_emitted", + "tool_name": "bash", + "tool_id": "toolu_01ABC", + "tool_input": { + "command": "echo hello" + } + }, + { + "type": "text", + "fidelity": "agent_emitted", + "text": "The command ran successfully." + }, + { + "type": "thinking", + "fidelity": "agent_emitted", + "thinking": "Should I say more?" + } +] diff --git a/internal/infrastructure/agents/testdata/transcript/golden/codex.json b/internal/infrastructure/agents/testdata/transcript/golden/codex.json new file mode 100644 index 00000000..047a28f0 --- /dev/null +++ b/internal/infrastructure/agents/testdata/transcript/golden/codex.json @@ -0,0 +1,18 @@ +[ + { + "type": "text", + "fidelity": "agent_emitted", + "text": "Hello\u0000world" + }, + { + "type": "tool_use", + "fidelity": "agent_emitted", + "tool_name": "bash", + "tool_input": "{\"command\":\"ls\"}" + }, + { + "type": "text", + "fidelity": "agent_emitted", + "text": "Task complete." 
+ } +] diff --git a/internal/infrastructure/agents/testdata/transcript/golden/copilot.json b/internal/infrastructure/agents/testdata/transcript/golden/copilot.json new file mode 100644 index 00000000..d12bf51e --- /dev/null +++ b/internal/infrastructure/agents/testdata/transcript/golden/copilot.json @@ -0,0 +1,17 @@ +[ + { + "type": "text", + "fidelity": "agent_emitted", + "text": "Hello, I can assist you with that." + }, + { + "type": "tool_use", + "fidelity": "agent_emitted", + "tool_name": "bash" + }, + { + "type": "text", + "fidelity": "agent_emitted", + "text": "I've completed the task." + } +] diff --git a/internal/infrastructure/agents/testdata/transcript/golden/gemini.json b/internal/infrastructure/agents/testdata/transcript/golden/gemini.json new file mode 100644 index 00000000..53ec4a7b --- /dev/null +++ b/internal/infrastructure/agents/testdata/transcript/golden/gemini.json @@ -0,0 +1,29 @@ +[ + { + "type": "text", + "fidelity": "agent_emitted", + "text": "Hello! I'd be happy to help." + }, + { + "type": "text", + "fidelity": "agent_emitted", + "text": "Let me run a command for you." + }, + { + "type": "tool_use", + "fidelity": "agent_emitted", + "tool_name": "bash", + "tool_input": { + "command": "ls -la" + } + }, + { + "type": "tool_use", + "fidelity": "agent_emitted", + "tool_name": "write_file", + "tool_input": { + "content": "done", + "path": "/tmp/out.txt" + } + } +] diff --git a/internal/infrastructure/agents/testdata/transcript/golden/openaicompatible.json b/internal/infrastructure/agents/testdata/transcript/golden/openaicompatible.json new file mode 100644 index 00000000..30583ea2 --- /dev/null +++ b/internal/infrastructure/agents/testdata/transcript/golden/openaicompatible.json @@ -0,0 +1,19 @@ +[ + { + "type": "text", + "fidelity": "agent_emitted", + "text": "Hello! How can I help you?" 
+ }, + { + "type": "tool_use", + "fidelity": "agent_emitted", + "tool_name": "bash", + "tool_id": "call_abc123", + "tool_input": "{\"command\":\"echo hello\"}" + }, + { + "type": "text", + "fidelity": "agent_emitted", + "text": "I've completed the task." + } +] diff --git a/internal/infrastructure/agents/testdata/transcript/openaicompatible/input.jsonl b/internal/infrastructure/agents/testdata/transcript/openaicompatible/input.jsonl new file mode 100644 index 00000000..13fd78ed --- /dev/null +++ b/internal/infrastructure/agents/testdata/transcript/openaicompatible/input.jsonl @@ -0,0 +1,4 @@ +{"object":"","choices":[]} +{"object":"chat.completion.chunk","choices":[{"delta":{"content":"Hello! How can I help you?"}}]} +{"object":"chat.completion.chunk","choices":[{"delta":{"tool_calls":[{"id":"call_abc123","function":{"name":"bash","arguments":"{\"command\":\"echo hello\"}"}}]}}]} +{"object":"chat.completion","choices":[{"message":{"content":"I've completed the task."}}]} diff --git a/internal/infrastructure/transcript/doc.go b/internal/infrastructure/transcript/doc.go new file mode 100644 index 00000000..dee19ebf --- /dev/null +++ b/internal/infrastructure/transcript/doc.go @@ -0,0 +1,185 @@ +// Package transcript implements the infrastructure adapter that persists agent +// exchange events to an append-only JSONL file and broadcasts them to in-process +// subscribers over buffered channels. +// +// It is the concrete implementation of ports.Recorder declared in the domain +// layer. Its position in the hexagonal architecture: infrastructure depends +// inward on internal/domain/transcript (value types) and internal/domain/ports +// (the Recorder interface and Logger). No application or interface layer symbol +// may appear in this package's import graph. +// +// # Purpose +// +// This package closes the loop between event emission and durable storage. 
+// Every ExchangeEvent recorded by the domain flows through Recorder.Record, +// which enforces two invariants: +// +// 1. Disk-first ordering (FR-003): the event is written to the JSONL file +// before it is broadcast to any subscriber. If the disk write fails the +// broadcast is suppressed, so subscribers only see events that are durably +// persisted. +// +// 2. Monotonic sequence numbers (D1): Seq is allocated by an atomic counter +// owned exclusively by Recorder. The counter is never delegated to the +// writer or the fanout so a single source guarantees ordering across +// concurrent callers and across fan-out paths. +// +// # Public Surface +// +// The public surface consists of the following symbols: +// +// - NewRecorder(path string, opts ...RecorderOption) (*Recorder, error) +// Opens or creates the JSONL file at path (parent directories are created +// with mode 0o700; the file itself is opened with mode 0o600 and O_APPEND). +// Initializes the internal FanOut broadcaster. Returns an error if the +// file cannot be opened. All functional options are applied before any +// I/O so the masker and logger are available for the first Record call. +// +// - (*Recorder).Record(ctx context.Context, event transcript.ExchangeEvent) error +// Validates the event (Type must not be empty; returns ports.ErrInvalidEvent +// otherwise), applies the masker if configured, allocates Seq if zero, +// writes to the JSONL file, and on success publishes to all subscribers. +// Honors ctx cancellation; returns ctx.Err() if the context is already done +// before the write begins. Non-blocking: FanOut.Publish never blocks. +// +// - (*Recorder).Subscribe() (<-chan transcript.ExchangeEvent, func()) +// Delegates to FanOut.Subscribe. Returns a buffered receive channel and an +// idempotent cancel closure. The channel is closed when either the cancel +// closure is called or Recorder.Close is invoked. +// +// - (*Recorder).Close() error +// Idempotent via sync.Once. 
Closes the FanOut (drains subscribers) then +// the JSONLWriter (flushes the OS buffer and closes the file descriptor). +// Second and subsequent calls return nil. +// +// - RecorderOption (functional option type) +// Passed to NewRecorder to tune behavior without breaking the constructor +// signature. Available options are documented below under Internal Layout. +// +// # Internal Layout +// +// Four non-test files carry the implementation: +// +// - recorder.go — Recorder struct, NewRecorder, Record, Subscribe, Close, and +// the three functional options. Owns the atomic Seq counter (seq uint64). +// Wires JSONLWriter and FanOut. Enforces disk-first ordering and nil-guards +// the optional masker before applying it. +// +// - jsonl_writer.go — JSONLWriter: mutex-protected O_APPEND file writer. Each +// Write call marshals the event to JSON, appends a newline, and writes the +// resulting bytes under a mutex. The mutex is held for the duration of the +// write syscall to guarantee atomicity beyond PIPE_BUF for large payloads. +// Parent directories are created with 0o700; the file is opened with 0o600. +// +// - fanout.go — FanOut: bounded pub-sub broadcaster. Each subscriber receives +// a buffered channel (default 256 events). When a subscriber's buffer is +// full, the newest event is dropped (oldest preserved) and the drop count is +// incremented atomically. WARN-level logging is rate-limited to one message +// per subscriber per second to prevent log flooding during high-loss periods. +// Unsubscribe and Close are idempotent via per-subscriber sync.Once semantics. +// +// - reader.go — Reader: tolerant JSONL reader for audit and replay. Skips +// empty lines; returns ErrLineMalformed (with line number context) on +// malformed JSON but continues reading subsequent lines. Tolerates unknown +// event types and block types without error (forward-compatibility policy). 
+// +// Functional options available via NewRecorder: +// +// - WithFanOutBufferSize(size int) RecorderOption +// Overrides the per-subscriber channel capacity (default 256). Must be +// called before the first Subscribe; changing buffer size after subscription +// has no effect on existing subscribers. +// +// - WithRecorderLogger(logger ports.Logger) RecorderOption +// Injects a structured logger for drop warnings and diagnostic output. +// Defaults to ports.NopLogger. The same logger instance is forwarded to +// the internal FanOut so all drop events share a single logger. +// +// - WithMasker(fn func(transcript.ExchangeEvent) transcript.ExchangeEvent) RecorderOption +// Registers a transformation function applied to every event in Record, +// before the disk write. The masker receives the event with Seq already +// allocated and must return a (possibly modified) ExchangeEvent. The +// returned value is what is written to disk AND what is broadcast to +// subscribers — masking is applied exactly once. Default is nil (no-op). +// +// # Threat Model +// +// The transcript infrastructure handles sensitive data (agent prompts, tool +// arguments, model responses). Threat scenarios addressed: +// +// - Sensitive data at rest: The JSONL file is created with mode 0o600 +// (owner read/write only). Parent directories are created with 0o700. +// No world-readable or group-readable bits are set. Operators running +// AWF as a shared user must apply additional filesystem-level access +// controls (ACLs, encrypted volumes) independently. +// +// - Append-only integrity: The file is opened with O_APPEND which makes +// each write atomic at the OS level for payloads up to PIPE_BUF (4 KiB +// on Linux). For larger payloads the JSONLWriter mutex extends atomicity +// to the full write. O_APPEND prevents seek-then-write races from +// concurrent processes opening the same path (multi-process invariant +// tested in jsonl_writer_atomicity_test.go). 
+// +// - Secret masking — deferred opt-in policy: Secret masking is NOT applied +// by default. Callers that require masking must inject WithMasker. This +// deferred opt-in policy is intentional (Notes§6): shipping a masker +// without a well-defined secret catalog would silently miss values, +// creating a false sense of protection. Callers that configure WithMasker +// are responsible for maintaining the catalog of sensitive field paths. +// The masker is applied before both the disk write and the fan-out +// broadcast so no raw secret escapes to either path once configured. +// +// - Subscriber isolation: Each subscriber's channel is independent and +// bounded. A slow subscriber cannot stall the Record call (FanOut.Publish +// is non-blocking), cannot exhaust memory beyond bufferSize events per +// subscriber, and cannot observe events that failed the disk write. +// Drop counts are tracked atomically per subscriber and in aggregate via +// FanOut.Stats. +// +// - Zero-value event guard: Record returns ports.ErrInvalidEvent immediately +// when event.Type is empty. This prevents zero-value ExchangeEvent structs +// (common programming mistakes) from polluting the transcript or the +// subscriber channels. The guard is checked before the masker is applied +// so the masker never receives invalid input. +// +// # Error Taxonomy +// +// Errors fall into three classes: +// +// - ports.ErrInvalidEvent: Returned by Record when event.Type is empty. +// The caller must supply a well-formed event. No file I/O is attempted. +// +// - Write errors (fmt.Errorf wrapping os.File errors): Returned by Record +// when the JSON marshal or the file write fails. The broadcast is +// suppressed. The file descriptor remains open; subsequent Record calls +// may succeed if the underlying condition (full disk, revoked permission) +// is resolved. Callers that require guaranteed delivery must handle write +// errors and retry or escalate. 
+// +// - Constructor errors (fmt.Errorf wrapping os.MkdirAll / os.OpenFile +// errors): Returned by NewRecorder when the file or its parent directory +// cannot be created or opened. The Recorder is nil; no resources are +// allocated. Callers must treat a non-nil constructor error as fatal. +// +// # Dependency Contract +// +// This package is permitted to import: +// +// - Standard library (bufio, context, encoding/json, errors, fmt, io, os, +// path/filepath, sync, sync/atomic, time) +// - github.com/google/uuid — used by FanOut to key subscribers. +// - go.uber.org/zap — permitted transitively via ports.Logger; the package +// does not import zap directly but the Logger interface is compatible. +// - internal/domain/transcript — ExchangeEvent, EventType, ContentBlock, +// and payload types. These are the only domain types referenced directly. +// - internal/domain/ports — ports.Recorder (interface implemented here), +// ports.Logger (injected via functional option), ports.ErrInvalidEvent, +// ports.NopLogger (default logger). +// +// It MUST NOT import: +// +// - internal/application — hexagonal rule: infrastructure must not depend +// on the application layer. +// - internal/interfaces — same hexagonal rule. +// - Any other internal package not listed above. 
+package transcript diff --git a/internal/infrastructure/transcript/fanout.go b/internal/infrastructure/transcript/fanout.go new file mode 100644 index 00000000..3cb21b41 --- /dev/null +++ b/internal/infrastructure/transcript/fanout.go @@ -0,0 +1,140 @@ +package transcript + +import ( + "sync" + "sync/atomic" + "time" + + "github.com/google/uuid" + + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" +) + +type FanOutOption func(*FanOut) + +type FanOutStats struct { + Subscribers int + Drops uint64 +} + +type subscriber struct { + ch chan transcript.ExchangeEvent + once sync.Once + dropCount atomic.Uint64 + lastWarnAt atomic.Int64 // unix nano +} + +type FanOut struct { + mu sync.RWMutex + subscribers map[uuid.UUID]*subscriber + bufferSize int + logger ports.Logger + closed bool + totalDrops atomic.Uint64 +} + +func NewFanOut(opts ...FanOutOption) *FanOut { + fo := &FanOut{ + subscribers: make(map[uuid.UUID]*subscriber), + bufferSize: 256, + logger: ports.NopLogger{}, + } + + for _, opt := range opts { + opt(fo) + } + + return fo +} + +func WithBufferSize(size int) FanOutOption { + return func(fo *FanOut) { + fo.bufferSize = size + } +} + +func WithLogger(logger ports.Logger) FanOutOption { + return func(fo *FanOut) { + fo.logger = logger + } +} + +func (fo *FanOut) Subscribe() (events <-chan transcript.ExchangeEvent, unsubscribe func()) { + fo.mu.Lock() + defer fo.mu.Unlock() + + if fo.closed { + return nil, func() {} + } + + id := uuid.New() + sub := &subscriber{ + ch: make(chan transcript.ExchangeEvent, fo.bufferSize), + } + fo.subscribers[id] = sub + + cancel := func() { + fo.mu.Lock() + delete(fo.subscribers, id) + fo.mu.Unlock() + sub.once.Do(func() { close(sub.ch) }) + } + + return sub.ch, cancel +} + +func (fo *FanOut) Publish(event transcript.ExchangeEvent) { //nolint:gocritic // hugeParam: value semantics required; callers pass struct literals matching the domain port contract + if event.Type == "" 
{ + fo.logger.Warn("publish called with zero-value event") + return + } + + fo.mu.RLock() + defer fo.mu.RUnlock() + + for _, sub := range fo.subscribers { + select { + case sub.ch <- event: + default: + sub.dropCount.Add(1) + fo.totalDrops.Add(1) + last := sub.lastWarnAt.Load() + if now := time.Now().UnixNano(); now-last > int64(time.Second) { + sub.lastWarnAt.Store(now) + fo.logger.Warn("subscriber buffer full, dropping event") + } + } + } +} + +func (fo *FanOut) Stats() FanOutStats { + fo.mu.RLock() + subscribers := len(fo.subscribers) + fo.mu.RUnlock() + + return FanOutStats{ + Subscribers: subscribers, + Drops: fo.totalDrops.Load(), + } +} + +func (fo *FanOut) Close() error { + fo.mu.Lock() + if fo.closed { + fo.mu.Unlock() + return nil + } + fo.closed = true + subs := make([]*subscriber, 0, len(fo.subscribers)) + for _, sub := range fo.subscribers { + subs = append(subs, sub) + } + fo.subscribers = make(map[uuid.UUID]*subscriber) + fo.mu.Unlock() + + for _, sub := range subs { + sub.once.Do(func() { close(sub.ch) }) + } + return nil +} diff --git a/internal/infrastructure/transcript/fanout_test.go b/internal/infrastructure/transcript/fanout_test.go new file mode 100644 index 00000000..d26847bd --- /dev/null +++ b/internal/infrastructure/transcript/fanout_test.go @@ -0,0 +1,463 @@ +package transcript_test + +import ( + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" + infra "github.com/awf-project/cli/internal/infrastructure/transcript" +) + +// fakeLogger captures calls for assertion. 
+type fakeLogger struct { + mu sync.Mutex + warns []string + debugs []string + infos []string + errors []string + contexts []map[string]any +} + +func (fl *fakeLogger) Debug(msg string, fields ...any) { + fl.mu.Lock() + defer fl.mu.Unlock() + fl.debugs = append(fl.debugs, msg) +} + +func (fl *fakeLogger) Info(msg string, fields ...any) { + fl.mu.Lock() + defer fl.mu.Unlock() + fl.infos = append(fl.infos, msg) +} + +func (fl *fakeLogger) Warn(msg string, fields ...any) { + fl.mu.Lock() + defer fl.mu.Unlock() + fl.warns = append(fl.warns, msg) +} + +func (fl *fakeLogger) Error(msg string, fields ...any) { + fl.mu.Lock() + defer fl.mu.Unlock() + fl.errors = append(fl.errors, msg) +} + +func (fl *fakeLogger) WithContext(ctx map[string]any) ports.Logger { + fl.mu.Lock() + defer fl.mu.Unlock() + fl.contexts = append(fl.contexts, ctx) + return fl +} + +func (fl *fakeLogger) warnCount() int { + fl.mu.Lock() + defer fl.mu.Unlock() + return len(fl.warns) +} + +func TestFanOut_NewFanOutWithDefaults(t *testing.T) { + fo := infra.NewFanOut() + require.NotNil(t, fo) + stats := fo.Stats() + assert.Equal(t, 0, stats.Subscribers) + assert.Equal(t, uint64(0), stats.Drops) +} + +func TestFanOut_WithBufferSize(t *testing.T) { + fo := infra.NewFanOut(infra.WithBufferSize(512)) + require.NotNil(t, fo) + + ch, cancel := fo.Subscribe() + defer cancel() + + require.NotNil(t, ch) + stats := fo.Stats() + assert.Equal(t, 1, stats.Subscribers) +} + +func TestFanOut_WithLogger(t *testing.T) { + fakeLog := &fakeLogger{} + fo := infra.NewFanOut(infra.WithLogger(fakeLog)) + require.NotNil(t, fo) + + ch, cancel := fo.Subscribe() + defer cancel() + + event := transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + RunID: "run-123", + } + fo.Publish(event) + + select { + case received := <-ch: + assert.Equal(t, transcript.EventTypeRunStarted, received.Type) + assert.Equal(t, "run-123", received.RunID) + case <-time.After(100 * time.Millisecond): + t.Fatal("timeout waiting for event") + } 
+} + +func TestFanOut_SubscribeReturnsChannelAndCancel(t *testing.T) { + fo := infra.NewFanOut() + ch, cancel := fo.Subscribe() + + require.NotNil(t, ch) + require.NotNil(t, cancel) + + event := transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + RunID: "run-123", + } + fo.Publish(event) + + received := <-ch + assert.Equal(t, transcript.EventTypeRunStarted, received.Type) + + cancel() + + select { + case _, ok := <-ch: + assert.False(t, ok, "channel should be closed after cancel") + case <-time.After(100 * time.Millisecond): + } +} + +func TestFanOut_PublishToMultipleSubscribers(t *testing.T) { + fo := infra.NewFanOut() + + ch1, cancel1 := fo.Subscribe() + defer cancel1() + + ch2, cancel2 := fo.Subscribe() + defer cancel2() + + require.Equal(t, 2, fo.Stats().Subscribers) + + event := transcript.ExchangeEvent{ + Type: transcript.EventTypeMessageUser, + RunID: "run-456", + } + fo.Publish(event) + + received1 := <-ch1 + received2 := <-ch2 + + assert.Equal(t, event.Type, received1.Type) + assert.Equal(t, event.Type, received2.Type) + assert.Equal(t, event.RunID, received1.RunID) + assert.Equal(t, event.RunID, received2.RunID) +} + +func TestFanOut_BoundedBufferDropsNewest(t *testing.T) { + fo := infra.NewFanOut(infra.WithBufferSize(4)) + ch, cancel := fo.Subscribe() + defer cancel() + + // Fill buffer with 4 events + for i := range 4 { + event := transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + RunID: "run-123", + Seq: uint64(i + 1), + } + fo.Publish(event) + } + + // Publish 5th event; should be dropped (drop-newest policy) + fo.Publish(transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + RunID: "run-123", + Seq: 5, + }) + + // Verify first 4 events are in channel (drop-newest = newest dropped, oldest preserved) + received1 := <-ch + assert.Equal(t, uint64(1), received1.Seq) + + received2 := <-ch + assert.Equal(t, uint64(2), received2.Seq) + + received3 := <-ch + assert.Equal(t, uint64(3), received3.Seq) + + received4 
:= <-ch + assert.Equal(t, uint64(4), received4.Seq) + + // Stats should show 1 drop + stats := fo.Stats() + assert.Greater(t, stats.Drops, uint64(0)) +} + +func TestFanOut_SlowSubscriberDoesNotBlockProducer(t *testing.T) { + fo := infra.NewFanOut() + ch, cancel := fo.Subscribe() + defer cancel() + + eventCount := atomic.Int32{} + start := time.Now() + + // Subscriber sleeps (blocks) + go func() { + for range ch { + time.Sleep(10 * time.Millisecond) + } + }() + + // Producer publishes non-blocking + for i := range 100 { + fo.Publish(transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + RunID: "run-slow", + Seq: uint64(i), + }) + eventCount.Add(1) + } + + elapsed := time.Since(start) + + // Producer should finish quickly (100ms ~ 1-2x buffer drain time) + // NOT 1000ms (100 events * 10ms sleep) + assert.Less(t, elapsed, 500*time.Millisecond, + "producer should not block on slow subscriber (expected <500ms, got %v)", elapsed) + + cancel() + time.Sleep(100 * time.Millisecond) +} + +func TestFanOut_StatsExposeDropCount(t *testing.T) { + fo := infra.NewFanOut(infra.WithBufferSize(2)) + ch, cancel := fo.Subscribe() + + // Publish 10 events with only 2-size buffer + for i := range 10 { + fo.Publish(transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + RunID: "run-drop-test", + Seq: uint64(i), + }) + } + + // Drain channel to make room + _ = <-ch + _ = <-ch + + cancel() + + // Stats should reflect drops + stats := fo.Stats() + assert.Greater(t, stats.Drops, uint64(0), + "drops should be greater than 0 after buffer overflow") +} + +func TestFanOut_IdempotentClose(t *testing.T) { + fo := infra.NewFanOut() + ch, cancel := fo.Subscribe() + defer cancel() + + err1 := fo.Close() + require.NoError(t, err1) + + // Drain any pending events + select { + case <-ch: + default: + } + + err2 := fo.Close() + assert.NoError(t, err2, "second Close should also return nil") + + err3 := fo.Close() + assert.NoError(t, err3, "third Close should also return nil") + + _ 
= ch +} + +func TestFanOut_IdempotentSubscriberCancel(t *testing.T) { + fo := infra.NewFanOut() + ch, cancel := fo.Subscribe() + + // First cancel + cancel() + + // Verify channel is closed + select { + case _, ok := <-ch: + assert.False(t, ok, "channel should be closed") + case <-time.After(100 * time.Millisecond): + t.Fatal("timeout waiting for channel close") + } + + // Subsequent cancels should not panic + assert.NotPanics(t, func() { + cancel() + }) + + assert.NotPanics(t, func() { + cancel() + }) +} + +func TestFanOut_ZeroEventWarnsAndReturns(t *testing.T) { + fakeLog := &fakeLogger{} + fo := infra.NewFanOut(infra.WithLogger(fakeLog)) + _, cancel := fo.Subscribe() + defer cancel() + + // Publish zero-value event + assert.NotPanics(t, func() { + fo.Publish(transcript.ExchangeEvent{}) + }) + + // Should emit WARN via logger + assert.Greater(t, fakeLog.warnCount(), 0, + "publishing zero-value event should emit WARN") +} + +func TestFanOut_MultipleSubscribersIndependentDrops(t *testing.T) { + fo := infra.NewFanOut(infra.WithBufferSize(3)) + + ch1, cancel1 := fo.Subscribe() + ch2, cancel2 := fo.Subscribe() + defer cancel1() + defer cancel2() + + // Publish 5 events + for i := range 5 { + fo.Publish(transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + RunID: "multi-sub", + Seq: uint64(i), + }) + } + + // ch1 subscriber processes events + _ = <-ch1 + _ = <-ch1 + + // ch2 subscriber is slow; events will drop for it + _ = <-ch2 + + stats := fo.Stats() + assert.Greater(t, stats.Drops, uint64(0), + "drops should reflect slow subscriber buffer overflow") +} + +func TestFanOut_RateLimitedWarnPerSubscriber(t *testing.T) { + fakeLog := &fakeLogger{} + fo := infra.NewFanOut( + infra.WithBufferSize(1), + infra.WithLogger(fakeLog), + ) + + _, cancel := fo.Subscribe() + defer cancel() + + // Trigger drops within 1s window + fo.Publish(transcript.ExchangeEvent{Type: transcript.EventTypeRunStarted}) + fo.Publish(transcript.ExchangeEvent{Type: 
transcript.EventTypeRunStarted}) // Drop 1 + fo.Publish(transcript.ExchangeEvent{Type: transcript.EventTypeRunStarted}) // Drop 2 + + initialWarnCount := fakeLog.warnCount() + + // Publish again immediately (should NOT warn due to 1s rate limit) + fo.Publish(transcript.ExchangeEvent{Type: transcript.EventTypeRunStarted}) + fo.Publish(transcript.ExchangeEvent{Type: transcript.EventTypeRunStarted}) + + afterWarnCount := fakeLog.warnCount() + + // Should have warned for initial drops, but rate-limited subsequent warns + assert.Greater(t, initialWarnCount, 0, "should warn on drop") + // After rate limit, no new warns in same 1s window + assert.Equal(t, initialWarnCount, afterWarnCount, + "warns should be rate-limited to 1 per 1s per subscriber") +} + +func TestFanOut_CloseClosesAllSubscribers(t *testing.T) { + fo := infra.NewFanOut() + + ch1, cancel1 := fo.Subscribe() + ch2, cancel2 := fo.Subscribe() + defer cancel1() + defer cancel2() + + err := fo.Close() + require.NoError(t, err) + + // Both channels should be closed + time.Sleep(50 * time.Millisecond) + + select { + case _, ok := <-ch1: + assert.False(t, ok, "ch1 should be closed") + case <-time.After(100 * time.Millisecond): + t.Fatal("timeout waiting for ch1 close") + } + + select { + case _, ok := <-ch2: + assert.False(t, ok, "ch2 should be closed") + case <-time.After(100 * time.Millisecond): + t.Fatal("timeout waiting for ch2 close") + } +} + +func TestFanOut_PublishAfterCloseDoesNotCrash(t *testing.T) { + fo := infra.NewFanOut() + ch, cancel := fo.Subscribe() + defer cancel() + fo.Close() + _ = ch + + assert.NotPanics(t, func() { + fo.Publish(transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + }) + }) +} + +func TestFanOut_SubscribeAfterCloseReturnsNilChannel(t *testing.T) { + fo := infra.NewFanOut() + fo.Close() + + ch, cancel := fo.Subscribe() + defer cancel() + + // After close, Subscribe should either return nil or a closed channel + // Verify it doesn't panic when we try to use it + 
assert.NotPanics(t, func() { + select { + case _, ok := <-ch: + if ch != nil { + assert.False(t, ok, "channel should be closed") + } + case <-time.After(50 * time.Millisecond): + } + }) +} + +func TestFanOut_StatsSubscriberCount(t *testing.T) { + fo := infra.NewFanOut() + + assert.Equal(t, 0, fo.Stats().Subscribers) + + _, cancel1 := fo.Subscribe() + assert.Equal(t, 1, fo.Stats().Subscribers) + + _, cancel2 := fo.Subscribe() + assert.Equal(t, 2, fo.Stats().Subscribers) + + cancel1() + time.Sleep(50 * time.Millisecond) + assert.Equal(t, 1, fo.Stats().Subscribers) + + cancel2() + time.Sleep(50 * time.Millisecond) + assert.Equal(t, 0, fo.Stats().Subscribers) +} diff --git a/internal/infrastructure/transcript/jsonl_writer.go b/internal/infrastructure/transcript/jsonl_writer.go new file mode 100644 index 00000000..5e9b0319 --- /dev/null +++ b/internal/infrastructure/transcript/jsonl_writer.go @@ -0,0 +1,70 @@ +package transcript + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +// JSONLWriter appends ExchangeEvent records to a JSONL file. +// Mutex is held on every write to guarantee atomicity beyond PIPE_BUF. +type JSONLWriter struct { + path string + f *os.File + mu sync.Mutex + closeOnce sync.Once + closeErr error +} + +// NewJSONLWriter opens or creates the JSONL transcript file at path. +// Parent directories are created with mode 0o700. File mode is 0o600. 
+func NewJSONLWriter(path string) (*JSONLWriter, error) { + cleanPath := filepath.Clean(path) + if err := os.MkdirAll(filepath.Dir(cleanPath), 0o700); err != nil { + return nil, fmt.Errorf("creating transcript directory: %w", err) + } + + f, err := os.OpenFile(cleanPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o600) //nolint:gosec // path is cleaned above; callers control the input + if err != nil { + return nil, fmt.Errorf("opening transcript file: %w", err) + } + + return &JSONLWriter{path: cleanPath, f: f}, nil +} + +// Write marshals event to JSON, appends a newline, and writes atomically under lock. +// Returns ctx.Err() immediately if the context is already done. +func (w *JSONLWriter) Write(ctx context.Context, event transcript.ExchangeEvent) error { //nolint:gocritic // hugeParam: value receiver required so json.Marshal(event) invokes the custom MarshalJSON on value receiver + if err := ctx.Err(); err != nil { + return fmt.Errorf("transcript write canceled: %w", err) + } + + jsonBytes, err := json.Marshal(event) + if err != nil { + return fmt.Errorf("marshaling exchange event: %w", err) + } + + jsonBytes = append(jsonBytes, '\n') + + w.mu.Lock() + defer w.mu.Unlock() + + if _, err := w.f.Write(jsonBytes); err != nil { + return fmt.Errorf("writing exchange event: %w", err) + } + + return nil +} + +// Close closes the underlying file exactly once. Subsequent calls return nil. +func (w *JSONLWriter) Close() error { + w.closeOnce.Do(func() { + w.closeErr = w.f.Close() + }) + return w.closeErr +} diff --git a/internal/infrastructure/transcript/jsonl_writer_atomicity_test.go b/internal/infrastructure/transcript/jsonl_writer_atomicity_test.go new file mode 100644 index 00000000..d39db7cd --- /dev/null +++ b/internal/infrastructure/transcript/jsonl_writer_atomicity_test.go @@ -0,0 +1,136 @@ +//go:build !race + +// This test file carries the !race build tag. 
Spawning child processes via +// os/exec causes the race detector to emit false-positive reports: each +// subprocess opens the JSONL file independently so no Go-level race exists, +// but the detector cannot observe cross-process memory. In-process +// concurrency safety is already covered by T040's goroutine-level concurrent +// write test, which runs under -race. + +package transcript + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "sync" + "testing" + "time" + + domaintranscript "github.com/awf-project/cli/internal/domain/transcript" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const ( + childMarkerEnv = "JSONL_WRITER_CHILD" + childPathEnv = "JSONL_WRITER_PATH" + multiProcN = 4 + multiProcK = 500 +) + +// TestJSONLWriterChild is the subprocess entrypoint invoked by +// TestJSONLWriter_MultiProcessO_APPEND_Atomic via os/exec re-exec. +// Skips silently when JSONL_WRITER_CHILD is unset to prevent accidental execution. 
+func TestJSONLWriterChild(t *testing.T) { + if os.Getenv(childMarkerEnv) != "1" { + t.Skip("not a child process") + } + + path := os.Getenv(childPathEnv) + require.NotEmpty(t, path, "child: %s env var must be set", childPathEnv) + + pid := os.Getpid() + writer, err := NewJSONLWriter(path) + require.NoError(t, err) + defer func() { _ = writer.Close() }() + + ctx := context.Background() + for i := range multiProcK { + event := domaintranscript.ExchangeEvent{ + Seq: uint64(i), //nolint:gosec // G115: controlled test input (max 499, well within uint64) + RunID: "multiprocess-atomicity", + Type: domaintranscript.EventTypeStepStarted, + Path: "/test/multiprocess", + Iteration: i, + Timestamp: time.Now(), + Payload: &domaintranscript.StepPayload{ + Name: fmt.Sprintf("pid-%d", pid), + Kind: "atomicity-test", + }, + } + require.NoError(t, writer.Write(ctx, event)) + } +} + +func TestJSONLWriter_MultiProcessO_APPEND_Atomic(t *testing.T) { + tmpDir := t.TempDir() + transcriptPath := filepath.Join(tmpDir, "transcript.jsonl") + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + var wg sync.WaitGroup + errs := make([]error, multiProcN) + + for i := range multiProcN { + wg.Add(1) + go func(idx int) { + defer wg.Done() + cmd := exec.CommandContext(ctx, os.Args[0], "-test.run", "^TestJSONLWriterChild$") //nolint:gosec // G702: os.Args[0] is the current test binary; not user-controlled input + cmd.Env = append( + os.Environ(), + childMarkerEnv+"=1", + childPathEnv+"="+transcriptPath, + ) + if out, err := cmd.CombinedOutput(); err != nil { + errs[idx] = fmt.Errorf("child %d: %w\noutput: %s", idx, err, out) + } + }(i) + } + + wg.Wait() + + for _, err := range errs { + require.NoError(t, err) + } + + data, err := os.ReadFile(transcriptPath) + require.NoError(t, err) + + scanner := bufio.NewScanner(bytes.NewReader(data)) + lineCount := 0 + pidCounts := map[string]int{} + + for scanner.Scan() { + line := scanner.Text() + if line == "" { + 
continue + } + lineCount++ + + var event domaintranscript.ExchangeEvent + require.NoError(t, json.Unmarshal([]byte(line), &event), + "line %d must be valid JSON", lineCount) + + payload, ok := event.Payload.(*domaintranscript.StepPayload) + require.True(t, ok, "line %d payload must be *StepPayload, got %T", lineCount, event.Payload) + pidCounts[payload.Name]++ + } + require.NoError(t, scanner.Err()) + + assert.Equal(t, multiProcN*multiProcK, lineCount, + "total line count must be exactly %d", multiProcN*multiProcK) + assert.Len(t, pidCounts, multiProcN, + "all %d children must have contributed lines", multiProcN) + + for pidTag, count := range pidCounts { + assert.Equal(t, multiProcK, count, + "child %s must have written exactly %d lines", pidTag, multiProcK) + } +} diff --git a/internal/infrastructure/transcript/jsonl_writer_test.go b/internal/infrastructure/transcript/jsonl_writer_test.go new file mode 100644 index 00000000..b4542009 --- /dev/null +++ b/internal/infrastructure/transcript/jsonl_writer_test.go @@ -0,0 +1,411 @@ +package transcript_test + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/awf-project/cli/internal/domain/transcript" + infra "github.com/awf-project/cli/internal/infrastructure/transcript" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/sync/errgroup" +) + +func TestJSONLWriter_CreatesFileAt0600(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "transcript.jsonl") + + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + t.Cleanup(func() { _ = writer.Close() }) + + info, err := os.Stat(path) + require.NoError(t, err) + + perm := info.Mode().Perm() + assert.Equal(t, os.FileMode(0o600), perm, "file should have 0o600 permissions") +} + +func TestJSONLWriter_CreatesParentDirs(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "nested", "deep", "path", 
"transcript.jsonl") + + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + t.Cleanup(func() { _ = writer.Close() }) + + _, err = os.Stat(path) + assert.NoError(t, err, "file should be created with parent directories") + + parentDir := filepath.Dir(path) + info, err := os.Stat(parentDir) + require.NoError(t, err) + assert.True(t, info.IsDir(), "parent directory should be created") +} + +func TestJSONLWriter_AppendsNewlineDelimitedJSON(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "transcript.jsonl") + + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + t.Cleanup(func() { _ = writer.Close() }) + + ctx := context.Background() + events := make([]transcript.ExchangeEvent, 5) + for i := range 5 { + events[i] = transcript.ExchangeEvent{ + Seq: uint64(i), + RunID: fmt.Sprintf("run-%d", i), + Type: transcript.EventTypeRunStarted, + Path: "/test/path", + Iteration: i, + Timestamp: time.Now(), + Payload: nil, + } + err := writer.Write(ctx, events[i]) + require.NoError(t, err) + } + + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + scanner := bufio.NewScanner(file) + lineNum := 0 + for scanner.Scan() { + line := scanner.Text() + require.NotEmpty(t, line, "line should not be empty") + + var event transcript.ExchangeEvent + err := json.Unmarshal([]byte(line), &event) + require.NoError(t, err, "line %d should be valid JSON", lineNum) + assert.Equal(t, events[lineNum].Seq, event.Seq) + assert.Equal(t, events[lineNum].RunID, event.RunID) + + lineNum++ + } + require.NoError(t, scanner.Err()) + assert.Equal(t, 5, lineNum, "should have written exactly 5 lines") +} + +func TestJSONLWriter_IdempotentClose(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "transcript.jsonl") + + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + + err1 := writer.Close() + assert.NoError(t, err1, "first Close should return nil") + + err2 := writer.Close() + 
assert.NoError(t, err2, "second Close should return nil (idempotent)") +} + +func TestJSONLWriter_ConcurrentWritesSerialized(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "transcript.jsonl") + + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + t.Cleanup(func() { _ = writer.Close() }) + + const numGoroutines = 64 + const numWrites = 100 + const totalWrites = numGoroutines * numWrites + + ctx := context.Background() + eg := errgroup.Group{} + + for g := range numGoroutines { + goroutineID := g + eg.Go(func() error { + for w := range numWrites { + event := transcript.ExchangeEvent{ + Seq: uint64(goroutineID*numWrites + w), //nolint:gosec // G115: controlled test input (max 6399, well within uint64) + RunID: "concurrent-test", + Type: transcript.EventTypeRunStarted, + Path: fmt.Sprintf("/test/goroutine-%d", goroutineID), + Iteration: w, + Timestamp: time.Now(), + Payload: nil, + } + if err := writer.Write(ctx, event); err != nil { + return fmt.Errorf("write error: %w", err) + } + } + return nil + }) + } + require.NoError(t, eg.Wait()) + + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + scanner := bufio.NewScanner(file) + lineCount := 0 + for scanner.Scan() { + line := scanner.Text() + var event transcript.ExchangeEvent + err := json.Unmarshal([]byte(line), &event) + require.NoError(t, err, "line %d should be valid JSON (no torn writes)", lineCount) + + lineCount++ + } + require.NoError(t, scanner.Err()) + assert.Equal(t, totalWrites, lineCount, "should have written all %d events without tearing", totalWrites) +} + +func TestJSONLWriter_PayloadBeyondPipeBuf(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "transcript.jsonl") + + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + t.Cleanup(func() { _ = writer.Close() }) + + largePayload := strings.Repeat("x", 16*1024) + + ctx := context.Background() + event := transcript.ExchangeEvent{ + Seq: 1, + RunID: 
"large-payload-test", + Type: transcript.EventTypeRunStarted, + Path: "/test/path", + Iteration: 0, + Timestamp: time.Now(), + Payload: largePayload, + } + err = writer.Write(ctx, event) + require.NoError(t, err) + + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + scanner := bufio.NewScanner(file) + lineCount := 0 + for scanner.Scan() { + line := scanner.Text() + var readEvent transcript.ExchangeEvent + err := json.Unmarshal([]byte(line), &readEvent) + require.NoError(t, err, "large payload should unmarshal as single line") + + payload, ok := readEvent.Payload.(string) + require.True(t, ok, "payload should be a string") + assert.Equal(t, largePayload, payload, "payload should round-trip correctly") + + lineCount++ + } + require.NoError(t, scanner.Err()) + assert.Equal(t, 1, lineCount, "large payload should result in exactly one line") +} + +func TestJSONLWriter_ContextCancelled(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "transcript.jsonl") + + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + t.Cleanup(func() { _ = writer.Close() }) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + event := transcript.ExchangeEvent{ + Seq: 1, + RunID: "cancelled-test", + Type: transcript.EventTypeRunStarted, + Path: "/test/path", + Iteration: 0, + Timestamp: time.Now(), + Payload: nil, + } + + err = writer.Write(ctx, event) + assert.ErrorIs(t, err, context.Canceled, "Write should return context.Canceled") + + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + scanner := bufio.NewScanner(file) + lineCount := 0 + for scanner.Scan() { + lineCount++ + } + require.NoError(t, scanner.Err()) + assert.Equal(t, 0, lineCount, "cancelled context should prevent write") +} + +func TestJSONLWriter_ContextDeadlineExceeded(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "transcript.jsonl") + + writer, err := infra.NewJSONLWriter(path) + 
require.NoError(t, err) + t.Cleanup(func() { _ = writer.Close() }) + + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(-1*time.Second)) + defer cancel() + + event := transcript.ExchangeEvent{ + Seq: 1, + RunID: "deadline-test", + Type: transcript.EventTypeRunStarted, + Path: "/test/path", + Iteration: 0, + Timestamp: time.Now(), + Payload: nil, + } + + err = writer.Write(ctx, event) + assert.ErrorIs(t, err, context.DeadlineExceeded, "Write should return context.DeadlineExceeded") + + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + scanner := bufio.NewScanner(file) + lineCount := 0 + for scanner.Scan() { + lineCount++ + } + require.NoError(t, scanner.Err()) + assert.Equal(t, 0, lineCount, "deadline exceeded should prevent write") +} + +func TestJSONLWriter_MultipleWritesSequential(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "transcript.jsonl") + + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + t.Cleanup(func() { _ = writer.Close() }) + + ctx := context.Background() + event1 := transcript.ExchangeEvent{ + Seq: 1, + RunID: "run-1", + Type: transcript.EventTypeRunStarted, + Path: "/test/path", + Iteration: 0, + Timestamp: time.Now(), + Payload: "payload1", + } + + event2 := transcript.ExchangeEvent{ + Seq: 2, + RunID: "run-1", + Type: transcript.EventTypeRunCompleted, + Path: "/test/path", + Iteration: 0, + Timestamp: time.Now(), + Payload: "payload2", + } + + err = writer.Write(ctx, event1) + require.NoError(t, err) + + err = writer.Write(ctx, event2) + require.NoError(t, err) + + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + scanner := bufio.NewScanner(file) + lines := []string{} + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + require.NoError(t, scanner.Err()) + + require.Len(t, lines, 2) + + var readEvent1, readEvent2 transcript.ExchangeEvent + err = json.Unmarshal([]byte(lines[0]), &readEvent1) + 
require.NoError(t, err) + err = json.Unmarshal([]byte(lines[1]), &readEvent2) + require.NoError(t, err) + + assert.Equal(t, event1.Seq, readEvent1.Seq) + assert.Equal(t, event2.Seq, readEvent2.Seq) +} + +func TestJSONLWriter_FileModeWithDifferentPath(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "custom_name.jsonl") + + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + t.Cleanup(func() { _ = writer.Close() }) + + info, err := os.Stat(path) + require.NoError(t, err) + + perm := info.Mode().Perm() + assert.Equal(t, os.FileMode(0o600), perm, "custom named file should also have 0o600 permissions") +} + +func TestJSONLWriter_ParentDirsWithMode(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "level1", "level2", "level3", "transcript.jsonl") + + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + t.Cleanup(func() { _ = writer.Close() }) + + parentDir := filepath.Join(tmpDir, "level1", "level2", "level3") + info, err := os.Stat(parentDir) + require.NoError(t, err) + + parentPerm := info.Mode().Perm() + assert.Equal(t, os.FileMode(0o700), parentPerm, "parent directory should have 0o700 permissions") +} + +func TestJSONLWriter_WriteWithValidContext(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "transcript.jsonl") + + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + t.Cleanup(func() { _ = writer.Close() }) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + event := transcript.ExchangeEvent{ + Seq: 1, + RunID: "test-run", + Type: transcript.EventTypeRunStarted, + Path: "/test/path", + Iteration: 0, + Timestamp: time.Now(), + Payload: "test", + } + + err = writer.Write(ctx, event) + assert.NoError(t, err, "write should succeed with valid context") + + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + scanner := bufio.NewScanner(file) + require.True(t, scanner.Scan()) + + 
var readEvent transcript.ExchangeEvent + err = json.Unmarshal([]byte(scanner.Text()), &readEvent) + require.NoError(t, err) + assert.Equal(t, event.Seq, readEvent.Seq) +} diff --git a/internal/infrastructure/transcript/reader.go b/internal/infrastructure/transcript/reader.go new file mode 100644 index 00000000..07e1533d --- /dev/null +++ b/internal/infrastructure/transcript/reader.go @@ -0,0 +1,183 @@ +package transcript + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "io" + "time" + + "github.com/awf-project/cli/internal/domain/transcript" +) + +var ErrLineMalformed = errors.New("malformed JSONL line") + +type Reader struct { + dec *json.Decoder + lineNum int +} + +// NewReader wraps r in a 4 MiB bufio.Reader so that single-line events larger +// than the default 4 KiB read buffer are handled without extra round-trips. +// json.Decoder is used rather than bufio.Scanner so that multi-line JSON values +// (e.g. pretty-printed payloads in tests) are tolerated. +func NewReader(r io.Reader) *Reader { + buf := bufio.NewReaderSize(r, 4<<20) + return &Reader{dec: json.NewDecoder(buf)} +} + +func (r *Reader) Read() (transcript.ExchangeEvent, error) { + var raw rawEvent + if err := r.dec.Decode(&raw); err != nil { + if errors.Is(err, io.EOF) { + return transcript.ExchangeEvent{}, io.EOF + } + r.lineNum++ + return transcript.ExchangeEvent{}, fmt.Errorf("line %d: %w", r.lineNum, ErrLineMalformed) + } + r.lineNum++ + + event := transcript.ExchangeEvent{ + Seq: raw.Seq, + RunID: raw.RunID, + ParentRunID: raw.ParentRunID, + ChildRunID: raw.ChildRunID, + Type: raw.Type, + Path: raw.Path, + Iteration: raw.Iteration, + Timestamp: raw.Timestamp, + } + + if len(raw.Payload) > 0 && string(raw.Payload) != "null" { + payload, err := tolerantDispatchPayload(raw.Payload) + if err != nil { + return transcript.ExchangeEvent{}, fmt.Errorf("line %d: %w", r.lineNum, ErrLineMalformed) + } + event.Payload = payload + } + + return event, nil +} + +func (r *Reader) ReadAll() 
([]transcript.ExchangeEvent, error) { + var events []transcript.ExchangeEvent + for { + event, err := r.Read() + if errors.Is(err, io.EOF) { + return events, nil + } + if err != nil { + return events, err + } + events = append(events, event) + } +} + +// rawEvent mirrors ExchangeEvent without the custom UnmarshalJSON, allowing +// unknown event types to be decoded without error (forward-compatibility policy). +type rawEvent struct { + Seq uint64 `json:"seq"` + RunID string `json:"run_id"` + ParentRunID string `json:"parent_run_id,omitempty"` + ChildRunID string `json:"child_run_id,omitempty"` + Type transcript.EventType `json:"type"` + Path string `json:"path"` + Iteration int `json:"iteration"` + Timestamp time.Time `json:"timestamp"` + Payload json.RawMessage `json:"payload"` +} + +// rawContentBlock mirrors ContentBlock without the custom UnmarshalJSON, allowing +// unknown block types to be preserved verbatim (forward-compatibility policy). +type rawContentBlock struct { + Type transcript.BlockType `json:"type"` + Fidelity transcript.Fidelity `json:"fidelity"` + Text string `json:"text,omitempty"` + Thinking string `json:"thinking,omitempty"` + ToolName string `json:"tool_name,omitempty"` + ToolID string `json:"tool_id,omitempty"` + ToolInput any `json:"tool_input,omitempty"` + ToolContent any `json:"tool_content,omitempty"` + Command string `json:"command,omitempty"` + Chunk string `json:"chunk,omitempty"` +} + +func decodeAsAny(raw json.RawMessage, context string) (any, error) { + var v any + if err := json.Unmarshal(raw, &v); err != nil { + return nil, fmt.Errorf("decoding %s: %w", context, err) + } + return v, nil +} + +func tolerantDispatchPayload(raw json.RawMessage) (any, error) { + if len(raw) > 0 && raw[0] == '[' { + var rawBlocks []rawContentBlock + if err := json.Unmarshal(raw, &rawBlocks); err != nil { + return decodeAsAny(raw, "content block array") + } + blocks := make([]transcript.ContentBlock, len(rawBlocks)) + for i := range rawBlocks { + 
blocks[i] = toContentBlock(&rawBlocks[i]) + } + return blocks, nil + } + + var probe map[string]json.RawMessage + if err := json.Unmarshal(raw, &probe); err != nil { + return decodeAsAny(raw, "payload") + } + + if probe["role"] != nil { + type rawMessagePayload struct { + Role string `json:"role"` + Blocks []rawContentBlock `json:"blocks"` + } + var rmp rawMessagePayload + if err := json.Unmarshal(raw, &rmp); err != nil { + return nil, fmt.Errorf("decoding message payload: %w", err) + } + mp := &transcript.MessagePayload{ + Role: rmp.Role, + Blocks: make([]transcript.ContentBlock, len(rmp.Blocks)), + } + for i := range rmp.Blocks { + mp.Blocks[i] = toContentBlock(&rmp.Blocks[i]) + } + return mp, nil + } + + if probe["call_id"] != nil { + var p transcript.ToolPayload + if err := json.Unmarshal(raw, &p); err != nil { + return nil, fmt.Errorf("decoding tool payload: %w", err) + } + return &p, nil + } + + if probe["kind"] != nil { + var p transcript.StepPayload + if err := json.Unmarshal(raw, &p); err != nil { + return nil, fmt.Errorf("decoding step payload: %w", err) + } + return &p, nil + } + + return decodeAsAny(raw, "generic payload") +} + +func toContentBlock(rb *rawContentBlock) transcript.ContentBlock { //nolint:gocritic // pointer receiver avoids 160-byte copy per element + return transcript.ContentBlock{ + Type: rb.Type, + Fidelity: rb.Fidelity, + Text: rb.Text, + Thinking: rb.Thinking, + ToolName: rb.ToolName, + ToolID: rb.ToolID, + ToolInput: rb.ToolInput, + ToolContent: rb.ToolContent, + Command: rb.Command, + Chunk: rb.Chunk, + } +} diff --git a/internal/infrastructure/transcript/reader_test.go b/internal/infrastructure/transcript/reader_test.go new file mode 100644 index 00000000..3c09f550 --- /dev/null +++ b/internal/infrastructure/transcript/reader_test.go @@ -0,0 +1,255 @@ +package transcript_test + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "testing" + "time" + + 
"github.com/awf-project/cli/internal/domain/transcript" + infra "github.com/awf-project/cli/internal/infrastructure/transcript" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestReader_RoundTripsValidJSONL(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "transcript.jsonl") + + // Write 100 events using the JSONL writer + writer, err := infra.NewJSONLWriter(path) + require.NoError(t, err) + + ctx := context.Background() + written := make([]transcript.ExchangeEvent, 100) + for i := range 100 { + written[i] = transcript.ExchangeEvent{ + Seq: uint64(i), + RunID: fmt.Sprintf("run-%d", i), + Type: transcript.EventTypeMessageUser, + Path: "/test/path", + Iteration: i, + Timestamp: time.Now(), + Payload: &transcript.MessagePayload{ + Role: "user", + Blocks: []transcript.ContentBlock{ + { + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityRouter, + Text: fmt.Sprintf("Event %d", i), + }, + }, + }, + } + err := writer.Write(ctx, written[i]) + require.NoError(t, err) + } + writer.Close() + + // Read back using the reader + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + reader := infra.NewReader(file) + require.NotNil(t, reader, "NewReader should return non-nil") + + read := make([]transcript.ExchangeEvent, 0, 100) + for { + event, err := reader.Read() + if err == io.EOF { + break + } + require.NoError(t, err) + read = append(read, event) + } + + require.Equal(t, len(written), len(read), "should read back same number of events") + for i := range written { + assert.Equal(t, written[i].Seq, read[i].Seq) + assert.Equal(t, written[i].RunID, read[i].RunID) + assert.Equal(t, written[i].Type, read[i].Type) + } +} + +func TestReader_TolerantUnknownEventType(t *testing.T) { + jsonLine := `{"seq":1,"run_id":"test-run","type":"future.thing","path":"/test","iteration":0,"timestamp":"2026-01-01T00:00:00Z","payload":null}` + reader := infra.NewReader(strings.NewReader(jsonLine + 
"\n")) + require.NotNil(t, reader) + + event, err := reader.Read() + assert.NoError(t, err, "should not error on unknown event type") + assert.Equal(t, transcript.EventType("future.thing"), event.Type, "should preserve unknown type verbatim") + assert.Equal(t, uint64(1), event.Seq) + assert.Equal(t, "test-run", event.RunID) +} + +func TestReader_TolerantUnknownBlockType(t *testing.T) { + // MessagePayload with a block that has an unknown type + payload := `{ + "role":"user", + "blocks":[ + {"type":"text","fidelity":"router","text":"hello"}, + {"type":"future.block.kind","fidelity":"router","data":"custom"} + ] + }` + + jsonLine := fmt.Sprintf( + `{"seq":1,"run_id":"test-run","type":"message.user","path":"/test","iteration":0,"timestamp":"2026-01-01T00:00:00Z","payload":%s}`, + payload, + ) + + reader := infra.NewReader(strings.NewReader(jsonLine + "\n")) + require.NotNil(t, reader) + + event, err := reader.Read() + assert.NoError(t, err, "should not error on unknown block type inside payload") + assert.Equal(t, transcript.EventTypeMessageUser, event.Type) + + msgPayload, ok := event.Payload.(*transcript.MessagePayload) + require.True(t, ok, "payload should be MessagePayload") + require.Equal(t, 2, len(msgPayload.Blocks)) + + // First block is valid (should be properly decoded) + assert.Equal(t, transcript.BlockTypeText, msgPayload.Blocks[0].Type) + assert.Equal(t, "hello", msgPayload.Blocks[0].Text) + + // Second block has unknown type - should be preserved with type as unknown string + // The reader should tolerate it and preserve the unknown type + assert.Equal(t, transcript.BlockType("future.block.kind"), msgPayload.Blocks[1].Type) +} + +func TestReader_MalformedLineReturnsError(t *testing.T) { + truncatedJSON := `{"seq":1,"run_id":"test-run","type":"message.user","path":"/test","iteration":0,"timestamp":"2026-01-01T00:00:00Z","pay` + + reader := infra.NewReader(strings.NewReader(truncatedJSON + "\n")) + require.NotNil(t, reader) + + event, err := reader.Read() 
+ assert.Error(t, err, "should return error for malformed JSON") + assert.ErrorIs(t, err, infra.ErrLineMalformed, "should be ErrLineMalformed") + assert.Equal(t, transcript.ExchangeEvent{}, event, "should return zero event on error") + + // Verify error message contains line number context + if errMsg, ok := err.(interface{ Error() string }); ok { + assert.Contains(t, errMsg.Error(), "1", "error should mention line number 1") + } +} + +func TestReader_EmptyLinesSkipped(t *testing.T) { + jsonLine1 := `{"seq":1,"run_id":"run1","type":"message.user","path":"/test","iteration":0,"timestamp":"2026-01-01T00:00:00Z","payload":null}` + jsonLine2 := `{"seq":2,"run_id":"run2","type":"message.user","path":"/test","iteration":0,"timestamp":"2026-01-01T00:00:00Z","payload":null}` + + input := jsonLine1 + "\n\n\n" + jsonLine2 + "\n\n" + + reader := infra.NewReader(strings.NewReader(input)) + require.NotNil(t, reader) + + // Read first event + event1, err := reader.Read() + require.NoError(t, err) + assert.Equal(t, uint64(1), event1.Seq) + assert.Equal(t, "run1", event1.RunID) + + // Read second event (empty lines should be skipped) + event2, err := reader.Read() + require.NoError(t, err) + assert.Equal(t, uint64(2), event2.Seq) + assert.Equal(t, "run2", event2.RunID) + + // EOF + _, err = reader.Read() + assert.Equal(t, io.EOF, err) +} + +func TestReader_RespectsLargeLines(t *testing.T) { + // Create a 1MB event + largeText := strings.Repeat("x", 1<<20) // 1MB of text + + event := transcript.ExchangeEvent{ + Seq: 1, + RunID: "large-run", + Type: transcript.EventTypeMessageUser, + Path: "/test", + Iteration: 0, + Timestamp: time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC), + Payload: &transcript.MessagePayload{ + Role: "user", + Blocks: []transcript.ContentBlock{ + { + Type: transcript.BlockTypeText, + Fidelity: transcript.FidelityRouter, + Text: largeText, + }, + }, + }, + } + + // Marshal to JSON + data, err := json.Marshal(event) + require.NoError(t, err) + + reader := 
infra.NewReader(bytes.NewReader(append(data, '\n'))) + require.NotNil(t, reader) + + readEvent, err := reader.Read() + assert.NoError(t, err, "should handle large lines without error") + assert.Equal(t, event.Seq, readEvent.Seq) + assert.Equal(t, event.RunID, readEvent.RunID) + assert.Equal(t, len(largeText), len(readEvent.Payload.(*transcript.MessagePayload).Blocks[0].Text)) +} + +func TestReader_ReadAll(t *testing.T) { + jsonLine1 := `{"seq":1,"run_id":"run1","type":"message.user","path":"/test","iteration":0,"timestamp":"2026-01-01T00:00:00Z","payload":null}` + jsonLine2 := `{"seq":2,"run_id":"run2","type":"message.assistant","path":"/test","iteration":0,"timestamp":"2026-01-01T00:00:00Z","payload":null}` + jsonLine3 := `{"seq":3,"run_id":"run3","type":"tool.call","path":"/test","iteration":0,"timestamp":"2026-01-01T00:00:00Z","payload":null}` + + input := jsonLine1 + "\n" + jsonLine2 + "\n" + jsonLine3 + "\n" + + reader := infra.NewReader(strings.NewReader(input)) + require.NotNil(t, reader) + + events, err := reader.ReadAll() + require.NoError(t, err) + require.Equal(t, 3, len(events)) + + assert.Equal(t, uint64(1), events[0].Seq) + assert.Equal(t, uint64(2), events[1].Seq) + assert.Equal(t, uint64(3), events[2].Seq) +} + +func TestReader_ReadAllEmpty(t *testing.T) { + reader := infra.NewReader(strings.NewReader("")) + require.NotNil(t, reader) + + events, err := reader.ReadAll() + assert.NoError(t, err) + assert.Equal(t, 0, len(events)) +} + +func TestReader_SequentialReads(t *testing.T) { + jsonLine1 := `{"seq":1,"run_id":"run1","type":"message.user","path":"/test","iteration":0,"timestamp":"2026-01-01T00:00:00Z","payload":null}` + jsonLine2 := `{"seq":2,"run_id":"run2","type":"message.assistant","path":"/test","iteration":0,"timestamp":"2026-01-01T00:00:00Z","payload":null}` + + input := jsonLine1 + "\n" + jsonLine2 + "\n" + + reader := infra.NewReader(strings.NewReader(input)) + require.NotNil(t, reader) + + event1, err := reader.Read() + 
require.NoError(t, err) + assert.Equal(t, uint64(1), event1.Seq) + + event2, err := reader.Read() + require.NoError(t, err) + assert.Equal(t, uint64(2), event2.Seq) + + _, err = reader.Read() + assert.Equal(t, io.EOF, err) +} diff --git a/internal/infrastructure/transcript/recorder.go b/internal/infrastructure/transcript/recorder.go new file mode 100644 index 00000000..a63738aa --- /dev/null +++ b/internal/infrastructure/transcript/recorder.go @@ -0,0 +1,120 @@ +package transcript + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "sync" + "sync/atomic" + + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" +) + +type RecorderOption func(*Recorder) + +type Recorder struct { + path string + writer *JSONLWriter + writerOnce sync.Once + writerErr error + fanout *FanOut + seq atomic.Uint64 + masker func(transcript.ExchangeEvent) transcript.ExchangeEvent + closeOnce sync.Once + closeErr error + fanoutOpts []FanOutOption +} + +func NewRecorder(path string, opts ...RecorderOption) (*Recorder, error) { + r := &Recorder{path: path} + for _, opt := range opts { + opt(r) + } + r.fanout = NewFanOut(r.fanoutOpts...) + r.fanoutOpts = nil + return r, nil +} + +func WithFanOutBufferSize(size int) RecorderOption { + return func(r *Recorder) { + r.fanoutOpts = append(r.fanoutOpts, WithBufferSize(size)) + } +} + +func WithRecorderLogger(logger ports.Logger) RecorderOption { + return func(r *Recorder) { + r.fanoutOpts = append(r.fanoutOpts, WithLogger(logger)) + } +} + +func WithMasker(masker func(transcript.ExchangeEvent) transcript.ExchangeEvent) RecorderOption { + return func(r *Recorder) { + r.masker = masker + } +} + +// initWriter lazily opens the transcript file on first successful Record. +// Intentionally skips MkdirAll so callers with nonexistent parent paths get a +// write-time error rather than a silent directory creation side-effect. 
+func (r *Recorder) initWriter() error { + r.writerOnce.Do(func() { + cleanPath := filepath.Clean(r.path) + f, err := os.OpenFile(cleanPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o600) //nolint:gosec // path is caller-controlled + if err != nil { + r.writerErr = fmt.Errorf("opening transcript file: %w", err) + return + } + r.writer = &JSONLWriter{path: cleanPath, f: f} + }) + return r.writerErr +} + +func (r *Recorder) Record(ctx context.Context, event transcript.ExchangeEvent) error { //nolint:gocritic // hugeParam: value semantics required per ports.Recorder contract + if event.Type == "" { + return ports.ErrInvalidEvent + } + + if r.masker != nil { + event = r.masker(event) + } + + if err := r.initWriter(); err != nil { + return err + } + + if event.Seq == 0 { + event.Seq = r.seq.Add(1) + } + + if err := r.writer.Write(ctx, event); err != nil { + return err + } + + r.fanout.Publish(event) + return nil +} + +func (r *Recorder) Subscribe() (events <-chan transcript.ExchangeEvent, unsubscribe func()) { + return r.fanout.Subscribe() +} + +func (r *Recorder) Close() error { + r.closeOnce.Do(func() { + // Close both the fanout and the writer regardless of intermediate errors so a + // fanout failure can never leak the writer's file descriptor; join the errors. + var errs []error + if err := r.fanout.Close(); err != nil { + errs = append(errs, err) + } + if r.writer != nil { + if err := r.writer.Close(); err != nil { + errs = append(errs, err) + } + } + r.closeErr = errors.Join(errs...) 
+ }) + return r.closeErr +} diff --git a/internal/infrastructure/transcript/recorder_test.go b/internal/infrastructure/transcript/recorder_test.go new file mode 100644 index 00000000..59058ed7 --- /dev/null +++ b/internal/infrastructure/transcript/recorder_test.go @@ -0,0 +1,417 @@ +package transcript + +import ( + "bufio" + "context" + "encoding/json" + "os" + "path/filepath" + "strings" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/transcript" +) + +func TestRecorder_NewRecorder_Success(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.jsonl") + + rec, err := NewRecorder(path) + require.NoError(t, err) + require.NotNil(t, rec) + + err = rec.Close() + assert.NoError(t, err) +} + +func TestRecorder_InvalidEventRejected(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.jsonl") + + rec, err := NewRecorder(path) + require.NoError(t, err) + + ctx := context.Background() + zeroEvent := transcript.ExchangeEvent{} + err = rec.Record(ctx, zeroEvent) + assert.ErrorIs(t, err, ports.ErrInvalidEvent) + + err = rec.Close() + assert.NoError(t, err) + + // Verify file was not written + data, err := os.ReadFile(path) + assert.True(t, os.IsNotExist(err), "file should not exist after invalid event rejected") + assert.Empty(t, data) +} + +func TestRecorder_RecordWritesThenBroadcasts(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.jsonl") + + rec, err := NewRecorder(path) + require.NoError(t, err) + + // Subscribe before recording + ch, cancel := rec.Subscribe() + defer cancel() + + ctx := context.Background() + event := transcript.ExchangeEvent{ + RunID: "test-run-123", + Type: transcript.EventTypeRunStarted, + Timestamp: time.Now(), + } + + err = rec.Record(ctx, event) + assert.NoError(t, err) + + // Verify broadcast to subscriber + select { + case 
received := <-ch: + assert.NotZero(t, received.Seq) + assert.Equal(t, event.RunID, received.RunID) + assert.Equal(t, event.Type, received.Type) + case <-time.After(100 * time.Millisecond): + t.Fatal("event not received by subscriber within timeout") + } + + // Verify file was written + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + scanner := bufio.NewScanner(file) + assert.True(t, scanner.Scan(), "expected at least one line in file") + + var readEvent transcript.ExchangeEvent + err = json.Unmarshal(scanner.Bytes(), &readEvent) + require.NoError(t, err) + assert.NotZero(t, readEvent.Seq) + assert.Equal(t, event.RunID, readEvent.RunID) + + err = rec.Close() + assert.NoError(t, err) +} + +func TestRecorder_RecordWriterFailureDoesNotBroadcast(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "nonexistent", "subdir", "test.jsonl") + + rec, err := NewRecorder(path) + require.NoError(t, err) + + // Subscribe before recording + ch, cancel := rec.Subscribe() + defer cancel() + + ctx := context.Background() + event := transcript.ExchangeEvent{ + RunID: "test-run-456", + Type: transcript.EventTypeStepStarted, + Timestamp: time.Now(), + } + + // Recording should fail because we can't write to nonexistent path + err = rec.Record(ctx, event) + assert.Error(t, err) + + // Subscriber should not receive anything + select { + case <-ch: + t.Fatal("subscriber should not receive event when writer fails") + case <-time.After(50 * time.Millisecond): + // Expected: no event received + } + + rec.Close() +} + +func TestRecorder_SeqMonotonicConcurrent(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "concurrent.jsonl") + + rec, err := NewRecorder(path) + require.NoError(t, err) + + numGoroutines := 64 + callsPerGoroutine := 1000 + var wg sync.WaitGroup + + for g := range numGoroutines { + wg.Add(1) + go func(goroutineID int) { + defer wg.Done() + + ctx := context.Background() + for i := range callsPerGoroutine { + event := 
transcript.ExchangeEvent{ + RunID: "concurrent-test", + Type: transcript.EventTypeMessageUser, + Iteration: goroutineID*1000 + i, + Timestamp: time.Now(), + } + err := rec.Record(ctx, event) + assert.NoError(t, err, "goroutine %d iteration %d", goroutineID, i) + } + }(g) + } + + wg.Wait() + err = rec.Close() + require.NoError(t, err) + + // Read file and verify Seq is monotonic 1..64000 with no duplicates and no gaps + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + seqMap := make(map[uint64]bool) + maxSeq := uint64(0) + lineCount := 0 + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lineCount++ + var event transcript.ExchangeEvent + err := json.Unmarshal(scanner.Bytes(), &event) + require.NoError(t, err) + + assert.NotZero(t, event.Seq, "Seq must be allocated") + seqMap[event.Seq] = true + + if event.Seq > maxSeq { + maxSeq = event.Seq + } + } + require.NoError(t, scanner.Err()) + + expectedLineCount := numGoroutines * callsPerGoroutine + assert.Equal(t, expectedLineCount, lineCount) + + // Verify no duplicates and no gaps + for i := uint64(1); i <= maxSeq; i++ { + assert.True(t, seqMap[i], "Seq %d missing (duplicate or gap detected)", i) + } +} + +func TestRecorder_IdempotentClose(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.jsonl") + + rec, err := NewRecorder(path) + require.NoError(t, err) + + // First close + err = rec.Close() + assert.NoError(t, err) + + // Second close should also return nil + err = rec.Close() + assert.NoError(t, err) + + // Third close for good measure + err = rec.Close() + assert.NoError(t, err) +} + +func TestRecorder_MaskerHookApplied(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "masked.jsonl") + + // Masker that uppercases the RunID + uppercaseMasker := func(event transcript.ExchangeEvent) transcript.ExchangeEvent { + event.RunID = strings.ToUpper(event.RunID) + return event + } + + rec, err := NewRecorder(path, WithMasker(uppercaseMasker)) + 
require.NoError(t, err) + + // Subscribe to capture broadcast + ch, cancel := rec.Subscribe() + defer cancel() + + ctx := context.Background() + event := transcript.ExchangeEvent{ + RunID: "lowercase-test", + Type: transcript.EventTypeMessageAssistant, + Timestamp: time.Now(), + } + + err = rec.Record(ctx, event) + assert.NoError(t, err) + + // Verify subscriber receives masked event + select { + case received := <-ch: + assert.Equal(t, "LOWERCASE-TEST", received.RunID, "subscriber should see masked event") + case <-time.After(100 * time.Millisecond): + t.Fatal("event not received by subscriber within timeout") + } + + // Verify file contains masked event + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + scanner := bufio.NewScanner(file) + assert.True(t, scanner.Scan()) + + var fileEvent transcript.ExchangeEvent + err = json.Unmarshal(scanner.Bytes(), &fileEvent) + require.NoError(t, err) + assert.Equal(t, "LOWERCASE-TEST", fileEvent.RunID, "file should contain masked event") + + err = rec.Close() + assert.NoError(t, err) +} + +func TestRecorder_WithFanOutBufferSize(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.jsonl") + + rec, err := NewRecorder(path, WithFanOutBufferSize(512)) + require.NoError(t, err) + require.NotNil(t, rec) + + err = rec.Close() + assert.NoError(t, err) +} + +func TestRecorder_WithRecorderLogger(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.jsonl") + + logger := ports.NopLogger{} + rec, err := NewRecorder(path, WithRecorderLogger(logger)) + require.NoError(t, err) + require.NotNil(t, rec) + + err = rec.Close() + assert.NoError(t, err) +} + +func TestRecorder_SequenceAllocation(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.jsonl") + + rec, err := NewRecorder(path) + require.NoError(t, err) + + ctx := context.Background() + + // Record three events + event1 := transcript.ExchangeEvent{ + RunID: "seq-test", + Type: 
transcript.EventTypeRunStarted, + } + err = rec.Record(ctx, event1) + assert.NoError(t, err) + + event2 := transcript.ExchangeEvent{ + RunID: "seq-test", + Type: transcript.EventTypeStepStarted, + } + err = rec.Record(ctx, event2) + assert.NoError(t, err) + + event3 := transcript.ExchangeEvent{ + RunID: "seq-test", + Type: transcript.EventTypeStepCompleted, + } + err = rec.Record(ctx, event3) + assert.NoError(t, err) + + err = rec.Close() + require.NoError(t, err) + + // Read file and verify Seq allocation (1, 2, 3) + file, err := os.Open(path) + require.NoError(t, err) + defer file.Close() + + var seqs []uint64 + scanner := bufio.NewScanner(file) + for scanner.Scan() { + var event transcript.ExchangeEvent + err := json.Unmarshal(scanner.Bytes(), &event) + require.NoError(t, err) + seqs = append(seqs, event.Seq) + } + require.NoError(t, scanner.Err()) + + require.Len(t, seqs, 3) + assert.Equal(t, uint64(1), seqs[0]) + assert.Equal(t, uint64(2), seqs[1]) + assert.Equal(t, uint64(3), seqs[2]) +} + +func TestRecorder_ContextCancellation(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.jsonl") + + rec, err := NewRecorder(path) + require.NoError(t, err) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + event := transcript.ExchangeEvent{ + RunID: "cancel-test", + Type: transcript.EventTypeRunStarted, + } + + err = rec.Record(ctx, event) + assert.Error(t, err) + + err = rec.Close() + assert.NoError(t, err) +} + +func TestRecorder_Subscribe_MultipleSubscribers(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.jsonl") + + rec, err := NewRecorder(path) + require.NoError(t, err) + + // Create three subscribers + ch1, cancel1 := rec.Subscribe() + defer cancel1() + + ch2, cancel2 := rec.Subscribe() + defer cancel2() + + ch3, cancel3 := rec.Subscribe() + defer cancel3() + + ctx := context.Background() + event := transcript.ExchangeEvent{ + RunID: "multi-sub", + Type: transcript.EventTypeRunStarted, + } + + err 
= rec.Record(ctx, event) + assert.NoError(t, err) + + // All subscribers should receive the event + timeout := 100 * time.Millisecond + for i, ch := range []<-chan transcript.ExchangeEvent{ch1, ch2, ch3} { + select { + case received := <-ch: + assert.Equal(t, event.RunID, received.RunID, "subscriber %d", i) + case <-time.After(timeout): + t.Fatalf("subscriber %d did not receive event", i) + } + } + + err = rec.Close() + assert.NoError(t, err) +} diff --git a/internal/interfaces/cli/run.go b/internal/interfaces/cli/run.go index 4a5c35f0..0b9f35a2 100644 --- a/internal/interfaces/cli/run.go +++ b/internal/interfaces/cli/run.go @@ -27,6 +27,7 @@ import ( "github.com/awf-project/cli/internal/infrastructure/xdg" "github.com/awf-project/cli/internal/interfaces/cli/ui" "github.com/awf-project/cli/pkg/interpolation" + "github.com/google/uuid" "github.com/spf13/cobra" "golang.org/x/term" ) @@ -40,6 +41,7 @@ func newRunCommand(cfg *Config) *cobra.Command { var interactiveFlag bool var breakpointFlags []string var skipPlugins bool + var debugTranscriptMirror string cmd := &cobra.Command{ Use: "run ", @@ -86,7 +88,7 @@ Examples: if stepFlag != "" { return runSingleStep(cmd, cfg, args[0], stepFlag, inputFlags, mockFlags, skipPlugins) } - return runWorkflow(cmd, cfg, args[0], inputFlags, skipPlugins) + return runWorkflow(cmd, cfg, args[0], inputFlags, skipPlugins, debugTranscriptMirror) }, } @@ -105,6 +107,8 @@ Examples: cmd.Flags().BoolVar(&skipPlugins, "skip-plugins", false, "Skip plugin validators") cmd.Flags().StringVar(&cfg.OtelExporter, "otel-exporter", "", "OpenTelemetry OTLP exporter endpoint (e.g. 
localhost:4317)") cmd.Flags().StringVar(&cfg.OtelServiceName, "otel-service-name", "awf", "OpenTelemetry service name") + cmd.Flags().StringVar(&debugTranscriptMirror, "debug-transcript-mirror", "", + "[DEBUG] Write received transcript events to this path (subscription mirror)") // Wire custom help function for workflow-specific help (F035) cmd.SetHelpFunc(workflowHelpFunc(cfg)) @@ -176,7 +180,7 @@ func workflowHelpFunc(cfg *Config) func(*cobra.Command, []string) { } } -func runWorkflow(cmd *cobra.Command, cfg *Config, workflowName string, inputFlags []string, skipPlugins bool) error { +func runWorkflow(cmd *cobra.Command, cfg *Config, workflowName string, inputFlags []string, skipPlugins bool, debugTranscriptMirror string) error { // Parse inputs inputs, err := parseInputFlags(inputFlags) if err != nil { @@ -312,6 +316,19 @@ func runWorkflow(cmd *cobra.Command, cfg *Config, workflowName string, inputFlag auditWriter = aw } + // Setup transcript recorder (F106) + runID := uuid.New().String() + var recorder ports.Recorder + var mirrorCancel func() + if rec, recCleanup, recErr := WireTranscript(runID, cfg.StoragePath); recErr != nil { + logger.Warn("failed to initialize transcript recorder, transcripts disabled", "error", recErr) + } else { + defer recCleanup() //nolint:errcheck // best-effort transcript flush on exit + recorder = rec + mirrorCancel = AttachMirrorSubscriber(rec, debugTranscriptMirror) + defer mirrorCancel() + } + templatePaths := []string{ ".awf/templates", filepath.Join(cfg.StoragePath, "templates"), @@ -359,6 +376,14 @@ func runWorkflow(cmd *cobra.Command, cfg *Config, workflowName string, inputFlag setupOpts = append(setupOpts, application.WithAuditWriter(auditWriter)) } + if recorder != nil { + setupOpts = append(setupOpts, + application.WithRecorder(recorder), + application.WithRecorderFactory(NewRecorderFactory()), + application.WithTranscriptDir(filepath.Join(cfg.StoragePath, "transcripts")), + ) + } + if stdoutWriter != nil { setupOpts = 
append(setupOpts, application.WithOutputWriters(stdoutWriter, stderrWriter)) } @@ -377,8 +402,10 @@ func runWorkflow(cmd *cobra.Command, cfg *Config, workflowName string, inputFlag } startTime := time.Now() - // Execute workflow with pre-loaded workflow (avoids double I/O) - execCtx, execErr := execSvc.RunWithWorkflow(ctx, wf, inputs) + // Execute workflow with pre-loaded workflow (avoids double I/O). Reuse runID as the + // execution WorkflowID so the transcript file .jsonl and the run_id stamped on + // every emitted event are the same identifier (F106 SC-001). + execCtx, execErr := execSvc.RunWithWorkflowAndRunID(ctx, wf, inputs, runID) // Flush any remaining output from streaming writers if stdoutWriter != nil { diff --git a/internal/interfaces/cli/wiring_transcript.go b/internal/interfaces/cli/wiring_transcript.go new file mode 100644 index 00000000..b8f8589b --- /dev/null +++ b/internal/interfaces/cli/wiring_transcript.go @@ -0,0 +1,76 @@ +package cli + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/infrastructure/transcript" +) + +// WireTranscript builds a transcript Recorder at storage/transcripts/.jsonl +// and returns the recorder along with a cleanup closure that calls Close(). +// The storageRoot is the parent directory for transcripts (typically cfg.StoragePath). +// Returns the recorder, a cleanup function, and any error. 
+func WireTranscript(runID, storageRoot string) (ports.Recorder, func() error, error) { + transcriptDir := filepath.Join(storageRoot, "transcripts") + if err := os.MkdirAll(transcriptDir, 0o750); err != nil { + return nil, nil, fmt.Errorf("creating transcripts directory: %w", err) + } + + transcriptPath := filepath.Join(transcriptDir, runID+".jsonl") + + rec, err := transcript.NewRecorder(transcriptPath) + if err != nil { + return nil, nil, fmt.Errorf("creating transcript recorder: %w", err) + } + + cleanup := rec.Close + + return rec, cleanup, nil +} + +// NewRecorderFactory returns a ports.RecorderFactory backed by the transcript +// infrastructure Recorder. It is used by ExecutionService to create one child +// recorder per sub-run for F106 sub-workflow transcript linkage. The parent +// directory of the path passed to the factory must already exist. +func NewRecorderFactory() ports.RecorderFactory { + return func(path string) (ports.Recorder, error) { + return transcript.NewRecorder(path) + } +} + +// AttachMirrorSubscriber attaches a debug mirror subscriber to the recorder. +// When mirrorPath is non-empty, it subscribes to recorder events and writes them to mirrorPath. +// Returns a cancel function that should be called on shutdown. +// When mirrorPath is empty, returns a no-op cancel function. +func AttachMirrorSubscriber(rec ports.Recorder, mirrorPath string) func() { + if mirrorPath == "" || rec == nil { + return func() {} + } + + ch, cancel := rec.Subscribe() + + go func() { + f, err := os.OpenFile(mirrorPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o600) //nolint:gosec // caller-controlled debug path + if err != nil { + // Unsubscribe so the fanout stops buffering (and logging drops) for a + // subscriber that will never drain its channel, and drain any already-queued + // events to let the buffered channel be garbage-collected. 
+ cancel() + for range ch { //nolint:revive // intentional drain of the closed channel + } + return + } + defer f.Close() //nolint:errcheck // best-effort debug mirror + + enc := json.NewEncoder(f) + for event := range ch { + _ = enc.Encode(event) //nolint:errcheck // best-effort debug mirror + } + }() + + return cancel +} diff --git a/internal/interfaces/cli/wiring_transcript_test.go b/internal/interfaces/cli/wiring_transcript_test.go new file mode 100644 index 00000000..e4828319 --- /dev/null +++ b/internal/interfaces/cli/wiring_transcript_test.go @@ -0,0 +1,166 @@ +package cli_test + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "os" + "path/filepath" + "testing" + "time" + + "github.com/awf-project/cli/internal/domain/transcript" + "github.com/awf-project/cli/internal/interfaces/cli" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestAttachMirrorSubscriber_BadPathDoesNotBlock verifies that when the mirror file +// cannot be opened, the subscriber goroutine unsubscribes and drains its channel so the +// recorder keeps writing without blocking and Close completes (no leaked subscriber). +func TestAttachMirrorSubscriber_BadPathDoesNotBlock(t *testing.T) { + tmpDir := t.TempDir() + rec, cleanup, err := cli.WireTranscript("mirror-badpath", tmpDir) + require.NoError(t, err) + defer cleanup() + + // A mirror path inside a nonexistent directory cannot be opened. 
+ badPath := filepath.Join(tmpDir, "does-not-exist", "mirror.jsonl") + mirrorCancel := cli.AttachMirrorSubscriber(rec, badPath) + defer mirrorCancel() + + done := make(chan struct{}) + go func() { + for range 200 { + _ = rec.Record(context.Background(), transcript.ExchangeEvent{Type: transcript.EventTypeStepStarted}) + } + close(done) + }() + + select { + case <-done: + case <-time.After(3 * time.Second): + t.Fatal("recording blocked — mirror subscriber with a bad path leaked its channel") + } +} + +func TestWiringTranscript_BuildsRecorderAndCleanup(t *testing.T) { + tmpDir := t.TempDir() + runID := "test-run-123" + + rec, cleanup, err := cli.WireTranscript(runID, tmpDir) + + require.NoError(t, err) + require.NotNil(t, rec, "Recorder must not be nil") + require.NotNil(t, cleanup, "Cleanup function must not be nil") + + // Verify cleanup works without error + cleanupErr := cleanup() + assert.NoError(t, cleanupErr) +} + +func TestWiringTranscript_FilePathUsesRunID(t *testing.T) { + tmpDir := t.TempDir() + runID := "test-run-456" + + rec, cleanup, err := cli.WireTranscript(runID, tmpDir) + defer cleanup() + + require.NoError(t, err, "WireTranscript should not error") + + // Record an event to trigger file creation + testEvent := transcript.ExchangeEvent{ + Seq: 1, + RunID: runID, + Type: transcript.EventTypeRunStarted, + Path: "/test", + Iteration: 0, + Timestamp: time.Now(), + } + err = rec.Record(context.Background(), testEvent) + require.NoError(t, err, "Recording event should not error") + + // Verify the file was created at the expected path + expectedPath := filepath.Join(tmpDir, "transcripts", runID+".jsonl") + _, err = os.Stat(expectedPath) + require.NoError(t, err, "Transcript file should exist at %s", expectedPath) + require.True(t, fileExists(expectedPath), "File must exist at the expected path") +} + +func TestWiringTranscript_MirrorFlagAttachesSubscriber(t *testing.T) { + tmpDir := t.TempDir() + runID := "test-run-789" + mirrorPath := 
filepath.Join(tmpDir, "mirror.jsonl") + + // Create transcripts directory (required before recording can succeed) + transcriptDir := filepath.Join(tmpDir, "transcripts") + err := os.MkdirAll(transcriptDir, 0o755) + require.NoError(t, err, "Creating transcripts directory should not error") + + rec, cleanup, err := cli.WireTranscript(runID, tmpDir) + defer cleanup() + + require.NoError(t, err) + + // Attach mirror subscriber + cancel := cli.AttachMirrorSubscriber(rec, mirrorPath) + defer cancel() + + // Record a test event + testEvent := transcript.ExchangeEvent{ + Seq: 1, + RunID: runID, + Type: transcript.EventTypeRunStarted, + Path: "/test", + Iteration: 0, + Timestamp: time.Now(), + Payload: nil, + } + err = rec.Record(context.Background(), testEvent) + require.NoError(t, err, "Recording event should not error") + + // Wait for the subscriber goroutine to flush the event to the mirror file + require.Eventually(t, func() bool { + info, statErr := os.Stat(mirrorPath) + return statErr == nil && info.Size() > 0 + }, 2*time.Second, 10*time.Millisecond, "Mirror file should be created with content") + + // Read and verify the mirror file contains the event (JSONL: one JSON object per line) + data, err := os.ReadFile(mirrorPath) + require.NoError(t, err, "Reading mirror file should not error") + require.NotEmpty(t, data, "Mirror file should contain event data") + + // Parse first line from JSONL format (each line is a separate JSON object) + scanner := bufio.NewScanner(bytes.NewReader(data)) + require.True(t, scanner.Scan(), "Mirror file must contain at least one JSONL line") + var recordedEvent transcript.ExchangeEvent + err = json.Unmarshal(scanner.Bytes(), &recordedEvent) + require.NoError(t, err, "First JSONL line should be valid JSON") + + // Verify the event matches what we recorded + assert.Equal(t, testEvent.Seq, recordedEvent.Seq, "Event sequence should match") + assert.Equal(t, testEvent.RunID, recordedEvent.RunID, "Event RunID should match") + assert.Equal(t, 
testEvent.Type, recordedEvent.Type, "Event type should match") +} + +func TestWiringTranscript_EmptyMirrorPathIsNoop(t *testing.T) { + tmpDir := t.TempDir() + + rec, cleanup, err := cli.WireTranscript("test-run", tmpDir) + defer cleanup() + + require.NoError(t, err) + + // Empty mirror path should be a no-op + cancel := cli.AttachMirrorSubscriber(rec, "") + + // Calling cancel multiple times should be safe + cancel() + cancel() +} + +func fileExists(path string) bool { + _, err := os.Stat(path) + return err == nil +} diff --git a/tests/integration/features/transcript_test.go b/tests/integration/features/transcript_test.go new file mode 100644 index 00000000..65b7fbad --- /dev/null +++ b/tests/integration/features/transcript_test.go @@ -0,0 +1,269 @@ +//go:build integration + +// Feature: F106 +// +// Canonical Agent Exchange Transcript (JSONL) — end-to-end behavior: +// +// US1 — single append-only JSONL per run, monotonic Seq, lossless round-trip +// US4 — bounded fan-out: slow subscriber does not block disk writes +// NFR-002 — file mode 0o600 +// NFR-005 — reader is forward-compatible with unknown event types +package features_test + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/awf-project/cli/internal/domain/transcript" + infraTranscript "github.com/awf-project/cli/internal/infrastructure/transcript" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// FR-001 + FR-002 + FR-003: canonical JSONL with monotonic Seq, 0o600 perms, +// lossless round-trip across the full payload vocabulary (StepPayload, +// MessagePayload with ContentBlocks, ToolPayload). 
+func TestTranscript_CanonicalLifecycle_RoundTripsLosslessly(t *testing.T) { + tmpDir := t.TempDir() + transcriptPath := filepath.Join(tmpDir, "run-canonical.jsonl") + + rec, err := infraTranscript.NewRecorder(transcriptPath) + require.NoError(t, err) + + ctx := context.Background() + runID := "run-canonical" + ts := time.Date(2026, 6, 7, 12, 0, 0, 0, time.UTC) + + emitted := []transcript.ExchangeEvent{ + { + Type: transcript.EventTypeRunStarted, + RunID: runID, + Path: "f106-canonical", + Timestamp: ts, + Payload: &transcript.StepPayload{Name: "f106-canonical", Kind: "workflow"}, + }, + { + Type: transcript.EventTypeStepStarted, + RunID: runID, + Path: "greet", + Timestamp: ts, + Payload: &transcript.StepPayload{Name: "greet", Kind: "agent"}, + }, + { + Type: transcript.EventTypeMessageUser, + RunID: runID, + Path: "greet", + Timestamp: ts, + Payload: &transcript.MessagePayload{ + Role: "user", + Blocks: []transcript.ContentBlock{ + {Type: transcript.BlockTypeText, Text: "hello F106"}, + {Type: transcript.BlockTypeText, Text: "you are a helpful assistant"}, + }, + }, + }, + { + Type: transcript.EventTypeToolCall, + RunID: runID, + Path: "greet", + Timestamp: ts, + Payload: &transcript.ToolPayload{ + Name: "bash", + CallID: "call-001", + Input: map[string]any{"command": "echo hi"}, + Fidelity: transcript.FidelityRouter, + }, + }, + { + Type: transcript.EventTypeToolResult, + RunID: runID, + Path: "greet", + Timestamp: ts, + Payload: &transcript.ToolPayload{ + CallID: "call-001", + Output: "hi\n", + Fidelity: transcript.FidelityRouter, + }, + }, + { + Type: transcript.EventTypeStepCompleted, + RunID: runID, + Path: "greet", + Timestamp: ts, + Payload: &transcript.StepPayload{Name: "greet", Kind: "agent"}, + }, + { + Type: transcript.EventTypeRunCompleted, + RunID: runID, + Path: "f106-canonical", + Timestamp: ts, + Payload: &transcript.StepPayload{Name: "f106-canonical", Kind: "workflow"}, + }, + } + + for i := range emitted { + require.NoError(t, rec.Record(ctx, 
emitted[i])) + } + require.NoError(t, rec.Close()) + + info, err := os.Stat(transcriptPath) + require.NoError(t, err) + assert.Equal(t, os.FileMode(0o600), info.Mode().Perm(), "transcript file must be 0o600 (NFR-002)") + + f, err := os.Open(transcriptPath) + require.NoError(t, err) + defer f.Close() + + read, err := infraTranscript.NewReader(f).ReadAll() + require.NoError(t, err) + require.Len(t, read, len(emitted), "every recorded event must be readable back") + + var prevSeq uint64 + for i, ev := range read { + assert.Greater(t, ev.Seq, prevSeq, "Seq must be strictly monotonic (FR-003)") + prevSeq = ev.Seq + assert.Equal(t, emitted[i].Type, ev.Type, "event[%d] type must round-trip", i) + assert.Equal(t, emitted[i].RunID, ev.RunID, "event[%d] run_id must round-trip", i) + assert.Equal(t, emitted[i].Path, ev.Path, "event[%d] path must round-trip", i) + } + + msgEvent := read[2] + msgPayload, ok := msgEvent.Payload.(*transcript.MessagePayload) + require.True(t, ok, "message.user payload must decode as MessagePayload") + require.Len(t, msgPayload.Blocks, 2, "agent seam emission carries prompt + system_prompt as separate blocks (FR-005)") + assert.Equal(t, "hello F106", msgPayload.Blocks[0].Text) + assert.Equal(t, "you are a helpful assistant", msgPayload.Blocks[1].Text) + + toolCall, ok := read[3].Payload.(*transcript.ToolPayload) + require.True(t, ok) + assert.Equal(t, transcript.FidelityRouter, toolCall.Fidelity, + "tool.call captured at router seam must carry fidelity=router (FR-008)") +} + +// US1 AC3: prior events on disk remain intact across recorder restart (simulates kill + restart). 
+func TestTranscript_AppendOnly_PreservesPriorLines(t *testing.T) { + tmpDir := t.TempDir() + transcriptPath := filepath.Join(tmpDir, "run-restart.jsonl") + + first, err := infraTranscript.NewRecorder(transcriptPath) + require.NoError(t, err) + require.NoError(t, first.Record(context.Background(), transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + RunID: "run-restart", + Path: "f106-restart", + Timestamp: time.Now(), + })) + require.NoError(t, first.Close()) + + beforeRestart, err := os.ReadFile(transcriptPath) + require.NoError(t, err) + require.NotEmpty(t, beforeRestart) + + second, err := infraTranscript.NewRecorder(transcriptPath) + require.NoError(t, err) + require.NoError(t, second.Record(context.Background(), transcript.ExchangeEvent{ + Type: transcript.EventTypeRunCompleted, + RunID: "run-restart", + Path: "f106-restart", + Timestamp: time.Now(), + })) + require.NoError(t, second.Close()) + + final, err := os.ReadFile(transcriptPath) + require.NoError(t, err) + assert.True(t, strings.HasPrefix(string(final), string(beforeRestart)), + "appended writes must preserve the first run's bytes verbatim") + assert.Greater(t, len(final), len(beforeRestart), "restart must extend the file, not truncate it") +} + +// US4 / SC-005: a slow subscriber consuming far below producer rate must not block disk writes. 
+func TestTranscript_SlowSubscriber_DoesNotBlockDiskWrites(t *testing.T) { + tmpDir := t.TempDir() + transcriptPath := filepath.Join(tmpDir, "run-fanout.jsonl") + + rec, err := infraTranscript.NewRecorder(transcriptPath, infraTranscript.WithFanOutBufferSize(8)) + require.NoError(t, err) + t.Cleanup(func() { _ = rec.Close() }) + + ch, cancel := rec.Subscribe() + defer cancel() + + var received atomic.Int64 + subscriberDone := make(chan struct{}) + go func() { + defer close(subscriberDone) + for range ch { + received.Add(1) + time.Sleep(20 * time.Millisecond) + } + }() + + const total = 200 + writeStart := time.Now() + ctx := context.Background() + for i := 0; i < total; i++ { + require.NoError(t, rec.Record(ctx, transcript.ExchangeEvent{ + Type: transcript.EventTypeRunStarted, + RunID: "run-fanout", + Path: "f106-fanout", + Timestamp: time.Now(), + })) + } + writeElapsed := time.Since(writeStart) + + assert.Less(t, writeElapsed, 2*time.Second, + "writer must not block on slow subscriber (US4 / SC-005); took %s", writeElapsed) + + f, err := os.Open(transcriptPath) + require.NoError(t, err) + defer f.Close() + + events, err := infraTranscript.NewReader(f).ReadAll() + require.NoError(t, err) + assert.Equal(t, total, len(events), "every Record call must hit the disk regardless of subscriber speed") + + require.NoError(t, rec.Close()) + <-subscriberDone + assert.Less(t, received.Load(), int64(total), + "slow subscriber must observe drops, not the full stream (drop policy active)") +} + +// NFR-005: reader must tolerate unknown EventType values (forward-compat decode). 
+func TestTranscript_Reader_TolerantOfUnknownEventTypes(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "future.jsonl") + + knownLine, err := json.Marshal(transcript.ExchangeEvent{ + Seq: 1, + RunID: "run-future", + Type: transcript.EventTypeRunStarted, + Path: "f106-future", + Timestamp: time.Now(), + }) + require.NoError(t, err) + + unknownLine := []byte(`{"seq":2,"run_id":"run-future","type":"future.unknown.event","path":"f106-future","iteration":0,"timestamp":"2026-06-07T00:00:00Z","payload":null}`) + + contents := append(knownLine, '\n') + contents = append(contents, unknownLine...) + contents = append(contents, '\n') + require.NoError(t, os.WriteFile(path, contents, 0o600)) + + f, err := os.Open(path) + require.NoError(t, err) + defer f.Close() + + events, err := infraTranscript.NewReader(f).ReadAll() + require.NoError(t, err, "reader must accept unknown event types without error (NFR-005)") + require.Len(t, events, 2) + + assert.Equal(t, transcript.EventTypeRunStarted, events[0].Type) + assert.Equal(t, transcript.EventType("future.unknown.event"), events[1].Type, + "unknown EventType must be preserved verbatim for forward-compat") +}