From 42bb55d85e5b0111a0e6b1ad146b7cb311945c55 Mon Sep 17 00:00:00 2001 From: chris-colinsky Date: Sun, 7 Jun 2026 19:06:05 -0700 Subject: [PATCH] Extend LlmCompletionEvent with proposal 0057 fields Bump spec pin to v0.51.0 and extend the typed LLM completion event with request-side fields per accepted proposal 0057: input_messages, output_content, request_params, request_extras, active_prompt, active_prompt_group, call_id, and response_model. Rename request_id to response_id to align with the proposal's response-side naming. OpenAI provider now populates the new fields at emission. Image bytes in input_messages stay redacted via the existing serializer. Observers (OTel, Langfuse) and fixtures 060-068 land in follow-up PRs in this cycle; conformance.toml marks 0057 implemented since the typed event contract is satisfied. --- conformance.toml | 49 +++- openarmature-spec | 2 +- pyproject.toml | 2 +- src/openarmature/AGENTS.md | 4 +- src/openarmature/__init__.py | 2 +- src/openarmature/graph/events.py | 79 +++++- src/openarmature/llm/providers/openai.py | 60 ++++- tests/conformance/test_fixture_parsing.py | 31 +++ tests/conformance/test_typed_event_harness.py | 10 +- tests/test_smoke.py | 2 +- tests/unit/test_llm_provider.py | 241 +++++++++++++++++- 11 files changed, 457 insertions(+), 25 deletions(-) diff --git a/conformance.toml b/conformance.toml index ea208c6..c77ab78 100644 --- a/conformance.toml +++ b/conformance.toml @@ -32,7 +32,7 @@ [manifest] implementation = "openarmature-python" -spec_pin = "v0.46.0" +spec_pin = "v0.51.0" # Status values: # implemented — shipped behavior matches the proposal's contract @@ -202,6 +202,21 @@ status = "not-yet" [proposals."0020"] status = "not-yet" +# Spec (proposal 0021). Suspension capability — async-pause + +# resume primitive (``suspend()`` + ``resume()``) layering on the +# graph engine. Python has not yet shipped suspension; v0.13.0 +# leaves the capability not-yet-implemented. 
+[proposals."0021"] +status = "not-yet" + +# Spec v0.49.0 (proposal 0022). Harness capability — abstract +# contract for wrapping the engine in deployment runtimes +# (HTTP / event-bus / queue / CLI / streaming). Python has not +# yet shipped a harness binding; v0.13.0 leaves the capability +# not-yet-implemented. Composes with 0056 (chat sub-spec). +[proposals."0022"] +status = "not-yet" + [proposals."0042"] status = "implemented" since = "0.11.0" @@ -462,3 +477,35 @@ since = "0.12.0" [proposals."0054"] status = "implemented" since = "0.12.0" + +# Spec v0.47.0 (proposal 0055). Conformance-adapter capability — +# descriptive ratification of the existing fixture / directive +# system. No code change; python's adapter is already structured +# per the spec text by virtue of having grown alongside the +# fixtures since proposal 0001. Matches the Textual impl-tracking +# precedent (0019 / 0026 / 0030 / 0051 / 0053). +[proposals."0055"] +status = "textual-only" +since = "0.13.0" + +# Spec v0.48.0 (proposal 0056). Harness-chat capability — new +# harness sub-spec ratifying the chat-loop deployment shape +# (ChatMessage, conversation-history convention, send() callable, +# send_streaming() forward-looking surface, error-bucket → user- +# facing-reply mapping). Python does not yet ship a chat-harness +# binding; v0.13.0 leaves the capability not-yet-implemented. +[proposals."0056"] +status = "not-yet" + +# Spec v0.51.0 (proposal 0057). LlmCompletionEvent field-set +# extension — seven additive request-side fields on the typed +# event variant + ``request_id`` → ``response_id`` rename + new +# ``response_model`` field. Python lands the field-set extension +# + rename + provider population in v0.13.0 PR 3a; OTel + Langfuse +# observers continue driving their §5.5 surface off the sentinel +# NodeEvent pair through this PR (observer migration to type +# discrimination is queued for follow-up PRs 3b / 3c against the +# same v0.13.0 release). 
+[proposals."0057"] +status = "implemented" +since = "0.13.0" diff --git a/openarmature-spec b/openarmature-spec index 0264dc2..b2045e1 160000 --- a/openarmature-spec +++ b/openarmature-spec @@ -1 +1 @@ -Subproject commit 0264dc21d49b26aa96f33c5fd55f935b634e5f44 +Subproject commit b2045e1beed234ef6620943e13b2c5caecb66e6e diff --git a/pyproject.toml b/pyproject.toml index 669608e..def2187 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ Specification = "https://github.com/LunarCommand/openarmature-spec" openarmature = "openarmature.cli:main" [tool.openarmature] -spec_version = "0.46.0" +spec_version = "0.51.0" [dependency-groups] dev = [ diff --git a/src/openarmature/AGENTS.md b/src/openarmature/AGENTS.md index 26c66b4..50d2f2d 100644 --- a/src/openarmature/AGENTS.md +++ b/src/openarmature/AGENTS.md @@ -1,6 +1,6 @@ # OpenArmature — Agent documentation -*This is the agent guide bundled with the openarmature Python package, version 0.12.0 (spec v0.46.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.* +*This is the agent guide bundled with the openarmature Python package, version 0.12.0 (spec v0.51.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.* ## TL;DR @@ -10,7 +10,7 @@ OpenArmature is a workflow framework for LLM pipelines and tool-calling agents: ## Capability contracts -_Sourced from openarmature-spec v0.46.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. 
For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._ +_Sourced from openarmature-spec v0.51.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._ ### Capability: `graph-engine` diff --git a/src/openarmature/__init__.py b/src/openarmature/__init__.py index 55f0495..c978000 100644 --- a/src/openarmature/__init__.py +++ b/src/openarmature/__init__.py @@ -25,7 +25,7 @@ """ __version__ = "0.12.0" -__spec_version__ = "0.46.0" +__spec_version__ = "0.51.0" # Proposal 0052 (spec observability §5.1 / §8.4.1): canonical # package-registry name for this implementation. Surfaces on every # OTel invocation span as ``openarmature.implementation.name`` and on diff --git a/src/openarmature/graph/events.py b/src/openarmature/graph/events.py index bb6596f..ccb9176 100644 --- a/src/openarmature/graph/events.py +++ b/src/openarmature/graph/events.py @@ -450,6 +450,22 @@ class InvocationCompletedEvent: # # Field naming matches the spec-canonical names verbatim per the spec # Q5 ack — Python snake_case happens to match the spec table 1:1. +# +# Spec proposal 0057 (v0.51.0) extension: adds 8 additive request-side +# fields (input_messages, output_content, request_params, +# request_extras, active_prompt, active_prompt_group, call_id, +# response_model) and renames request_id → response_id to match the +# response-side data the field carries. Inline image bytes in +# input_messages MUST be redacted per observability §5.5.5 before +# population — the provider reuses _serialize_messages_for_payload +# which already enforces the redaction. 
The three payload-bearing +# fields (input_messages, output_content, request_extras) are +# populated unconditionally on the typed event per §5.5.7; observer- +# side privacy gates (OTel disable_llm_payload, Langfuse equivalents) +# apply at rendering, symmetric with the §5.5.1 span attribute path. +# Custom queryable observers (per observability §9) own their own +# redaction posture — gating belongs at rendering with the consumer's +# awareness. @dataclass(frozen=True) class LlmCompletionEvent: """A typed LLM provider call event delivered to observers. @@ -473,17 +489,55 @@ class LlmCompletionEvent: - ``branch_name``: parallel-branches branch name when the calling node ran inside a branch; ``None`` otherwise. - ``provider``: provider identifier; matches ``gen_ai.system``. - - ``model``: the model identifier the call targeted. - - ``request_id``: provider-returned response id; ``None`` when + - ``model``: the model identifier the call targeted (the + request-side bound model; distinct from ``response_model``). + - ``response_id``: provider-returned response id; ``None`` when + the provider didn't return one. + - ``response_model``: provider-returned model identifier; + distinct from ``model`` (the provider may return a more + specific identifier than the one requested). ``None`` when the provider didn't return one. - - ``usage``: token-accounting record per ``Response.usage`` - shape. Reuses the existing ``openarmature.llm.response.Usage`` - class. ``None`` when the call returned no usage at all. + - ``usage``: token-accounting record reusing the existing + ``openarmature.llm.response.Usage`` class. ``None`` when the + call returned no usage at all. - ``latency_ms``: wall-clock latency measured at the adapter boundary, in milliseconds. ``None`` when latency was not measured. - ``finish_reason``: the call's finish reason; ``None`` when the call did not complete normally. 
+ - ``input_messages``: the message list the call was made with, + serialized to the plain-dict shape. Non-nullable; empty list + when the call had no history. Inline image bytes are + redacted before population (see the comment block above for + the redaction contract). + - ``output_content``: the assistant message's content string + from the response. ``None`` on tool-call-only responses + (the structured-response and tool-call paths are mutually + exclusive at the response level). + - ``request_params``: the GenAI request-parameter set the + caller supplied. Absence-is-meaningful: only caller-supplied + keys appear; empty mapping when none supplied. Keys are the + cross-vendor parameter names without the ``gen_ai.request.`` + prefix (e.g. ``temperature``, ``max_tokens``). + - ``request_extras``: the ``RuntimeConfig`` extras pass- + through bag in native mapping form (not JSON-encoded). + Empty mapping when no extras supplied. + - ``active_prompt``: 5-field identity snapshot of the active + ``PromptResult`` at LLM-call time (``name`` / ``version`` / + ``label`` / ``template_hash`` / ``rendered_hash``). + ``None`` when the call ran outside any prompt-context + binding. Typed as ``Any`` because the prompts package + imports State indirectly; observer-side narrowing reads + the attribute names directly. + - ``active_prompt_group``: ``{group_name}`` snapshot when the + call ran inside a ``PromptGroup`` context; ``None`` + otherwise. Same ``Any`` typing rationale as + ``active_prompt``. + - ``call_id``: per-call disambiguator minted by the + implementation. Always present, freshly minted per + ``provider.complete()`` call, stable for the call's + lifetime, unique within the run. Distinct from + ``response_id``. - ``caller_invocation_metadata``: optional snapshot of caller- supplied invocation metadata at LLM-call time. 
Populated only when the provider's opt-in flag is set (per-language @@ -499,13 +553,26 @@ class LlmCompletionEvent: branch_name: str | None provider: str model: str - request_id: str | None + response_id: str | None + response_model: str | None # Usage is a string-typed forward reference per the TYPE_CHECKING # import above — keeps the runtime import direction graph → llm # off the module-load path while preserving pyright resolution. usage: "Usage | None" latency_ms: float | None finish_reason: str | None + # Proposal 0057 (spec v0.51.0) additive request-side fields. + # Non-nullable for input_messages / request_params / + # request_extras — absence is represented as empty list / empty + # mapping, not None. output_content stays nullable for tool- + # call-only assistant messages. + input_messages: list[dict[str, Any]] + output_content: str | None + request_params: Mapping[str, Any] + request_extras: Mapping[str, Any] + active_prompt: Any + active_prompt_group: Any + call_id: str caller_invocation_metadata: Mapping[str, AttributeValue] | None = None diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py index 0eedbcb..7959d6c 100644 --- a/src/openarmature/llm/providers/openai.py +++ b/src/openarmature/llm/providers/openai.py @@ -525,20 +525,46 @@ async def complete( # observers filtering on the sentinel namespace see the # NodeEvent pair above. Failure path doesn't reach here. 
dispatch( - self._build_llm_completion_event(response, latency_ms), + self._build_llm_completion_event( + response, + latency_ms, + call_id=call_id, + input_messages=serialized_messages, + request_params=request_params, + request_extras=request_extras, + active_prompt=active_prompt, + active_prompt_group=active_prompt_group, + ), ) return response - def _build_llm_completion_event(self, response: Response, latency_ms: float) -> LlmCompletionEvent: + def _build_llm_completion_event( + self, + response: Response, + latency_ms: float, + *, + call_id: str, + input_messages: list[dict[str, Any]], + request_params: dict[str, Any], + request_extras: dict[str, Any], + active_prompt: Any, + active_prompt_group: Any, + ) -> LlmCompletionEvent: """Construct the typed LlmCompletionEvent for the success path. Sources identity / scoping fields from the calling-node - ContextVars and outcome fields from the response. The calling- - node namespace is the FULL namespace tuple (not the legacy - sentinel pseudo-namespace); node_name is the last element of - the namespace (the user-defined node that issued the call). - Outside any node body (namespace empty), node_name is the - empty string. + ContextVars and outcome fields from the response. Request-side + fields (per proposal 0057) are passed through from the + provider's complete() local state — serialized message list, + the gen_ai.request.* parameter mapping, the RuntimeConfig + extras, the prompt-context snapshots taken at dispatch time, + and the call-id minted at the call's start. + + The calling-node namespace is the FULL namespace tuple (not + the legacy sentinel pseudo-namespace); node_name is the last + element of the namespace (the user-defined node that issued + the call). Outside any node body (namespace empty), node_name + is the empty string. 
""" namespace = current_namespace_prefix() @@ -560,6 +586,14 @@ def _build_llm_completion_event(self, response: Response, latency_ms: float) -> # frozen view; if a node body mutates metadata after the # snapshot, the event still carries the at-emission view. caller_metadata = dict(current_invocation_metadata()) + # ``output_content`` is None on tool-call-only assistant + # messages per llm-provider §6 mutual-exclusion: the + # tool-call path and structured-content path are mutually + # exclusive at the response level, and provider.complete() + # leaves the AssistantMessage.content as the empty string on + # the tool-call path (which we project to None per the + # typed-event contract). + output_content = response.message.content or None return LlmCompletionEvent( invocation_id=invocation_id, correlation_id=current_correlation_id(), @@ -570,10 +604,18 @@ def _build_llm_completion_event(self, response: Response, latency_ms: float) -> branch_name=current_branch_name(), provider=self._genai_system, model=self.model, - request_id=response.response_id, + response_id=response.response_id, + response_model=response.response_model, usage=response.usage, latency_ms=latency_ms, finish_reason=response.finish_reason, + input_messages=input_messages, + output_content=output_content, + request_params=request_params, + request_extras=request_extras, + active_prompt=active_prompt, + active_prompt_group=active_prompt_group, + call_id=call_id, caller_invocation_metadata=caller_metadata, ) diff --git a/tests/conformance/test_fixture_parsing.py b/tests/conformance/test_fixture_parsing.py index c2537b2..655bd68 100644 --- a/tests/conformance/test_fixture_parsing.py +++ b/tests/conformance/test_fixture_parsing.py @@ -334,6 +334,37 @@ def _id(case: tuple[str, Path]) -> str: "observability/056-llm-completion-event-strict-serial-ordering": ( "Proposal 0049 typed LLM completion event; queued for v0.13.0" ), + # Proposal 0057 (LlmCompletionEvent field-set extension, v0.51.0) + # — fixtures 
060-068 share the same ``typed_observers`` directive + # shape as 050-056 and inherit the same parser-deferral status + # pending the harness model's typed-event-collector schema work. + "observability/060-llm-completion-event-input-messages-populated": ( + "Proposal 0057 typed event request-side fields; queued for v0.13.0" + ), + "observability/061-llm-completion-event-output-content-populated": ( + "Proposal 0057 typed event request-side fields; queued for v0.13.0" + ), + "observability/062-llm-completion-event-request-params-populated": ( + "Proposal 0057 typed event request-side fields; queued for v0.13.0" + ), + "observability/063-llm-completion-event-request-extras-populated": ( + "Proposal 0057 typed event request-side fields; queued for v0.13.0" + ), + "observability/064-llm-completion-event-active-prompt-populated": ( + "Proposal 0057 typed event request-side fields; queued for v0.13.0" + ), + "observability/065-llm-completion-event-active-prompt-null": ( + "Proposal 0057 typed event request-side fields; queued for v0.13.0" + ), + "observability/066-llm-completion-event-active-prompt-group-populated": ( + "Proposal 0057 typed event request-side fields; queued for v0.13.0" + ), + "observability/067-llm-completion-event-call-id-always-present-and-distinct": ( + "Proposal 0057 typed event request-side fields; queued for v0.13.0" + ), + "observability/068-llm-completion-event-response-model-distinct-from-request": ( + "Proposal 0057 typed event request-side fields; queued for v0.13.0" + ), # Proposal 0050 (failure-isolation middleware + call-level retry, # v0.42.0) — llm-provider fixtures 056-058 (call-level retry) and # pipeline-utilities fixtures 058-063 (failure-isolation diff --git a/tests/conformance/test_typed_event_harness.py b/tests/conformance/test_typed_event_harness.py index 58c7be1..151d3c0 100644 --- a/tests/conformance/test_typed_event_harness.py +++ b/tests/conformance/test_typed_event_harness.py @@ -51,10 +51,18 @@ def 
_make_typed_event(**overrides: Any) -> LlmCompletionEvent: "branch_name": None, "provider": "openai", "model": "gpt-test", - "request_id": "req-1", + "response_id": "req-1", + "response_model": None, "usage": Usage(prompt_tokens=14, completion_tokens=4, total_tokens=18), "latency_ms": 42.0, "finish_reason": "stop", + "input_messages": [], + "output_content": None, + "request_params": {}, + "request_extras": {}, + "active_prompt": None, + "active_prompt_group": None, + "call_id": "cc-1", "caller_invocation_metadata": None, } base.update(overrides) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 211dad3..88e20f7 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -9,7 +9,7 @@ def test_package_versions() -> None: assert openarmature.__version__ == "0.12.0" - assert openarmature.__spec_version__ == "0.46.0" + assert openarmature.__spec_version__ == "0.51.0" def test_spec_version_matches_pyproject() -> None: diff --git a/tests/unit/test_llm_provider.py b/tests/unit/test_llm_provider.py index aba457c..bd6f2ed 100644 --- a/tests/unit/test_llm_provider.py +++ b/tests/unit/test_llm_provider.py @@ -1375,7 +1375,7 @@ async def test_llm_completion_event_carries_typed_outcome_fields() -> None: assert typed.provider == "vllm" assert typed.model == "m-test" assert typed.finish_reason == "stop" - assert typed.request_id == "x" # the helper returns id="x" + assert typed.response_id == "x" # the helper returns id="x" # usage flows through the shared Usage shape; cache field surfaces # via the typed event without separate plumbing per the # proposal-0047 + proposal-0049 architectural pair. @@ -1385,6 +1385,243 @@ async def test_llm_completion_event_carries_typed_outcome_fields() -> None: assert typed.latency_ms >= 0.0 +async def test_llm_completion_event_carries_input_messages_and_output_content() -> None: + # Proposal 0057 request-side fields: input_messages carries the + # serialized message list; output_content carries the assistant + # message's text. 
Both populated unconditionally on the typed + # event (privacy gating sits at observer rendering). + events, token = _collecting_dispatch() + transport = _make_openai_response_with_usage( + {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15} + ) + provider = OpenAIProvider(base_url="http://test", model="m", api_key="k", transport=transport) + try: + await provider.complete( + [SystemMessage(content="Be helpful."), UserMessage(content="hi")], + ) + finally: + await provider.aclose() + _release_dispatch(token) + + typed = next(e for e in events if isinstance(e, LlmCompletionEvent)) + assert typed.input_messages == [ + {"role": "system", "content": "Be helpful."}, + {"role": "user", "content": "hi"}, + ] + # The mock response returns content="ok" — see _make_openai_response_with_usage. + assert typed.output_content == "ok" + + +async def test_llm_completion_event_output_content_none_on_tool_call_response() -> None: + # Per llm-provider §6 mutual-exclusion: tool-call responses leave + # AssistantMessage.content as the empty string. The typed event + # projects that to None. 
+ def _handler(_req: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={ + "id": "x", + "object": "chat.completion", + "created": 0, + "model": "m", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": {"name": "echo", "arguments": '{"x": 1}'}, + } + ], + }, + "finish_reason": "tool_calls", + } + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}, + }, + ) + + events, token = _collecting_dispatch() + provider = OpenAIProvider( + base_url="http://test", + model="m", + api_key="k", + transport=httpx.MockTransport(_handler), + ) + try: + await provider.complete( + [UserMessage(content="call echo")], + tools=[Tool(name="echo", description="", parameters={})], + ) + finally: + await provider.aclose() + _release_dispatch(token) + + typed = next(e for e in events if isinstance(e, LlmCompletionEvent)) + assert typed.output_content is None + assert typed.finish_reason == "tool_calls" + + +async def test_llm_completion_event_request_params_only_carries_supplied_keys() -> None: + # Proposal 0057 request_params shape: absence-is-meaningful. Only + # caller-supplied gen_ai.request.* keys appear; unset RuntimeConfig + # fields are omitted from the mapping (NOT included with None + # values). + from openarmature.llm import RuntimeConfig + + events, token = _collecting_dispatch() + transport = _make_openai_response_with_usage( + {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2} + ) + provider = OpenAIProvider(base_url="http://test", model="m", api_key="k", transport=transport) + try: + await provider.complete( + [UserMessage(content="hi")], + config=RuntimeConfig(temperature=0.7, max_tokens=64), + ) + finally: + await provider.aclose() + _release_dispatch(token) + + typed = next(e for e in events if isinstance(e, LlmCompletionEvent)) + # Only the two caller-supplied keys; not top_p / seed / etc. 
+ assert dict(typed.request_params) == {"temperature": 0.7, "max_tokens": 64} + + +async def test_llm_completion_event_request_extras_flows_through() -> None: + # Proposal 0057 request_extras: RuntimeConfig extras pass-through + # in native mapping form (not JSON-encoded). + from openarmature.llm import RuntimeConfig + + events, token = _collecting_dispatch() + transport = _make_openai_response_with_usage( + {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2} + ) + provider = OpenAIProvider(base_url="http://test", model="m", api_key="k", transport=transport) + try: + # ``guided_decoding`` is a vLLM-specific extra; RuntimeConfig + # accepts undeclared fields via extra="allow". Use model_validate + # so pyright doesn't flag the undeclared kwarg. + await provider.complete( + [UserMessage(content="hi")], + config=RuntimeConfig.model_validate({"guided_decoding": {"choice": ["a", "b"]}}), + ) + finally: + await provider.aclose() + _release_dispatch(token) + + typed = next(e for e in events if isinstance(e, LlmCompletionEvent)) + assert dict(typed.request_extras) == {"guided_decoding": {"choice": ["a", "b"]}} + + +async def test_llm_completion_event_response_model_distinct_from_request_model() -> None: + # Proposal 0057 response_model: provider-returned identifier, + # distinct from the request-bound model. The OpenAI Chat Completions + # spec lets the provider return a more specific identifier + # (e.g. requested gpt-4o → response model gpt-4o-2024-08-06). 
+ def _handler(_req: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={ + "id": "cc-1", + "object": "chat.completion", + "created": 0, + "model": "gpt-4o-2024-08-06", # distinct from bound model + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "ok"}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}, + }, + ) + + events, token = _collecting_dispatch() + provider = OpenAIProvider( + base_url="http://test", model="gpt-4o", api_key="k", transport=httpx.MockTransport(_handler) + ) + try: + await provider.complete([UserMessage(content="hi")]) + finally: + await provider.aclose() + _release_dispatch(token) + + typed = next(e for e in events if isinstance(e, LlmCompletionEvent)) + assert typed.model == "gpt-4o" # request-side bound model + assert typed.response_model == "gpt-4o-2024-08-06" # provider-returned + + +async def test_llm_completion_event_call_id_always_present_and_distinct_across_calls() -> None: + # Proposal 0057 call_id contract: always present, freshly minted + # per provider.complete() call. Two calls produce two distinct + # call_ids. 
+ events, token = _collecting_dispatch() + transport = _make_openai_response_with_usage( + {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2} + ) + provider = OpenAIProvider(base_url="http://test", model="m", api_key="k", transport=transport) + try: + await provider.complete([UserMessage(content="hi")]) + await provider.complete([UserMessage(content="hi again")]) + finally: + await provider.aclose() + _release_dispatch(token) + + typed_events = [e for e in events if isinstance(e, LlmCompletionEvent)] + assert len(typed_events) == 2 + assert typed_events[0].call_id + assert typed_events[1].call_id + assert typed_events[0].call_id != typed_events[1].call_id + + +async def test_llm_completion_event_input_messages_redacts_inline_image_bytes() -> None: + # Privacy contract: inline image bytes are redacted from + # input_messages before population. The serializer replaces the + # ImageSourceInline source with {"type": "inline_redacted", + # "byte_count": N}; the raw base64_data must never appear on the + # typed event. Catches regressions in _serialize_messages_for_payload + # that would leak bytes through the typed-event surface. + from openarmature.llm import ImageBlock, ImageSourceInline, TextBlock + + inline_bytes = "ZmFrZS1iYXNlNjQtZGF0YQ==" # arbitrary base64 + events, token = _collecting_dispatch() + transport = _make_openai_response_with_usage( + {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2} + ) + provider = OpenAIProvider(base_url="http://test", model="m", api_key="k", transport=transport) + try: + await provider.complete( + [ + UserMessage( + content=[ + TextBlock(text="Describe this."), + ImageBlock( + source=ImageSourceInline(base64_data=inline_bytes), + media_type="image/png", + ), + ] + ) + ] + ) + finally: + await provider.aclose() + _release_dispatch(token) + + typed = next(e for e in events if isinstance(e, LlmCompletionEvent)) + # Raw base64 bytes MUST NOT appear anywhere in input_messages. 
+ serialized = repr(typed.input_messages) + assert inline_bytes not in serialized, "inline image bytes leaked into LlmCompletionEvent.input_messages" + # Sanity: the redaction marker IS present. + assert "inline_redacted" in serialized + assert "byte_count" in serialized + + async def test_caller_invocation_metadata_off_by_default() -> None: # Per proposal 0049's OPT-IN contract: default absent / None. events, token = _collecting_dispatch() @@ -1462,7 +1699,7 @@ def _handler(_req: httpx.Request) -> httpx.Response: _release_dispatch(token) typed = next(e for e in events if isinstance(e, LlmCompletionEvent)) - assert typed.request_id is None + assert typed.response_id is None async def test_llm_completion_event_arrives_after_sentinel_completed_within_provider_emission() -> None: