CLAUDE: agent_map list output (return_table) (#5531)

nikosbosse · github-actions[bot] · commit 94a3e70d3f5b · 2026-04-24T10:40:22.000Z
## Summary

CLAUDE: Adds a `return_list` (api_v0) / `return_table` (SDK) option to `agent_map`, mirroring the existing flag on `single_agent`. When enabled, each per-row agent emits a list of records that fans out into multiple output rows (with an `_expand_index` column). This restores the "map + expand" capability that earlier Cohort code exposed. The internal `AgentQueryParams.is_expand` path and ClickHouse partial-table ingestion already supported MAP+expand for the autocohort path — only the public API surface gated it off.

### Changes

**Engine (api_v0):**
- `data_types/operations.py` — added `return_list: bool = False` field to `AgentMapOperation` (mirrors `SingleAgentOperation`).
- `conversions.py` — both request types now flow `return_list → AgentQueryParams.is_expand`. Removed the explicit `isinstance(SingleAgentOperation)` gate.

**SDK:**
- `ops.py` — threaded `return_table: bool = False` through `agent_map`, `agent_map_async`, and `_submit_agent_map`. Forwards as `return_list` on the wire and flips `EveryrowTask.is_expand` so the result is unpacked correctly.
- `generated/models/agent_map_operation.py` — regenerated from the updated OpenAPI spec.
- `README.md` — added a `return_table=True` example.

**Tests:**
- 4 new handler tests on `/operations/agent-map` (default false, explicit false, explicit true, column-collision contract under `join_with_input=True`).
- 5 new unit tests on `agent_operation_to_agent_query_params` covering both request types.
- Replaced misleading `test_agent_map_with_table_output` (didn't actually exercise the flag) with two tests: one asserting that `return_table=True` is forwarded as `return_list=True` and the result fans out (3 items × 2 input rows → 6 output rows), and one asserting that the default call sends `return_list=False`.

### What's not in this PR

- **SDK version bump.** Per the `bump-sdk-version` skill, version bumps go in their own dedicated PR. Should be a follow-up after this lands.
### Schema contract (worth calling out)

When `return_table=True`, users pass the **per-item** `response_schema` — not a pre-wrapped `{items: [...]}` schema. The worker calls `wrap_json_schema_in_list` at execution time; ClickHouse unwraps via `_extract_item_schema`. Documented in the field description and SDK docstring.

## Test plan

- [x] `uv run pytest tests/server/api_v0/test_operations.py tests/server/api_v0/test_conversions.py` — 136 passed
- [x] `uv run pytest tests/test_ops.py` (futuresearch-python) — 13 passed
- [x] `uv run ruff check` and `uv run pyright` clean on touched files
- [ ] Smoke-test against staging once merged: submit an `agent_map` with `return_list=true` and a small per-item schema, confirm row count > input row count and `_expand_index` column is present.
- [ ] Autocohort regression spot-check — same `query_params` shape lands in DB regardless of submission path, but worth running the autocohort tests.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Sourced from commit 09d429c61cfb923102ed411a88d641af6699b1dc
diff --git a/README.md b/README.md
@@ -84,6 +84,18 @@ result = await agent_map(
     ]),
 )
 print(result.data.head())
+
+# Same map, but each agent emits a list of records that fan out into extra rows
+# (one row per item, with an `_expand_index` column).
+result = await agent_map(
+    task="List this company's top 5 products",
+    input=DataFrame([
+        {"company": "Anthropic"},
+        {"company": "OpenAI"},
+    ]),
+    return_table=True,
+)
+print(result.data.head())
 ```
 
 See the API [docs](https://futuresearch.ai/docs/reference/RESEARCH), a case study of [labeling data](https://futuresearch.ai/docs/classify-dataframe-rows-llm) or a case study for [researching government data](https://futuresearch.ai/docs/case-studies/research-and-rank-permit-times) at scale.
diff --git a/src/futuresearch/generated/models/agent_map_operation.py b/src/futuresearch/generated/models/agent_map_operation.py
@@ -51,6 +51,9 @@ class AgentMapOperation:
             more important than overall throughput. Default: False.
         document_query_llm (LLMEnumPublic | None | Unset): LLM to use for the document query tool (QDLLM) that reads and
             extracts information from web pages. If not provided, defaults to the system default.
+        return_list (bool | Unset): If True, treat each row's agent output as a list of records and emit one output row
+            per item (with an `_expand_index` column). The `response_schema` should describe a single item; the worker wraps
+            it in a list automatically. Do not pre-wrap your schema. Default: False.
     """
 
     input_: AgentMapOperationInputType2 | list[AgentMapOperationInputType1Item] | UUID
@@ -66,6 +69,7 @@ class AgentMapOperation:
     include_research: bool | None | Unset = UNSET
     enforce_row_independence: bool | Unset = False
     document_query_llm: LLMEnumPublic | None | Unset = UNSET
+    return_list: bool | Unset = False
     additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
 
     def to_dict(self) -> dict[str, Any]:
@@ -153,6 +157,8 @@ def to_dict(self) -> dict[str, Any]:
         else:
             document_query_llm = self.document_query_llm
 
+        return_list = self.return_list
+
         field_dict: dict[str, Any] = {}
         field_dict.update(self.additional_properties)
         field_dict.update(
@@ -183,6 +189,8 @@ def to_dict(self) -> dict[str, Any]:
             field_dict["enforce_row_independence"] = enforce_row_independence
         if document_query_llm is not UNSET:
             field_dict["document_query_llm"] = document_query_llm
+        if return_list is not UNSET:
+            field_dict["return_list"] = return_list
 
         return field_dict
 
@@ -351,6 +359,8 @@ def _parse_document_query_llm(data: object) -> LLMEnumPublic | None | Unset:
 
         document_query_llm = _parse_document_query_llm(d.pop("document_query_llm", UNSET))
 
+        return_list = d.pop("return_list", UNSET)
+
         agent_map_operation = cls(
             input_=input_,
             task=task,
@@ -365,6 +375,7 @@ def _parse_document_query_llm(data: object) -> LLMEnumPublic | None | Unset:
             include_research=include_research,
             enforce_row_independence=enforce_row_independence,
             document_query_llm=document_query_llm,
+            return_list=return_list,
         )
 
         agent_map_operation.additional_properties = d
diff --git a/src/futuresearch/ops.py b/src/futuresearch/ops.py
@@ -324,6 +324,7 @@ async def agent_map(
     enforce_row_independence: bool = False,
     response_model: type[BaseModel] = DefaultAgentResponse,
     document_query_llm: LLM | None = None,
+    return_table: bool = False,
 ) -> TableResult:
     """Execute an AI agent task on each row of the input table.
 
@@ -336,8 +337,12 @@ async def agent_map(
         llm: LLM to use for each agent. Required when effort_level is None.
         iteration_budget: Number of agent iterations per row (0-20). Required when effort_level is None.
         include_reasoning: Include reasoning notes. Required when effort_level is None.
-        response_model: Pydantic model for the response schema.
+        response_model: Pydantic model for the response schema. When ``return_table`` is True,
+            this should describe a single item; the worker wraps it in a list automatically.
         document_query_llm: LLM to use for the document query tool (QDLLM) when scraping web pages.
+        return_table: If True, each per-row agent emits a list of records and the result table
+            contains one row per item (with an ``_expand_index`` column). Output rows can exceed
+            input rows. Default: False (one output row per input row).
 
     Returns:
         TableResult containing the agent results merged with input rows.
@@ -357,6 +362,7 @@ async def agent_map(
                 enforce_row_independence=enforce_row_independence,
                 response_model=response_model,
                 document_query_llm=document_query_llm,
+                return_table=return_table,
             )
             result = await cohort_task.await_result()
             if isinstance(result, TableResult):
@@ -373,6 +379,7 @@ async def agent_map(
         enforce_row_independence=enforce_row_independence,
         response_model=response_model,
         document_query_llm=document_query_llm,
+        return_table=return_table,
     )
     result = await cohort_task.await_result()
     if isinstance(result, TableResult):
@@ -391,6 +398,7 @@ async def _submit_agent_map(
     enforce_row_independence: bool = False,
     response_schema: dict | None = None,
     document_query_llm: LLM | None = None,
+    return_table: bool = False,
 ) -> SubmittedTask:
     """Build and submit an agent_map request."""
     input_data = _prepare_table_input(input, AgentMapOperationInputType1Item)
@@ -413,6 +421,7 @@ async def _submit_agent_map(
         document_query_llm=LLMEnumPublic(document_query_llm.value)
         if document_query_llm is not None
         else UNSET,
+        return_list=return_table,
     )
 
     response = await agent_map_operations_agent_map_post.asyncio(
@@ -433,6 +442,7 @@ async def agent_map_async(
     enforce_row_independence: bool = False,
     response_model: type[BaseModel] = DefaultAgentResponse,
     document_query_llm: LLM | None = None,
+    return_table: bool = False,
 ) -> EveryrowTask[BaseModel]:
     """Submit an agent_map task asynchronously."""
     submitted = await _submit_agent_map(
@@ -446,10 +456,11 @@ async def agent_map_async(
         enforce_row_independence=enforce_row_independence,
         response_schema=response_model.model_json_schema(),
         document_query_llm=document_query_llm,
+        return_table=return_table,
     )
 
     cohort_task = EveryrowTask(
-        response_model=response_model, is_map=True, is_expand=False
+        response_model=response_model, is_map=True, is_expand=return_table
     )
     cohort_task.set_submitted(submitted.task_id, submitted.session_id, session.client)
     return cohort_task
diff --git a/tests/test_ops.py b/tests/test_ops.py
@@ -263,11 +263,11 @@ async def test_agent_map(mocker, mock_session):
 
 
 @pytest.mark.asyncio
-async def test_agent_map_with_table_output(mocker, mock_session):
+async def test_agent_map_with_return_table_forwards_return_list(mocker, mock_session):
+    """When return_table=True, agent_map sends return_list=True and accepts a fan-out result."""
     task_id = uuid.uuid4()
     artifact_id = uuid.uuid4()
 
-    # Mock operation endpoint
     mock_submit = mocker.patch(
         "futuresearch.ops.agent_map_operations_agent_map_post.asyncio",
         new_callable=AsyncMock,
@@ -278,7 +278,6 @@ async def test_agent_map_with_table_output(mocker, mock_session):
         status=TaskStatus.PENDING,
     )
 
-    # Mock get_task_status
     mock_status = mocker.patch(
         "futuresearch.task.get_task_status_tasks_task_id_status_get.asyncio",
         new_callable=AsyncMock,
@@ -287,16 +286,20 @@ async def test_agent_map_with_table_output(mocker, mock_session):
         task_id, mock_session.session_id, artifact_id
     )
 
-    # Mock get_task_result
+    # Two input rows; agent fans each out into 3 cities, so 6 output rows total.
     mock_result = mocker.patch(
         "futuresearch.task.get_task_result_tasks_task_id_result_get.asyncio",
         new_callable=AsyncMock,
     )
     mock_result.return_value = _make_table_result(
         task_id,
         [
-            {"country": "India", "city": "Mumbai"},
-            {"country": "USA", "city": "New York"},
+            {"country": "India", "city": "Mumbai", "_expand_index": 0},
+            {"country": "India", "city": "Delhi", "_expand_index": 1},
+            {"country": "India", "city": "Bangalore", "_expand_index": 2},
+            {"country": "USA", "city": "New York", "_expand_index": 0},
+            {"country": "USA", "city": "Los Angeles", "_expand_index": 1},
+            {"country": "USA", "city": "Chicago", "_expand_index": 2},
         ],
         artifact_id,
     )
@@ -306,13 +309,59 @@ async def test_agent_map_with_table_output(mocker, mock_session):
         task="What are the three largest cities in the given country?",
         session=mock_session,
         input=input_df,
+        return_table=True,
     )
 
+    # Body sent to the API carries return_list=True.
+    submit_kwargs = mock_submit.await_args.kwargs
+    assert submit_kwargs["body"].return_list is True
+
     assert isinstance(result, TableResult)
-    assert len(result.data) == 2
+    assert len(result.data) == 6
+    assert "city" in result.data.columns
     assert result.artifact_id == artifact_id
 
 
+@pytest.mark.asyncio
+async def test_agent_map_default_does_not_set_return_list(mocker, mock_session):
+    task_id = uuid.uuid4()
+    artifact_id = uuid.uuid4()
+
+    mock_submit = mocker.patch(
+        "futuresearch.ops.agent_map_operations_agent_map_post.asyncio",
+        new_callable=AsyncMock,
+    )
+    mock_submit.return_value = OperationResponse(
+        task_id=task_id,
+        session_id=mock_session.session_id,
+        status=TaskStatus.PENDING,
+    )
+
+    mocker.patch(
+        "futuresearch.task.get_task_status_tasks_task_id_status_get.asyncio",
+        new_callable=AsyncMock,
+        return_value=_make_status_response(
+            task_id, mock_session.session_id, artifact_id
+        ),
+    )
+    mocker.patch(
+        "futuresearch.task.get_task_result_tasks_task_id_result_get.asyncio",
+        new_callable=AsyncMock,
+        return_value=_make_table_result(
+            task_id, [{"country": "India", "answer": "New Delhi"}], artifact_id
+        ),
+    )
+
+    await agent_map(
+        task="capital?",
+        session=mock_session,
+        input=pd.DataFrame([{"country": "India"}]),
+    )
+
+    submit_kwargs = mock_submit.await_args.kwargs
+    assert submit_kwargs["body"].return_list is False
+
+
 @pytest.mark.asyncio
 async def test_rank_model_validation(mock_session) -> None:
     input_df = pd.DataFrame(