Skip to content

Commit 83c7e0a

Browse files
CallumMcMahon authored and github-actions[bot] committed
feat(merge): add llm and document_query_llm params to merge API and SDK (#5435)
Tested locally; I believe it worked end to end. Accounts routed through dedicated provider keys (e.g. clio's Anthropic-only routing) could not use merge because it hardcoded GEMINI_3_FLASH_MINIMAL as the merge model. This adds llm and document_query_llm parameters to the merge operation, matching what agent_map already supports. The llm parameter controls both the LLM merge phase (merge_model) and the web search agent model. The document_query_llm parameter controls the QDLLM used for reading web pages during the web search phase. --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Sourced from commit f62665b18a25024b7d006d5abe4f2e88fbc8b885
1 parent 3f23013 commit 83c7e0a

2 files changed

Lines changed: 137 additions & 18 deletions

File tree

src/futuresearch/generated/models/merge_operation.py

Lines changed: 123 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,26 @@
77
from attrs import define as _attrs_define
88
from attrs import field as _attrs_field
99

10-
from ..models.merge_operation_relationship_type_type_0 import MergeOperationRelationshipTypeType0
11-
from ..models.merge_operation_use_web_search_type_0 import MergeOperationUseWebSearchType0
10+
from ..models.llm_enum_public import LLMEnumPublic
11+
from ..models.merge_operation_relationship_type_type_0 import (
12+
MergeOperationRelationshipTypeType0,
13+
)
14+
from ..models.merge_operation_use_web_search_type_0 import (
15+
MergeOperationUseWebSearchType0,
16+
)
1217
from ..types import UNSET, Unset
1318

1419
if TYPE_CHECKING:
15-
from ..models.merge_operation_left_input_type_1_item import MergeOperationLeftInputType1Item
20+
from ..models.merge_operation_left_input_type_1_item import (
21+
MergeOperationLeftInputType1Item,
22+
)
1623
from ..models.merge_operation_left_input_type_2 import MergeOperationLeftInputType2
17-
from ..models.merge_operation_right_input_type_1_item import MergeOperationRightInputType1Item
18-
from ..models.merge_operation_right_input_type_2 import MergeOperationRightInputType2
24+
from ..models.merge_operation_right_input_type_1_item import (
25+
MergeOperationRightInputType1Item,
26+
)
27+
from ..models.merge_operation_right_input_type_2 import (
28+
MergeOperationRightInputType2,
29+
)
1930

2031

2132
T = TypeVar("T", bound="MergeOperation")
@@ -38,19 +49,31 @@ class MergeOperation:
3849
relationship_type (MergeOperationRelationshipTypeType0 | None | Unset): Control merge relationship behavior:
3950
'many_to_one' (default) allows multiple left rows to match the same right row, 'one_to_one' enforces unique
4051
matches and resolves clashes Default: MergeOperationRelationshipTypeType0.MANY_TO_ONE.
52+
llm (LLMEnumPublic | None | Unset): LLM to use for the merge operation (both initial LLM matching and web
53+
search agent). If not provided, uses system defaults.
54+
document_query_llm (LLMEnumPublic | None | Unset): LLM to use for the document query tool (QDLLM) that reads
55+
and extracts information from web pages. If not provided, defaults to the system default.
4156
session_id (None | Unset | UUID): Session ID. If not provided, a new session is auto-created for this task.
4257
webhook_url (None | str | Unset): Optional URL to receive a POST callback when the task completes or fails.
4358
"""
4459

45-
left_input: list[MergeOperationLeftInputType1Item] | MergeOperationLeftInputType2 | UUID
46-
right_input: list[MergeOperationRightInputType1Item] | MergeOperationRightInputType2 | UUID
60+
left_input: (
61+
list[MergeOperationLeftInputType1Item] | MergeOperationLeftInputType2 | UUID
62+
)
63+
right_input: (
64+
list[MergeOperationRightInputType1Item] | MergeOperationRightInputType2 | UUID
65+
)
4766
task: str
4867
left_key: None | str | Unset = UNSET
4968
right_key: None | str | Unset = UNSET
50-
use_web_search: MergeOperationUseWebSearchType0 | None | Unset = MergeOperationUseWebSearchType0.AUTO
69+
use_web_search: MergeOperationUseWebSearchType0 | None | Unset = (
70+
MergeOperationUseWebSearchType0.AUTO
71+
)
5172
relationship_type: MergeOperationRelationshipTypeType0 | None | Unset = (
5273
MergeOperationRelationshipTypeType0.MANY_TO_ONE
5374
)
75+
llm: LLMEnumPublic | None | Unset = UNSET
76+
document_query_llm: LLMEnumPublic | None | Unset = UNSET
5477
session_id: None | Unset | UUID = UNSET
5578
webhook_url: None | str | Unset = UNSET
5679
additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
@@ -110,6 +133,22 @@ def to_dict(self) -> dict[str, Any]:
110133
else:
111134
relationship_type = self.relationship_type
112135

136+
llm: None | str | Unset
137+
if isinstance(self.llm, Unset):
138+
llm = UNSET
139+
elif isinstance(self.llm, LLMEnumPublic):
140+
llm = self.llm.value
141+
else:
142+
llm = self.llm
143+
144+
document_query_llm: None | str | Unset
145+
if isinstance(self.document_query_llm, Unset):
146+
document_query_llm = UNSET
147+
elif isinstance(self.document_query_llm, LLMEnumPublic):
148+
document_query_llm = self.document_query_llm.value
149+
else:
150+
document_query_llm = self.document_query_llm
151+
113152
session_id: None | str | Unset
114153
if isinstance(self.session_id, Unset):
115154
session_id = UNSET
@@ -141,6 +180,10 @@ def to_dict(self) -> dict[str, Any]:
141180
field_dict["use_web_search"] = use_web_search
142181
if relationship_type is not UNSET:
143182
field_dict["relationship_type"] = relationship_type
183+
if llm is not UNSET:
184+
field_dict["llm"] = llm
185+
if document_query_llm is not UNSET:
186+
field_dict["document_query_llm"] = document_query_llm
144187
if session_id is not UNSET:
145188
field_dict["session_id"] = session_id
146189
if webhook_url is not UNSET:
@@ -150,16 +193,26 @@ def to_dict(self) -> dict[str, Any]:
150193

151194
@classmethod
152195
def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
153-
from ..models.merge_operation_left_input_type_1_item import MergeOperationLeftInputType1Item
154-
from ..models.merge_operation_left_input_type_2 import MergeOperationLeftInputType2
155-
from ..models.merge_operation_right_input_type_1_item import MergeOperationRightInputType1Item
156-
from ..models.merge_operation_right_input_type_2 import MergeOperationRightInputType2
196+
from ..models.merge_operation_left_input_type_1_item import (
197+
MergeOperationLeftInputType1Item,
198+
)
199+
from ..models.merge_operation_left_input_type_2 import (
200+
MergeOperationLeftInputType2,
201+
)
202+
from ..models.merge_operation_right_input_type_1_item import (
203+
MergeOperationRightInputType1Item,
204+
)
205+
from ..models.merge_operation_right_input_type_2 import (
206+
MergeOperationRightInputType2,
207+
)
157208

158209
d = dict(src_dict)
159210

160211
def _parse_left_input(
161212
data: object,
162-
) -> list[MergeOperationLeftInputType1Item] | MergeOperationLeftInputType2 | UUID:
213+
) -> (
214+
list[MergeOperationLeftInputType1Item] | MergeOperationLeftInputType2 | UUID
215+
):
163216
try:
164217
if not isinstance(data, str):
165218
raise TypeError()
@@ -174,7 +227,9 @@ def _parse_left_input(
174227
left_input_type_1 = []
175228
_left_input_type_1 = data
176229
for left_input_type_1_item_data in _left_input_type_1:
177-
left_input_type_1_item = MergeOperationLeftInputType1Item.from_dict(left_input_type_1_item_data)
230+
left_input_type_1_item = MergeOperationLeftInputType1Item.from_dict(
231+
left_input_type_1_item_data
232+
)
178233

179234
left_input_type_1.append(left_input_type_1_item)
180235

@@ -191,7 +246,11 @@ def _parse_left_input(
191246

192247
def _parse_right_input(
193248
data: object,
194-
) -> list[MergeOperationRightInputType1Item] | MergeOperationRightInputType2 | UUID:
249+
) -> (
250+
list[MergeOperationRightInputType1Item]
251+
| MergeOperationRightInputType2
252+
| UUID
253+
):
195254
try:
196255
if not isinstance(data, str):
197256
raise TypeError()
@@ -206,7 +265,11 @@ def _parse_right_input(
206265
right_input_type_1 = []
207266
_right_input_type_1 = data
208267
for right_input_type_1_item_data in _right_input_type_1:
209-
right_input_type_1_item = MergeOperationRightInputType1Item.from_dict(right_input_type_1_item_data)
268+
right_input_type_1_item = (
269+
MergeOperationRightInputType1Item.from_dict(
270+
right_input_type_1_item_data
271+
)
272+
)
210273

211274
right_input_type_1.append(right_input_type_1_item)
212275

@@ -241,7 +304,9 @@ def _parse_right_key(data: object) -> None | str | Unset:
241304

242305
right_key = _parse_right_key(d.pop("right_key", UNSET))
243306

244-
def _parse_use_web_search(data: object) -> MergeOperationUseWebSearchType0 | None | Unset:
307+
def _parse_use_web_search(
308+
data: object,
309+
) -> MergeOperationUseWebSearchType0 | None | Unset:
245310
if data is None:
246311
return data
247312
if isinstance(data, Unset):
@@ -258,7 +323,9 @@ def _parse_use_web_search(data: object) -> MergeOperationUseWebSearchType0 | Non
258323

259324
use_web_search = _parse_use_web_search(d.pop("use_web_search", UNSET))
260325

261-
def _parse_relationship_type(data: object) -> MergeOperationRelationshipTypeType0 | None | Unset:
326+
def _parse_relationship_type(
327+
data: object,
328+
) -> MergeOperationRelationshipTypeType0 | None | Unset:
262329
if data is None:
263330
return data
264331
if isinstance(data, Unset):
@@ -275,6 +342,42 @@ def _parse_relationship_type(data: object) -> MergeOperationRelationshipTypeType
275342

276343
relationship_type = _parse_relationship_type(d.pop("relationship_type", UNSET))
277344

345+
def _parse_llm(data: object) -> LLMEnumPublic | None | Unset:
    """Coerce the raw ``llm`` payload value into an ``LLMEnumPublic`` if possible.

    ``None`` and ``Unset`` values are handed back untouched. A string is
    passed to ``LLMEnumPublic``; if that construction raises one of the
    caught exceptions, or the value is not a string at all, the raw value
    is returned unchanged (only re-labelled for the type checker).
    """
    # Sentinel values carry no model name, so there is nothing to convert.
    if data is None or isinstance(data, Unset):
        return data

    if isinstance(data, str):
        try:
            return LLMEnumPublic(data)
        except (TypeError, ValueError, AttributeError, KeyError):
            # Deliberate best-effort: fall through and keep the raw value.
            pass

    return cast(LLMEnumPublic | None | Unset, data)
359+
360+
llm = _parse_llm(d.pop("llm", UNSET))
361+
362+
def _parse_document_query_llm(data: object) -> LLMEnumPublic | None | Unset:
    """Coerce the raw ``document_query_llm`` value into an ``LLMEnumPublic``.

    ``None`` and ``Unset`` pass through as-is. String input is converted via
    ``LLMEnumPublic``; when the conversion raises one of the caught
    exceptions, or the input is not a string, the original value is returned
    unchanged under a ``cast``.
    """
    # Nothing to parse for the "explicit null" and "field absent" sentinels.
    if data is None or isinstance(data, Unset):
        return data

    if isinstance(data, str):
        try:
            return LLMEnumPublic(data)
        except (TypeError, ValueError, AttributeError, KeyError):
            # Best-effort parsing: keep the raw value rather than failing.
            pass

    return cast(LLMEnumPublic | None | Unset, data)
376+
377+
document_query_llm = _parse_document_query_llm(
378+
d.pop("document_query_llm", UNSET)
379+
)
380+
278381
def _parse_session_id(data: object) -> None | Unset | UUID:
279382
if data is None:
280383
return data
@@ -309,6 +412,8 @@ def _parse_webhook_url(data: object) -> None | str | Unset:
309412
right_key=right_key,
310413
use_web_search=use_web_search,
311414
relationship_type=relationship_type,
415+
llm=llm,
416+
document_query_llm=document_query_llm,
312417
session_id=session_id,
313418
webhook_url=webhook_url,
314419
)

src/futuresearch/ops.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,8 @@ async def merge(
614614
"many_to_one", "one_to_one", "one_to_many", "many_to_many"
615615
]
616616
| None = None,
617+
llm: LLM | None = None,
618+
document_query_llm: LLM | None = None,
617619
) -> MergeResult:
618620
"""Merge two tables using AI (LEFT JOIN semantics).
619621
@@ -626,6 +628,8 @@ async def merge(
626628
merge_on_right: Only set if you expect exact string matches on this column or want to draw agent attention to it. Auto-detected if omitted.
627629
use_web_search: Control web search behavior: "auto" (default) tries LLM merge first then conditionally searches, "no" skips web search entirely, "yes" forces web search on every row.
628630
relationship_type: Control merge relationship type / cardinality between the two tables: "many_to_one" (default) allows multiple left rows to match one right row (e.g. matching reviews to product), "one_to_one" enforces unique matching between left and right rows (e.g. CEO to company), "one_to_many" allows one left row to match multiple right rows (e.g. company to products), "many_to_many" allows multiple left rows to match multiple right rows (e.g. companies to investors). For one_to_many and many_to_many, multiple matches are represented by joining the right-table values with " | " in each added column.
631+
llm: LLM to use for the merge operation (both initial LLM matching and web search agent). If not provided, uses system defaults.
632+
document_query_llm: LLM to use for the document query tool that reads web pages. If not provided, uses system default.
629633
630634
Returns:
631635
MergeResult containing the merged table and match breakdown by method (exact, fuzzy, llm, web)
@@ -649,6 +653,8 @@ async def merge(
649653
merge_on_right=merge_on_right,
650654
use_web_search=use_web_search,
651655
relationship_type=relationship_type,
656+
llm=llm,
657+
document_query_llm=document_query_llm,
652658
)
653659
return await merge_task.await_result()
654660
merge_task = await merge_async(
@@ -660,6 +666,8 @@ async def merge(
660666
merge_on_right=merge_on_right,
661667
use_web_search=use_web_search,
662668
relationship_type=relationship_type,
669+
llm=llm,
670+
document_query_llm=document_query_llm,
663671
)
664672
return await merge_task.await_result()
665673

@@ -676,6 +684,8 @@ async def merge_async(
676684
"many_to_one", "one_to_one", "one_to_many", "many_to_many"
677685
]
678686
| None = None,
687+
llm: LLM | None = None,
688+
document_query_llm: LLM | None = None,
679689
) -> MergeTask:
680690
"""Submit a merge task asynchronously.
681691
@@ -693,6 +703,10 @@ async def merge_async(
693703
right_key=merge_on_right or UNSET,
694704
use_web_search=use_web_search or UNSET, # type: ignore
695705
relationship_type=relationship_type or UNSET, # type: ignore
706+
llm=LLMEnumPublic(llm.value) if llm is not None else UNSET,
707+
document_query_llm=LLMEnumPublic(document_query_llm.value)
708+
if document_query_llm is not None
709+
else UNSET,
696710
session_id=session.session_id,
697711
)
698712

0 commit comments

Comments
 (0)