Skip to content

Commit 9da26d4

Browse files
feat(api): api update
1 parent a3d7c23 commit 9da26d4

14 files changed

Lines changed: 196 additions & 223 deletions

.stats.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 9
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/nanonets%2Fdocstrange-7f3f662020a40e0dbf7b19b724cf5c3e69f82246659477c1d71521c0c3cf62a0.yml
3-
openapi_spec_hash: 53a8f7e0325845d16306ff7cb4272510
4-
config_hash: c02183ee58f815311de9278705163d1a
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/nanonets%2Fdocstrange-d1e63191b34778603117f266822b2f6f73acbc657307949f2ed9bdb546ab1d88.yml
3+
openapi_spec_hash: 06568800f2585c2fe3c6069cc9139d16
4+
config_hash: aed0d6cc8b4cffa1f02021baec0a6da3

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ client = Docstrange()
188188

189189
client.extract.sync(
190190
file=Path("/path/to/file"),
191-
output_format="markdown",
191+
output_format="output_format",
192192
)
193193
```
194194

src/docstrange/resources/chat.py

Lines changed: 4 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -55,20 +55,8 @@ def create_completion(
5555
extra_body: Body | None = None,
5656
timeout: float | httpx.Timeout | None | NotGiven = not_given,
5757
) -> object:
58-
"""OpenAI-compatible chat completions endpoint.
59-
60-
Supports text messages and
61-
file/image inputs via `image_url` or `file_url` parts.
62-
63-
**Limits and behavior:**
64-
65-
- The backend enforces a model context limit of ~20,000 tokens (text +
66-
multimodal tokens).
67-
- `file_url` inputs (e.g., PDFs) are expanded into per-page `image_url` parts
68-
and embedded as data URLs.
69-
- Inline data URLs are limited to ~64MB decoded size per item.
70-
71-
For large or multi-page documents, prefer `/api/v1/extract/*` endpoints.
58+
"""
59+
OpenAI-compatible chat completions endpoint.
7260
7361
Args:
7462
messages: Chat messages array
@@ -135,20 +123,8 @@ async def create_completion(
135123
extra_body: Body | None = None,
136124
timeout: float | httpx.Timeout | None | NotGiven = not_given,
137125
) -> object:
138-
"""OpenAI-compatible chat completions endpoint.
139-
140-
Supports text messages and
141-
file/image inputs via `image_url` or `file_url` parts.
142-
143-
**Limits and behavior:**
144-
145-
- The backend enforces a model context limit of ~20,000 tokens (text +
146-
multimodal tokens).
147-
- `file_url` inputs (e.g., PDFs) are expanded into per-page `image_url` parts
148-
and embedded as data URLs.
149-
- Inline data URLs are limited to ~64MB decoded size per item.
150-
151-
For large or multi-page documents, prefer `/api/v1/extract/*` endpoints.
126+
"""
127+
OpenAI-compatible chat completions endpoint.
152128
153129
Args:
154130
messages: Chat messages array

src/docstrange/resources/extract/extract.py

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ def async_(
7575
output_format: str,
7676
csv_options: str | Omit = omit,
7777
custom_instructions: str | Omit = omit,
78+
file_base64: str | Omit = omit,
79+
file_url: str | Omit = omit,
7880
include_metadata: str | Omit = omit,
7981
json_options: str | Omit = omit,
8082
prompt_mode: Literal["append", "replace"] | Omit = omit,
@@ -93,8 +95,7 @@ def async_(
9395
Recommended for large documents (>50 pages).
9496
9597
Args:
96-
file: File to upload (PDF, Word, Excel, PowerPoint, images). Alternatively use
97-
file_url or file_base64.
98+
file: File to upload (PDF, Word, Excel, PowerPoint, images)
9899
99100
output_format: Output format(s): `markdown`, `html`, `json`, `csv`. Comma-separate for multiple
100101
(e.g., `markdown,json`).
@@ -103,10 +104,13 @@ def async_(
103104
104105
custom_instructions: Custom extraction instructions (e.g., `Format dates as YYYY-MM-DD`)
105106
107+
file_base64: Base64-encoded file content
108+
109+
file_url: URL to download file from
110+
106111
include_metadata: Comma-separated metadata: `bounding_boxes`, `confidence_score`
107112
108-
json_options: JSON extraction options. Values: `hierarchy_output`, `table-of-contents`, field
109-
list `["field1", "field2"]`, or JSON schema `{...}`
113+
json_options: JSON extraction options.
110114
111115
prompt_mode: `append`: add to base prompt, `replace`: use only custom instructions
112116
@@ -130,6 +134,8 @@ def async_(
130134
"output_format": output_format,
131135
"csv_options": csv_options,
132136
"custom_instructions": custom_instructions,
137+
"file_base64": file_base64,
138+
"file_url": file_url,
133139
"include_metadata": include_metadata,
134140
"json_options": json_options,
135141
"prompt_mode": prompt_mode,
@@ -225,14 +231,6 @@ def stream(
225231
Stream extraction results via Server-Sent Events (SSE) for real-time content
226232
delivery.
227233
228-
**Event Types:**
229-
230-
- `content`: Incremental content chunks (streaming mode)
231-
- `complete`: Full content at once (batch mode)
232-
- `done`: Final event with record_id and processing_time
233-
- `error`: Error information
234-
- `async_queued`: Large files queued for async processing
235-
236234
Provide exactly one of: `file`, `file_url`, or `file_base64`.
237235
238236
Args:
@@ -245,8 +243,7 @@ def stream(
245243
246244
custom_instructions: Custom extraction instructions
247245
248-
enable_streaming: Enable real-time streaming. If false, returns complete content via SSE batch
249-
mode.
246+
enable_streaming: Enable real-time streaming.
250247
251248
file_base64: Base64-encoded file content
252249
@@ -303,6 +300,8 @@ def sync(
303300
output_format: str,
304301
csv_options: str | Omit = omit,
305302
custom_instructions: str | Omit = omit,
303+
file_base64: str | Omit = omit,
304+
file_url: str | Omit = omit,
306305
include_metadata: str | Omit = omit,
307306
json_options: str | Omit = omit,
308307
prompt_mode: Literal["append", "replace"] | Omit = omit,
@@ -321,8 +320,7 @@ def sync(
321320
Provide exactly one of: `file`, `file_url`, or `file_base64`.
322321
323322
Args:
324-
file: File to upload (PDF, Word, Excel, PowerPoint, images). Alternatively use
325-
file_url or file_base64.
323+
file: File to upload (PDF, Word, Excel, PowerPoint, images)
326324
327325
output_format: Output format(s): `markdown`, `html`, `json`, `csv`. Comma-separate for multiple
328326
(e.g., `markdown,json`).
@@ -331,10 +329,13 @@ def sync(
331329
332330
custom_instructions: Custom extraction instructions (e.g., `Format dates as YYYY-MM-DD`)
333331
332+
file_base64: Base64-encoded file content
333+
334+
file_url: URL to download file from
335+
334336
include_metadata: Comma-separated metadata: `bounding_boxes`, `confidence_score`
335337
336-
json_options: JSON extraction options. Values: `hierarchy_output`, `table-of-contents`, field
337-
list `["field1", "field2"]`, or JSON schema `{...}`
338+
json_options: JSON extraction options.
338339
339340
prompt_mode: `append`: add to base prompt, `replace`: use only custom instructions
340341
@@ -358,6 +359,8 @@ def sync(
358359
"output_format": output_format,
359360
"csv_options": csv_options,
360361
"custom_instructions": custom_instructions,
362+
"file_base64": file_base64,
363+
"file_url": file_url,
361364
"include_metadata": include_metadata,
362365
"json_options": json_options,
363366
"prompt_mode": prompt_mode,
@@ -402,6 +405,8 @@ async def async_(
402405
output_format: str,
403406
csv_options: str | Omit = omit,
404407
custom_instructions: str | Omit = omit,
408+
file_base64: str | Omit = omit,
409+
file_url: str | Omit = omit,
405410
include_metadata: str | Omit = omit,
406411
json_options: str | Omit = omit,
407412
prompt_mode: Literal["append", "replace"] | Omit = omit,
@@ -420,8 +425,7 @@ async def async_(
420425
Recommended for large documents (>50 pages).
421426
422427
Args:
423-
file: File to upload (PDF, Word, Excel, PowerPoint, images). Alternatively use
424-
file_url or file_base64.
428+
file: File to upload (PDF, Word, Excel, PowerPoint, images)
425429
426430
output_format: Output format(s): `markdown`, `html`, `json`, `csv`. Comma-separate for multiple
427431
(e.g., `markdown,json`).
@@ -430,10 +434,13 @@ async def async_(
430434
431435
custom_instructions: Custom extraction instructions (e.g., `Format dates as YYYY-MM-DD`)
432436
437+
file_base64: Base64-encoded file content
438+
439+
file_url: URL to download file from
440+
433441
include_metadata: Comma-separated metadata: `bounding_boxes`, `confidence_score`
434442
435-
json_options: JSON extraction options. Values: `hierarchy_output`, `table-of-contents`, field
436-
list `["field1", "field2"]`, or JSON schema `{...}`
443+
json_options: JSON extraction options.
437444
438445
prompt_mode: `append`: add to base prompt, `replace`: use only custom instructions
439446
@@ -457,6 +464,8 @@ async def async_(
457464
"output_format": output_format,
458465
"csv_options": csv_options,
459466
"custom_instructions": custom_instructions,
467+
"file_base64": file_base64,
468+
"file_url": file_url,
460469
"include_metadata": include_metadata,
461470
"json_options": json_options,
462471
"prompt_mode": prompt_mode,
@@ -552,14 +561,6 @@ async def stream(
552561
Stream extraction results via Server-Sent Events (SSE) for real-time content
553562
delivery.
554563
555-
**Event Types:**
556-
557-
- `content`: Incremental content chunks (streaming mode)
558-
- `complete`: Full content at once (batch mode)
559-
- `done`: Final event with record_id and processing_time
560-
- `error`: Error information
561-
- `async_queued`: Large files queued for async processing
562-
563564
Provide exactly one of: `file`, `file_url`, or `file_base64`.
564565
565566
Args:
@@ -572,8 +573,7 @@ async def stream(
572573
573574
custom_instructions: Custom extraction instructions
574575
575-
enable_streaming: Enable real-time streaming. If false, returns complete content via SSE batch
576-
mode.
576+
enable_streaming: Enable real-time streaming.
577577
578578
file_base64: Base64-encoded file content
579579
@@ -630,6 +630,8 @@ async def sync(
630630
output_format: str,
631631
csv_options: str | Omit = omit,
632632
custom_instructions: str | Omit = omit,
633+
file_base64: str | Omit = omit,
634+
file_url: str | Omit = omit,
633635
include_metadata: str | Omit = omit,
634636
json_options: str | Omit = omit,
635637
prompt_mode: Literal["append", "replace"] | Omit = omit,
@@ -648,8 +650,7 @@ async def sync(
648650
Provide exactly one of: `file`, `file_url`, or `file_base64`.
649651
650652
Args:
651-
file: File to upload (PDF, Word, Excel, PowerPoint, images). Alternatively use
652-
file_url or file_base64.
653+
file: File to upload (PDF, Word, Excel, PowerPoint, images)
653654
654655
output_format: Output format(s): `markdown`, `html`, `json`, `csv`. Comma-separate for multiple
655656
(e.g., `markdown,json`).
@@ -658,10 +659,13 @@ async def sync(
658659
659660
custom_instructions: Custom extraction instructions (e.g., `Format dates as YYYY-MM-DD`)
660661
662+
file_base64: Base64-encoded file content
663+
664+
file_url: URL to download file from
665+
661666
include_metadata: Comma-separated metadata: `bounding_boxes`, `confidence_score`
662667
663-
json_options: JSON extraction options. Values: `hierarchy_output`, `table-of-contents`, field
664-
list `["field1", "field2"]`, or JSON schema `{...}`
668+
json_options: JSON extraction options.
665669
666670
prompt_mode: `append`: add to base prompt, `replace`: use only custom instructions
667671
@@ -685,6 +689,8 @@ async def sync(
685689
"output_format": output_format,
686690
"csv_options": csv_options,
687691
"custom_instructions": custom_instructions,
692+
"file_base64": file_base64,
693+
"file_url": file_url,
688694
"include_metadata": include_metadata,
689695
"json_options": json_options,
690696
"prompt_mode": prompt_mode,

src/docstrange/types/chat_create_completion_params.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class MessageContent(TypedDict, total=False):
3939

4040
class Message(TypedDict, total=False):
4141
content: Required[Iterable[MessageContent]]
42-
"""Message content as an array of parts (text or file/image URLs)."""
42+
"""Message content as an array of parts."""
4343

4444
role: Required[str]
4545
"""Message role (system, user, assistant)"""

src/docstrange/types/extract_async_params.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,7 @@
1111

1212
class ExtractAsyncParams(TypedDict, total=False):
1313
file: Required[FileTypes]
14-
"""File to upload (PDF, Word, Excel, PowerPoint, images).
15-
16-
Alternatively use file_url or file_base64.
17-
"""
14+
"""File to upload (PDF, Word, Excel, PowerPoint, images)"""
1815

1916
output_format: Required[str]
2017
"""Output format(s): `markdown`, `html`, `json`, `csv`.
@@ -28,15 +25,17 @@ class ExtractAsyncParams(TypedDict, total=False):
2825
custom_instructions: str
2926
"""Custom extraction instructions (e.g., `Format dates as YYYY-MM-DD`)"""
3027

28+
file_base64: str
29+
"""Base64-encoded file content"""
30+
31+
file_url: str
32+
"""URL to download file from"""
33+
3134
include_metadata: str
3235
"""Comma-separated metadata: `bounding_boxes`, `confidence_score`"""
3336

3437
json_options: str
35-
"""JSON extraction options.
36-
37-
Values: `hierarchy_output`, `table-of-contents`, field list
38-
`["field1", "field2"]`, or JSON schema `{...}`
39-
"""
38+
"""JSON extraction options."""
4039

4140
prompt_mode: Literal["append", "replace"]
4241
"""`append`: add to base prompt, `replace`: use only custom instructions"""

src/docstrange/types/extract_stream_params.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,7 @@ class ExtractStreamParams(TypedDict, total=False):
2626
"""Custom extraction instructions"""
2727

2828
enable_streaming: bool
29-
"""Enable real-time streaming.
30-
31-
If false, returns complete content via SSE batch mode.
32-
"""
29+
"""Enable real-time streaming."""
3330

3431
file_base64: str
3532
"""Base64-encoded file content"""

src/docstrange/types/extract_sync_params.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,7 @@
1111

1212
class ExtractSyncParams(TypedDict, total=False):
1313
file: Required[FileTypes]
14-
"""File to upload (PDF, Word, Excel, PowerPoint, images).
15-
16-
Alternatively use file_url or file_base64.
17-
"""
14+
"""File to upload (PDF, Word, Excel, PowerPoint, images)"""
1815

1916
output_format: Required[str]
2017
"""Output format(s): `markdown`, `html`, `json`, `csv`.
@@ -28,15 +25,17 @@ class ExtractSyncParams(TypedDict, total=False):
2825
custom_instructions: str
2926
"""Custom extraction instructions (e.g., `Format dates as YYYY-MM-DD`)"""
3027

28+
file_base64: str
29+
"""Base64-encoded file content"""
30+
31+
file_url: str
32+
"""URL to download file from"""
33+
3134
include_metadata: str
3235
"""Comma-separated metadata: `bounding_boxes`, `confidence_score`"""
3336

3437
json_options: str
35-
"""JSON extraction options.
36-
37-
Values: `hierarchy_output`, `table-of-contents`, field list
38-
`["field1", "field2"]`, or JSON schema `{...}`
39-
"""
38+
"""JSON extraction options."""
4039

4140
prompt_mode: Literal["append", "replace"]
4241
"""`append`: add to base prompt, `replace`: use only custom instructions"""
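(File path missing from this capture; presumably src/docstrange/types/extraction_format_result.py, inferred from the class name and relative imports below — confirm against the commit.)

Lines changed: 3 additions & 3 deletions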
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
22

3-
from typing import Union, Optional
3+
from typing import Optional
44

55
from .._models import BaseModel
66
from .extraction_metadata import ExtractionMetadata
@@ -9,7 +9,7 @@
99

1010

1111
class ExtractionFormatResult(BaseModel):
12-
content: Union[str, object]
13-
"""Extracted content"""
12+
content: object
13+
"""Extracted content (string or object)"""
1414

1515
metadata: Optional[ExtractionMetadata] = None

0 commit comments

Comments
 (0)