Commit ae53ee6
dmchoiboi and claude authored
fix: upgrade datamodel-code-generator to fix RootModel serialization bug (#745)
* fix: upgrade datamodel-code-generator to fix RootModel serialization bug

  Upgrades datamodel-code-generator from >=0.25.8 to >=0.47.0 to fix a bug where
  RootModel fields with default values were generated with raw primitive defaults
  instead of proper RootModel instances. This caused "'str' object has no attribute
  'root'" errors during FastAPI response serialization for fields like:

  - service_tier: ServiceTier (3 occurrences)
  - reasoning_effort: ReasoningEffort (5 occurrences)
  - search_context_size: WebSearchContextSize (1 occurrence)

  The fix (datamodel-code-generator PR #2714) generates a proper default_factory:

  ```python
  # Before (broken):
  service_tier: Annotated[Optional[ServiceTier], Field()] = "auto"

  # After (fixed):
  service_tier: Annotated[ServiceTier | None, Field(default_factory=lambda: ServiceTier('auto'))]
  ```

  Fixes the serialization error introduced in commit 0cbb58d.

* fix: add constr export to pydantic_types module

  The new version of datamodel-code-generator now imports constr for constrained
  string types. Add this export to the custom pydantic_types module.

* fix: add type ignore for model field override in ChatCompletionV2Request

  The newer datamodel-codegen changed CreateChatCompletionRequest.model from str
  to ModelIdsShared (a RootModel). The override is still compatible at runtime,
  since ModelIdsShared wraps str.

* fix: sort imports in pydantic_types.py

* fix: add default value for InputMessage.type in OpenAPI spec

  The type field was optional without a default, which caused datamodel-codegen
  to generate `Literal["message"] | None = None`. This broke Pydantic's
  discriminated union, since discriminator fields need concrete Literal types.
  Adding `default: message` to the spec generates proper code with a non-None
  default value.

* fix: remove discriminator='type' to fix duplicate discriminator values

  The OpenAPI spec has InputMessage and OutputMessage both with type='message',
  which causes Pydantic's discriminated union to fail with "Value 'message'
  mapped to multiple choices". Removing the discriminator makes Pydantic try
  each model in the union until one validates. Also reverted the
  InputMessage.type default change, since it wasn't needed.

* fix: remove all discriminator='type' patterns

  Handle all variations of the discriminator pattern in sed.

* chore: remove accidentally added worktree

* fix: remove content/refusal from required fields in ChatCompletionResponseMessage

  The OpenAPI spec marked these fields as required, but the actual API doesn't
  always return them (refusal is only present when the model refuses). This
  caused validation errors when deserializing responses.

* refactor: targeted discriminator removal with Python post-processor

  Replace the blanket sed-based discriminator removal with a targeted Python
  post-processing script that only removes discriminator='type' from unions
  with conflicting type values:

  - Item: InputMessage + OutputMessage (both type='message')
  - InputItem: EasyInputMessage + InputMessage + OutputMessage (all type='message')

  Other unions (ToolModel, AnnotationModel, ItemResource, RealtimeClientEvent,
  RealtimeServerEvent, ResponseStreamEvent, etc.) retain their discriminators
  for better Pydantic validation performance.

  Also fix .black.toml to use force-exclude instead of exclude, which is
  required when files are passed explicitly (as pre-commit does).

* fix: add type ignore for model field override

  The model field is overridden from ModelIdsShared to str to support a broader
  set of model IDs than the OpenAI enum.

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
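The default_factory fix above can be reproduced with a minimal sketch. This assumes Pydantic v2; `ServiceTier`, `Broken`, and `Fixed` below are illustrative stand-ins, not the generated classes:

```python
from pydantic import BaseModel, Field, RootModel


class ServiceTier(RootModel[str]):
    """Illustrative stand-in for the generated RootModel class."""


class Broken(BaseModel):
    # Old codegen emitted a raw primitive default for a RootModel-typed field:
    service_tier: ServiceTier = "auto"  # type: ignore[assignment]


class Fixed(BaseModel):
    # datamodel-code-generator >= 0.47.0 wraps the default in a factory instead:
    service_tier: ServiceTier = Field(default_factory=lambda: ServiceTier("auto"))


b = Broken()  # defaults are not validated, so the field holds a plain str
print(type(b.service_tier))  # <class 'str'>
try:
    b.service_tier.root  # what code expecting a RootModel instance would do
except AttributeError as e:
    print(e)  # 'str' object has no attribute 'root'

f = Fixed()
print(f.model_dump())  # {'service_tier': 'auto'}
```

Because Pydantic does not validate defaults by default, the raw-primitive default slips through model construction and only blows up later, at serialization time; the `default_factory` form constructs a real RootModel instance up front.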
1 parent d44b1ee · commit ae53ee6

12 files changed: 212 additions & 79 deletions

.black.toml

Lines changed: 2 additions & 1 deletion

```diff
@@ -3,7 +3,7 @@
 line-length = 100
 target-version = ['py310']
 include = '\.pyi?$'
-exclude = '''
+force-exclude = '''
 (
   /(
     \.eggs # exclude a few common directories in the
@@ -18,6 +18,7 @@ exclude = '''
     | dist
     | alembic
     | gen
+    | scripts
   )/
 )
 '''
```
Lines changed: 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+Subproject commit af7145c086ffeeb1c42f5a00566b53dc5dfa1fb5
```

clients/python/llmengine/data_types/chat_completion.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -14,7 +14,7 @@


 class ChatCompletionV2Request(CreateChatCompletionRequest, VLLMChatCompletionAdditionalParams):
-    model: str = Field(
+    model: str = Field(  # type: ignore[assignment]  # ModelIdsShared is a RootModel wrapping str
         description="ID of the model to use.",
         examples=["mixtral-8x7b-instruct"],
     )
```
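Why the `# type: ignore[assignment]` override is safe at runtime can be sketched as follows. Pydantic v2 is assumed, and `ModelIdsSharedSketch` is a hypothetical stand-in for the generated `ModelIdsShared`:

```python
from pydantic import BaseModel, RootModel


class ModelIdsSharedSketch(RootModel[str]):
    """Hypothetical stand-in for the generated ModelIdsShared RootModel."""


class Parent(BaseModel):
    model: ModelIdsSharedSketch


class Child(Parent):
    # Widen the field back to str to accept arbitrary model IDs; runtime
    # behavior is unchanged because the RootModel only wraps a str anyway.
    model: str  # type: ignore[assignment]


print(Parent(model="gpt-4o").model_dump())  # {'model': 'gpt-4o'}
print(Child(model="mixtral-8x7b-instruct").model_dump())  # {'model': 'mixtral-8x7b-instruct'}
```

Both variants accept a plain string and serialize to the same JSON shape, which is why only the static type checker (not Pydantic) objects to the override.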

clients/python/llmengine/data_types/gen/openai.py

Lines changed: 24 additions & 23 deletions

```diff
@@ -1,11 +1,11 @@
 # mypy: ignore-errors
 # generated by datamodel-codegen:
 #   filename: openai-spec.yaml
-#   timestamp: 2025-12-19T20:33:10+00:00
+#   timestamp: 2026-01-10T00:37:57+00:00

 from __future__ import annotations

-from typing import Annotated, Any, Dict, List, Literal, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union

 import pydantic

@@ -14,6 +14,7 @@
     from pydantic.v1 import AnyUrl, BaseModel, Extra, Field  # noqa: F401
 else:
     from pydantic import AnyUrl, BaseModel, Extra, Field  # type: ignore # noqa: F401
+from typing_extensions import Annotated


 class AddUploadPartRequest(BaseModel):
@@ -932,10 +933,10 @@ class Audio1(BaseModel):


 class ChatCompletionResponseMessage(BaseModel):
-    content: Annotated[Optional[str], Field(description="The contents of the message.")]
+    content: Annotated[Optional[str], Field(description="The contents of the message.")] = None
     refusal: Annotated[
         Optional[str], Field(description="The refusal message generated by the model.")
-    ]
+    ] = None
     tool_calls: Optional[ChatCompletionMessageToolCalls] = None
     annotations: Annotated[
         Optional[List[Annotation]],
@@ -3593,7 +3594,7 @@ class MessageStreamEvent3(BaseModel):


 class Metadata(BaseModel):
-    __root__: Dict[str, str]
+    __root__: Optional[Dict[str, str]] = None


 class Model(BaseModel):
@@ -6703,7 +6704,7 @@ class VectorStoreFileAttributes1(BaseModel):


 class VectorStoreFileAttributes(BaseModel):
-    __root__: Dict[str, Union[VectorStoreFileAttributes1, float, bool]]
+    __root__: Optional[Dict[str, Union[VectorStoreFileAttributes1, float, bool]]] = None


 class FileCounts(BaseModel):
@@ -7728,7 +7729,7 @@ class WebSearchOptions(BaseModel):
         Optional[UserLocation],
         Field(description="Approximate location parameters for the search.\n"),
     ] = None
-    search_context_size: Annotated[Optional[WebSearchContextSize], Field()] = "medium"
+    search_context_size: Optional[WebSearchContextSize] = None


 class Audio2(BaseModel):
@@ -7761,7 +7762,7 @@ class CreateChatCompletionResponse(BaseModel):
         ),
     ]
     model: Annotated[str, Field(description="The model used for the chat completion.")]
-    service_tier: Annotated[Optional[ServiceTier], Field()] = "auto"
+    service_tier: Optional[ServiceTier] = None
     system_fingerprint: Annotated[
         Optional[str],
         Field(
@@ -7795,7 +7796,7 @@ class CreateChatCompletionStreamResponse(BaseModel):
         ),
     ]
     model: Annotated[str, Field(description="The model to generate the completion.")]
-    service_tier: Annotated[Optional[ServiceTier], Field()] = "auto"
+    service_tier: Optional[ServiceTier] = None
     system_fingerprint: Annotated[
         Optional[str],
         Field(
@@ -8157,7 +8158,7 @@ class Config:
             description="An optional text to guide the model's style or continue a previous audio segment. The [prompt](/docs/guides/speech-to-text#prompting) should match the audio language.\n"
         ),
     ] = None
-    response_format: Annotated[Optional[AudioResponseFormat], Field()] = "json"
+    response_format: Optional[AudioResponseFormat] = None
     temperature: Annotated[
         float,
         Field(
@@ -8358,7 +8359,7 @@ class EvalResponsesSource(BaseModel):
         Field(
             description="Optional reasoning effort parameter. This is a query parameter used to select responses."
         ),
-    ] = "medium"
+    ] = None
     temperature: Annotated[
         Optional[float],
         Field(
@@ -8806,7 +8807,7 @@ class ModelResponseProperties(BaseModel):
             example="user-1234",
         ),
     ] = None
-    service_tier: Annotated[Optional[ServiceTier], Field()] = "auto"
+    service_tier: Optional[ServiceTier] = None


 class OutputContent(BaseModel):
@@ -9302,7 +9303,7 @@ class RealtimeSessionCreateResponse(BaseModel):


 class Reasoning(BaseModel):
-    effort: Annotated[Optional[ReasoningEffort], Field()] = "medium"
+    effort: Optional[ReasoningEffort] = None
     summary: Annotated[
         Optional[Literal["auto", "concise", "detailed"]],
         Field(
@@ -9912,7 +9913,7 @@ class Config:
             max_length=256000,
         ),
     ] = None
-    reasoning_effort: Annotated[Optional[ReasoningEffort], Field()] = "medium"
+    reasoning_effort: Optional[ReasoningEffort] = None
     tools: Annotated[
         List[Union[AssistantToolsCode, AssistantToolsFileSearch, AssistantToolsFunction]],
         Field(
@@ -10108,7 +10109,7 @@ class Config:
             example="gpt-4o",
         ),
     ] = None
-    reasoning_effort: Annotated[Optional[ReasoningEffort], Field()] = "medium"
+    reasoning_effort: Optional[ReasoningEffort] = None
     instructions: Annotated[
         Optional[str],
         Field(
@@ -10173,7 +10174,7 @@ class Config:
     ] = None
     truncation_strategy: Optional[TruncationObject] = None
     tool_choice: Optional[AssistantsApiToolChoiceOption] = None
-    parallel_tool_calls: Annotated[Optional[ParallelToolCalls], Field()] = True
+    parallel_tool_calls: Optional[ParallelToolCalls] = None
     response_format: Optional[AssistantsApiResponseFormatOption] = None


@@ -10293,7 +10294,7 @@ class Config:
     ] = None
     truncation_strategy: Optional[TruncationObject] = None
     tool_choice: Optional[AssistantsApiToolChoiceOption] = None
-    parallel_tool_calls: Annotated[Optional[ParallelToolCalls], Field()] = True
+    parallel_tool_calls: Optional[ParallelToolCalls] = None
     response_format: Optional[AssistantsApiResponseFormatOption] = None


@@ -10394,7 +10395,7 @@ class FineTuneChatRequestInput(BaseModel):
         Optional[List[ChatCompletionTool]],
         Field(description="A list of tools the model may generate JSON inputs for."),
     ] = None
-    parallel_tool_calls: Annotated[Optional[ParallelToolCalls], Field()] = True
+    parallel_tool_calls: Optional[ParallelToolCalls] = None
     functions: Annotated[
         Optional[List[ChatCompletionFunctions]],
         Field(
@@ -10424,7 +10425,7 @@ class Input5(BaseModel):
         Optional[List[ChatCompletionTool]],
         Field(description="A list of tools the model may generate JSON inputs for."),
     ] = None
-    parallel_tool_calls: Annotated[Optional[ParallelToolCalls], Field()] = True
+    parallel_tool_calls: Optional[ParallelToolCalls] = None


 class FineTunePreferenceRequestInput(BaseModel):
@@ -10536,7 +10537,7 @@ class Config:
             description="ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models) for descriptions of them.\n"
         ),
     ] = None
-    reasoning_effort: Annotated[Optional[ReasoningEffort], Field()] = "medium"
+    reasoning_effort: Optional[ReasoningEffort] = None
     name: Annotated[
         Optional[str],
         Field(
@@ -11276,13 +11277,13 @@ class CreateChatCompletionRequest(CreateModelResponseProperties):
         ),
     ]
     model: Annotated[
-        str,
+        ModelIdsShared,
         Field(
             description="Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI\noffers a wide range of models with different capabilities, performance\ncharacteristics, and price points. Refer to the [model guide](/docs/models)\nto browse and compare available models.\n"
         ),
     ]
     modalities: Optional[ResponseModalities] = None
-    reasoning_effort: Annotated[Optional[ReasoningEffort], Field()] = "medium"
+    reasoning_effort: Optional[ReasoningEffort] = None
     max_completion_tokens: Annotated[
         Optional[int],
         Field(
@@ -11394,7 +11395,7 @@ class CreateChatCompletionRequest(CreateModelResponseProperties):
         ),
     ] = None
     tool_choice: Optional[ChatCompletionToolChoiceOption] = None
-    parallel_tool_calls: Annotated[Optional[ParallelToolCalls], Field()] = True
+    parallel_tool_calls: Optional[ParallelToolCalls] = None
     function_call: Annotated[
         Optional[Union[Literal["none", "auto"], ChatCompletionFunctionCallOption]],
         Field(
```
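The discriminator conflict that motivated the Python post-processing step can be demonstrated in isolation. This is a sketch assuming Pydantic v2; the two models below are simplified stand-ins for the generated `InputMessage`/`OutputMessage`:

```python
from typing import Literal, Union

from pydantic import BaseModel, Field, TypeAdapter
from typing_extensions import Annotated


class InputMessage(BaseModel):
    type: Literal["message"]
    role: str


class OutputMessage(BaseModel):
    type: Literal["message"]
    content: str


# With discriminator='type', schema construction fails because both union
# members map the same literal value "message":
try:
    TypeAdapter(Annotated[Union[InputMessage, OutputMessage], Field(discriminator="type")])
    built = True
except Exception as exc:  # pydantic reports the 'message' value mapped to multiple choices
    built = False
    print(exc)

# Without the discriminator, Pydantic falls back to trying each member
# until one validates:
adapter = TypeAdapter(Union[InputMessage, OutputMessage])
obj = adapter.validate_python({"type": "message", "content": "hi"})
print(type(obj).__name__)  # OutputMessage
```

This is why only the unions whose members share a `type` value lose their discriminator, while unions with distinct values keep it for faster, unambiguous dispatch.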

model-engine/model_engine_server/common/dtos/llms/chat_completion.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -16,7 +16,7 @@


 class ChatCompletionV2Request(CreateChatCompletionRequest, VLLMChatCompletionAdditionalParams):
-    model: Annotated[
+    model: Annotated[  # type: ignore[assignment]
         str,
         Field(
             description="ID of the model to use.",
```

model-engine/model_engine_server/common/pydantic_types.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -4,6 +4,7 @@
 from pydantic import AnyUrl as PyAnyUrl
 from pydantic import AnyWebsocketUrl as PyAnyWebsocketUrl
 from pydantic import BaseModel as PydanticBaseModel
+from pydantic import constr  # noqa: F401
 from pydantic import model_validator  # noqa: F401
 from pydantic import ConfigDict, Field  # noqa: F401
 from pydantic import FileUrl as PyFileUrl
```
