From 78d11d4151314f0e3bebfc0e2f31f48436d4111b Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 3 Jul 2026 11:48:34 +0200 Subject: [PATCH 1/3] refactor: unify alias vs validation_alias usage in Pydantic models --- .rules.md | 15 +++++++++++ src/apify/_actor.py | 4 +-- src/apify/_configuration.py | 50 ++++++++++++++++++------------------- 3 files changed, 41 insertions(+), 28 deletions(-) diff --git a/.rules.md b/.rules.md index a6834c02..259d37b5 100644 --- a/.rules.md +++ b/.rules.md @@ -59,6 +59,21 @@ uv run poe e2e-tests - `chore:` — dependency bumps, tooling, and other housekeeping - `build:` — build system changes +### Pydantic aliases + +Choose `alias` vs `validation_alias` deliberately — they differ in serialization. `alias` affects +both validation and serialization (the field dumps under the alias with `model_dump(by_alias=True)`); +`validation_alias` affects only validation (the field still dumps under its Python name). + +- **`Configuration` fields** (`_configuration.py`) are populated only from environment variables and + are never serialized back under their env-var names. Always use `validation_alias` — a plain string + for a single env var (`validation_alias='apify_token'`), or `AliasChoices(...)` for several legacy + names. Never use `alias` here; it would leak the env-var name into `model_dump(by_alias=True)`. +- **API / event / data models** (camelCase JSON round-trip with the platform) use + `alias_generator=to_camel` / `alias`, which is correct because both directions need the camelCase + name. When one field's serialized name must differ from what the generator derives, set both + `validation_alias` and `serialization_alias` explicitly (see `RequestQueueHead.lock_time`). + ## Architecture ### Core (`src/apify/`) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 23c70e9d..0d41464a 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -865,9 +865,7 @@ def get_env(self) -> dict: if field.deprecated: continue - if field.alias: - aliases = [field.alias] - elif isinstance(field.validation_alias, str): + if isinstance(field.validation_alias, str): aliases = [field.validation_alias] elif isinstance(field.validation_alias, AliasChoices): aliases = cast('list[str]', field.validation_alias.choices) diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index 18c8f9bc..48e0d0d5 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -204,7 +204,7 @@ class Configuration(CrawleeConfiguration): api_base_url: Annotated[ str, Field( - alias='apify_api_base_url', + validation_alias='apify_api_base_url', description='Internal URL of the Apify API. May be used to interact with the platform programmatically', ), ] = 'https://api.apify.com' @@ -212,7 +212,7 @@ class Configuration(CrawleeConfiguration): api_public_base_url: Annotated[ str, Field( - alias='apify_api_public_base_url', + validation_alias='apify_api_public_base_url', description='Public URL of the Apify API. May be used to link to REST API resources', ), ] = 'https://api.apify.com' @@ -220,7 +220,7 @@ class Configuration(CrawleeConfiguration): dedicated_cpus: Annotated[ float | None, Field( - alias='apify_dedicated_cpus', + validation_alias='apify_dedicated_cpus', description='Number of CPU cores reserved for the actor, based on allocated memory', ), ] = None @@ -261,13 +261,13 @@ class Configuration(CrawleeConfiguration): disable_outdated_warning: Annotated[ bool, Field( - alias='apify_disable_outdated_warning', + validation_alias='apify_disable_outdated_warning', description='Controls the display of outdated SDK version warnings', ), BeforeValidator(lambda val: val or False), ] = False - fact: Annotated[str | None, Field(alias='apify_fact')] = None + fact: Annotated[str | None, Field(validation_alias='apify_fact')] = None input_key: Annotated[ str, @@ -284,7 +284,7 @@ class Configuration(CrawleeConfiguration): input_secrets_private_key_file: Annotated[ str | None, Field( - alias='apify_input_secrets_private_key_file', + validation_alias='apify_input_secrets_private_key_file', description='Path to the secret key used to decrypt Secret inputs.', ), ] = None @@ -292,7 +292,7 @@ class Configuration(CrawleeConfiguration): input_secrets_private_key_passphrase: Annotated[ str | None, Field( - alias='apify_input_secrets_private_key_passphrase', + validation_alias='apify_input_secrets_private_key_passphrase', description='Passphrase for the input secret key', ), ] = None @@ -300,7 +300,7 @@ class Configuration(CrawleeConfiguration): is_at_home: Annotated[ bool, Field( - alias='apify_is_at_home', + validation_alias='apify_is_at_home', description='True if the Actor is running on Apify servers', ), ] = False @@ -308,7 +308,7 @@ class Configuration(CrawleeConfiguration): max_paid_dataset_items: Annotated[ int | None, Field( - alias='actor_max_paid_dataset_items', + validation_alias='actor_max_paid_dataset_items', description='For paid-per-result Actors, the user-set limit on returned results. Do not exceed this limit', ), BeforeValidator(_default_if_empty(default=None)), @@ -317,7 +317,7 @@ class Configuration(CrawleeConfiguration): max_total_charge_usd: Annotated[ Decimal | None, Field( - alias='actor_max_total_charge_usd', + validation_alias='actor_max_total_charge_usd', description='For pay-per-event Actors, the user-set limit on total charges. Do not exceed this limit', ), BeforeValidator(_default_if_empty(default=None)), @@ -326,7 +326,7 @@ class Configuration(CrawleeConfiguration): test_pay_per_event: Annotated[ bool, Field( - alias='actor_test_pay_per_event', + validation_alias='actor_test_pay_per_event', description='Enable pay-per-event functionality for local development', ), ] = False @@ -334,7 +334,7 @@ class Configuration(CrawleeConfiguration): meta_origin: Annotated[ str | None, Field( - alias='apify_meta_origin', + validation_alias='apify_meta_origin', description='Specifies how an Actor run was started', ), ] = None @@ -342,7 +342,7 @@ class Configuration(CrawleeConfiguration): metamorph_after_sleep: Annotated[ timedelta_ms, Field( - alias='apify_metamorph_after_sleep_millis', + validation_alias='apify_metamorph_after_sleep_millis', description='How long the Actor needs to wait before exiting after triggering a metamorph', ), ] = timedelta(minutes=5) @@ -350,7 +350,7 @@ class Configuration(CrawleeConfiguration): proxy_hostname: Annotated[ str, Field( - alias='apify_proxy_hostname', + validation_alias='apify_proxy_hostname', description='Hostname of the Apify proxy', ), ] = 'proxy.apify.com' @@ -358,7 +358,7 @@ class Configuration(CrawleeConfiguration): proxy_password: Annotated[ str | None, Field( - alias='apify_proxy_password', + validation_alias='apify_proxy_password', description='Password to the Apify proxy', ), ] = None @@ -366,7 +366,7 @@ class Configuration(CrawleeConfiguration): proxy_port: Annotated[ int, Field( - alias='apify_proxy_port', + validation_alias='apify_proxy_port', description='Port to communicate with the Apify proxy', ), ] = 8000 @@ -374,7 +374,7 @@ class Configuration(CrawleeConfiguration): proxy_status_url: Annotated[ str, Field( - alias='apify_proxy_status_url', + validation_alias='apify_proxy_status_url', description='URL for retrieving proxy status information', ), ] = 'http://proxy.apify.com' @@ -406,7 +406,7 @@ class Configuration(CrawleeConfiguration): str, BeforeValidator(validate_http_url), Field( - alias='actor_standby_url', + validation_alias='actor_standby_url', description='URL for accessing web servers of Actor runs in Standby mode', ), ] = 'http://localhost' @@ -414,7 +414,7 @@ class Configuration(CrawleeConfiguration): token: Annotated[ str | None, Field( - alias='apify_token', + validation_alias='apify_token', description='API token of the user who started the Actor', ), ] = None @@ -422,7 +422,7 @@ class Configuration(CrawleeConfiguration): user_id: Annotated[ str | None, Field( - alias='apify_user_id', + validation_alias='apify_user_id', description='ID of the user who started the Actor. May differ from the Actor owner', ), ] = None @@ -430,7 +430,7 @@ class Configuration(CrawleeConfiguration): user_is_paying: Annotated[ bool, Field( - alias='apify_user_is_paying', + validation_alias='apify_user_is_paying', description='True if the user calling the Actor is paying user', ), BeforeValidator(_default_if_empty(default=False)), @@ -462,7 +462,7 @@ class Configuration(CrawleeConfiguration): workflow_key: Annotated[ str | None, Field( - alias='apify_workflow_key', + validation_alias='apify_workflow_key', description='Identifier used for grouping related runs and API calls together', ), ] = None @@ -474,7 +474,7 @@ class Configuration(CrawleeConfiguration): | PayPerEventActorPricingInfo | None, Field( - alias='apify_actor_pricing_info', + validation_alias='apify_actor_pricing_info', description='JSON string with pricing info of the actor', discriminator='pricing_model', ), @@ -484,7 +484,7 @@ class Configuration(CrawleeConfiguration): charged_event_counts: Annotated[ dict[str, int] | None, Field( - alias='apify_charged_actor_event_counts', + validation_alias='apify_charged_actor_event_counts', description='Counts of events that were charged for the actor', ), BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) else data or None), @@ -493,7 +493,7 @@ class Configuration(CrawleeConfiguration): actor_storages: Annotated[ ActorStorages | None, Field( - alias='actor_storages_json', + validation_alias='actor_storages_json', description='Mapping of storage aliases to their platform-assigned IDs.', ), BeforeValidator(_load_storage_keys), From 8eb5974ef89954589f77b4075420c8968a5af36d Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 3 Jul 2026 12:36:46 +0200 Subject: [PATCH 2/3] refactor: prefer alias, pair validation_alias with serialization_alias for multi-name config fields --- .rules.md | 23 +++++++------ src/apify/_actor.py | 4 ++- src/apify/_configuration.py | 64 ++++++++++++++++++++++--------------- 3 files changed, 53 insertions(+), 38 deletions(-) diff --git a/.rules.md b/.rules.md index 259d37b5..798fd693 100644 --- a/.rules.md +++ b/.rules.md @@ -61,18 +61,17 @@ uv run poe e2e-tests ### Pydantic aliases -Choose `alias` vs `validation_alias` deliberately — they differ in serialization. `alias` affects -both validation and serialization (the field dumps under the alias with `model_dump(by_alias=True)`); -`validation_alias` affects only validation (the field still dumps under its Python name). - -- **`Configuration` fields** (`_configuration.py`) are populated only from environment variables and - are never serialized back under their env-var names. Always use `validation_alias` — a plain string - for a single env var (`validation_alias='apify_token'`), or `AliasChoices(...)` for several legacy - names. Never use `alias` here; it would leak the env-var name into `model_dump(by_alias=True)`. -- **API / event / data models** (camelCase JSON round-trip with the platform) use - `alias_generator=to_camel` / `alias`, which is correct because both directions need the camelCase - name. When one field's serialized name must differ from what the generator derives, set both - `validation_alias` and `serialization_alias` explicitly (see `RequestQueueHead.lock_time`). +Prefer `alias`. It sets the validation and the serialization name at once, so a field reads and dumps +(`model_dump(by_alias=True)`) under the same name. Reach for `validation_alias` only when a single +`alias` can't express what's needed, and then always pair it with `serialization_alias`. Never leave +a field with `validation_alias` alone — it would silently dump under its Python name. In short: either +just `alias`, or both `validation_alias` and `serialization_alias`. + +- **Single name** → `alias='apify_token'`. Used by most `Configuration` fields and, via + `alias_generator=to_camel`, by the API / event / data models that round-trip camelCase JSON. +- **Several input names, or an input name that differs from the output name** → + `validation_alias=AliasChoices('actor_id', 'apify_actor_id')` + `serialization_alias='actor_id'`. + Used by `Configuration` fields with legacy env-var aliases and by `RequestQueueHead.lock_time`. ## Architecture diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 0d41464a..23c70e9d 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -865,7 +865,9 @@ def get_env(self) -> dict: if field.deprecated: continue - if isinstance(field.validation_alias, str): + if field.alias: + aliases = [field.alias] + elif isinstance(field.validation_alias, str): aliases = [field.validation_alias] elif isinstance(field.validation_alias, AliasChoices): aliases = cast('list[str]', field.validation_alias.choices) diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index 48e0d0d5..2eea0a31 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -119,6 +119,7 @@ class Configuration(CrawleeConfiguration): 'apify_actor_id', 'apify_act_id', ), + serialization_alias='actor_id', description='ID of the Actor', ), ] = None @@ -145,6 +146,7 @@ class Configuration(CrawleeConfiguration): 'apify_actor_run_id', 'apify_act_run_id', ), + serialization_alias='actor_run_id', description='ID of the Actor run', ), ] = None @@ -156,6 +158,7 @@ class Configuration(CrawleeConfiguration): 'actor_build_id', 'apify_actor_build_id', ), + serialization_alias='actor_build_id', description='ID of the Actor build used in the run', ), ] = None @@ -167,6 +170,7 @@ class Configuration(CrawleeConfiguration): 'actor_build_number', 'apify_actor_build_number', ), + serialization_alias='actor_build_number', description='Build number of the Actor build used in the run', ), ] = None @@ -186,6 +190,7 @@ class Configuration(CrawleeConfiguration): 'actor_task_id', 'apify_actor_task_id', ), + serialization_alias='actor_task_id', description='ID of the Actor task. Empty if Actor is run outside of any task, e.g. directly using the API', ), ] = None @@ -197,6 +202,7 @@ class Configuration(CrawleeConfiguration): 'actor_events_websocket_url', 'apify_actor_events_ws_url', ), + serialization_alias='actor_events_websocket_url', description='Websocket URL where Actor may listen for events from Actor platform', ), ] = None @@ -204,7 +210,7 @@ class Configuration(CrawleeConfiguration): api_base_url: Annotated[ str, Field( - validation_alias='apify_api_base_url', + alias='apify_api_base_url', description='Internal URL of the Apify API. May be used to interact with the platform programmatically', ), ] = 'https://api.apify.com' @@ -212,7 +218,7 @@ class Configuration(CrawleeConfiguration): api_public_base_url: Annotated[ str, Field( - validation_alias='apify_api_public_base_url', + alias='apify_api_public_base_url', description='Public URL of the Apify API. May be used to link to REST API resources', ), ] = 'https://api.apify.com' @@ -220,7 +226,7 @@ class Configuration(CrawleeConfiguration): dedicated_cpus: Annotated[ float | None, Field( - validation_alias='apify_dedicated_cpus', + alias='apify_dedicated_cpus', description='Number of CPU cores reserved for the actor, based on allocated memory', ), ] = None @@ -232,6 +238,7 @@ class Configuration(CrawleeConfiguration): 'actor_default_dataset_id', 'apify_default_dataset_id', ), + serialization_alias='actor_default_dataset_id', description='Default dataset ID used by the Apify storage client when no ID or name is provided.', ), ] = None @@ -243,6 +250,7 @@ class Configuration(CrawleeConfiguration): 'actor_default_key_value_store_id', 'apify_default_key_value_store_id', ), + serialization_alias='actor_default_key_value_store_id', description='Default key-value store ID for the Apify storage client when no ID or name is provided.', ), ] = None @@ -254,6 +262,7 @@ class Configuration(CrawleeConfiguration): 'actor_default_request_queue_id', 'apify_default_request_queue_id', ), + serialization_alias='actor_default_request_queue_id', description='Default request queue ID for the Apify storage client when no ID or name is provided.', ), ] = None @@ -261,13 +270,13 @@ class Configuration(CrawleeConfiguration): disable_outdated_warning: Annotated[ bool, Field( - validation_alias='apify_disable_outdated_warning', + alias='apify_disable_outdated_warning', description='Controls the display of outdated SDK version warnings', ), BeforeValidator(lambda val: val or False), ] = False - fact: Annotated[str | None, Field(validation_alias='apify_fact')] = None + fact: Annotated[str | None, Field(alias='apify_fact')] = None input_key: Annotated[ str, @@ -277,6 +286,7 @@ class Configuration(CrawleeConfiguration): 'apify_input_key', 'crawlee_input_key', ), + serialization_alias='actor_input_key', description='Key of the record in the default key-value store that holds the Actor input', ), ] = 'INPUT' @@ -284,7 +294,7 @@ class Configuration(CrawleeConfiguration): input_secrets_private_key_file: Annotated[ str | None, Field( - validation_alias='apify_input_secrets_private_key_file', + alias='apify_input_secrets_private_key_file', description='Path to the secret key used to decrypt Secret inputs.', ), ] = None @@ -292,7 +302,7 @@ class Configuration(CrawleeConfiguration): input_secrets_private_key_passphrase: Annotated[ str | None, Field( - validation_alias='apify_input_secrets_private_key_passphrase', + alias='apify_input_secrets_private_key_passphrase', description='Passphrase for the input secret key', ), ] = None @@ -300,7 +310,7 @@ class Configuration(CrawleeConfiguration): is_at_home: Annotated[ bool, Field( - validation_alias='apify_is_at_home', + alias='apify_is_at_home', description='True if the Actor is running on Apify servers', ), ] = False @@ -308,7 +318,7 @@ class Configuration(CrawleeConfiguration): max_paid_dataset_items: Annotated[ int | None, Field( - validation_alias='actor_max_paid_dataset_items', + alias='actor_max_paid_dataset_items', description='For paid-per-result Actors, the user-set limit on returned results. Do not exceed this limit', ), BeforeValidator(_default_if_empty(default=None)), @@ -317,7 +327,7 @@ class Configuration(CrawleeConfiguration): max_total_charge_usd: Annotated[ Decimal | None, Field( - validation_alias='actor_max_total_charge_usd', + alias='actor_max_total_charge_usd', description='For pay-per-event Actors, the user-set limit on total charges. Do not exceed this limit', ), BeforeValidator(_default_if_empty(default=None)), @@ -326,7 +336,7 @@ class Configuration(CrawleeConfiguration): test_pay_per_event: Annotated[ bool, Field( - validation_alias='actor_test_pay_per_event', + alias='actor_test_pay_per_event', description='Enable pay-per-event functionality for local development', ), ] = False @@ -334,7 +344,7 @@ class Configuration(CrawleeConfiguration): meta_origin: Annotated[ str | None, Field( - validation_alias='apify_meta_origin', + alias='apify_meta_origin', description='Specifies how an Actor run was started', ), ] = None @@ -342,7 +352,7 @@ class Configuration(CrawleeConfiguration): metamorph_after_sleep: Annotated[ timedelta_ms, Field( - validation_alias='apify_metamorph_after_sleep_millis', + alias='apify_metamorph_after_sleep_millis', description='How long the Actor needs to wait before exiting after triggering a metamorph', ), ] = timedelta(minutes=5) @@ -350,7 +360,7 @@ class Configuration(CrawleeConfiguration): proxy_hostname: Annotated[ str, Field( - validation_alias='apify_proxy_hostname', + alias='apify_proxy_hostname', description='Hostname of the Apify proxy', ), ] = 'proxy.apify.com' @@ -358,7 +368,7 @@ class Configuration(CrawleeConfiguration): proxy_password: Annotated[ str | None, Field( - validation_alias='apify_proxy_password', + alias='apify_proxy_password', description='Password to the Apify proxy', ), ] = None @@ -366,7 +376,7 @@ class Configuration(CrawleeConfiguration): proxy_port: Annotated[ int, Field( - validation_alias='apify_proxy_port', + alias='apify_proxy_port', description='Port to communicate with the Apify proxy', ), ] = 8000 @@ -374,7 +384,7 @@ class Configuration(CrawleeConfiguration): proxy_status_url: Annotated[ str, Field( - validation_alias='apify_proxy_status_url', + alias='apify_proxy_status_url', description='URL for retrieving proxy status information', ), ] = 'http://proxy.apify.com' @@ -386,6 +396,7 @@ class Configuration(CrawleeConfiguration): 'actor_started_at', 'apify_started_at', ), + serialization_alias='actor_started_at', description='Date when the Actor was started', ), ] = None @@ -397,6 +408,7 @@ class Configuration(CrawleeConfiguration): 'actor_timeout_at', 'apify_timeout_at', ), + serialization_alias='actor_timeout_at', description='Date when the Actor will time out', ), BeforeValidator(_default_if_empty(default=None)), @@ -406,7 +418,7 @@ class Configuration(CrawleeConfiguration): str, BeforeValidator(validate_http_url), Field( - validation_alias='actor_standby_url', + alias='actor_standby_url', description='URL for accessing web servers of Actor runs in Standby mode', ), ] = 'http://localhost' @@ -414,7 +426,7 @@ class Configuration(CrawleeConfiguration): token: Annotated[ str | None, Field( - validation_alias='apify_token', + alias='apify_token', description='API token of the user who started the Actor', ), ] = None @@ -422,7 +434,7 @@ class Configuration(CrawleeConfiguration): user_id: Annotated[ str | None, Field( - validation_alias='apify_user_id', + alias='apify_user_id', description='ID of the user who started the Actor. May differ from the Actor owner', ), ] = None @@ -430,7 +442,7 @@ class Configuration(CrawleeConfiguration): user_is_paying: Annotated[ bool, Field( - validation_alias='apify_user_is_paying', + alias='apify_user_is_paying', description='True if the user calling the Actor is paying user', ), BeforeValidator(_default_if_empty(default=False)), @@ -443,6 +455,7 @@ class Configuration(CrawleeConfiguration): 'actor_web_server_port', 'apify_container_port', ), + serialization_alias='actor_web_server_port', description='TCP port for the Actor to start an HTTP server on. ' 'This server can be used to receive external messages or expose monitoring and control interfaces', ), @@ -455,6 +468,7 @@ class Configuration(CrawleeConfiguration): 'actor_web_server_url', 'apify_container_url', ), + serialization_alias='actor_web_server_url', description='Unique public URL for accessing a specific Actor run web server from the outside world', ), ] = 'http://localhost:4321' @@ -462,7 +476,7 @@ class Configuration(CrawleeConfiguration): workflow_key: Annotated[ str | None, Field( - validation_alias='apify_workflow_key', + alias='apify_workflow_key', description='Identifier used for grouping related runs and API calls together', ), ] = None @@ -474,7 +488,7 @@ class Configuration(CrawleeConfiguration): | PayPerEventActorPricingInfo | None, Field( - validation_alias='apify_actor_pricing_info', + alias='apify_actor_pricing_info', description='JSON string with pricing info of the actor', discriminator='pricing_model', ), @@ -484,7 +498,7 @@ class Configuration(CrawleeConfiguration): charged_event_counts: Annotated[ dict[str, int] | None, Field( - validation_alias='apify_charged_actor_event_counts', + alias='apify_charged_actor_event_counts', description='Counts of events that were charged for the actor', ), BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) else data or None), @@ -493,7 +507,7 @@ class Configuration(CrawleeConfiguration): actor_storages: Annotated[ ActorStorages | None, Field( - validation_alias='actor_storages_json', + alias='actor_storages_json', description='Mapping of storage aliases to their platform-assigned IDs.', ), BeforeValidator(_load_storage_keys), From 1fb0cbcd5b6805258412c9e147b9e956b1ab6399 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 3 Jul 2026 12:46:52 +0200 Subject: [PATCH 3/3] refactor: serialize Configuration under camelCase via alias_generator --- .rules.md | 23 ++++++------ src/apify/_configuration.py | 72 +++++++++++++++++-------------------- 2 files changed, 44 insertions(+), 51 deletions(-) diff --git a/.rules.md b/.rules.md index 798fd693..7f15c3e2 100644 --- a/.rules.md +++ b/.rules.md @@ -61,17 +61,18 @@ uv run poe e2e-tests ### Pydantic aliases -Prefer `alias`. It sets the validation and the serialization name at once, so a field reads and dumps -(`model_dump(by_alias=True)`) under the same name. Reach for `validation_alias` only when a single -`alias` can't express what's needed, and then always pair it with `serialization_alias`. Never leave -a field with `validation_alias` alone — it would silently dump under its Python name. In short: either -just `alias`, or both `validation_alias` and `serialization_alias`. - -- **Single name** → `alias='apify_token'`. Used by most `Configuration` fields and, via - `alias_generator=to_camel`, by the API / event / data models that round-trip camelCase JSON. -- **Several input names, or an input name that differs from the output name** → - `validation_alias=AliasChoices('actor_id', 'apify_actor_id')` + `serialization_alias='actor_id'`. - Used by `Configuration` fields with legacy env-var aliases and by `RequestQueueHead.lock_time`. +`model_dump(by_alias=True)` serializes every model under camelCase names. Keep it that way: + +- **API / event / data models** (camelCase JSON round-trip with the platform) — set + `model_config = ConfigDict(alias_generator=to_camel)`, so both validation and serialization use the + camelCase name. +- **`Configuration`** — validated from environment variables, so each field carries a `validation_alias` + (a plain string, or `AliasChoices(...)` for several legacy env names). Serialization is handled by the + model-level `alias_generator=AliasGenerator(serialization_alias=to_camel)`, which derives the camelCase + name from the Python field name. Don't use `alias=` on a `Configuration` field — it pins serialization + to the env-var name and defeats the camelCase generator. +- Set an explicit `serialization_alias` only when the wire name isn't the `to_camel` derivation of the + field name (e.g. `RequestQueueHead.lock_time` -> `lockSecs`). ## Architecture diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index 2eea0a31..9731696b 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -7,7 +7,8 @@ from pathlib import Path from typing import TYPE_CHECKING, Annotated, Any, Self -from pydantic import AliasChoices, BeforeValidator, Field, model_validator +from pydantic import AliasChoices, AliasGenerator, BeforeValidator, ConfigDict, Field, model_validator +from pydantic.alias_generators import to_camel from typing_extensions import TypedDict from crawlee import service_locator @@ -111,6 +112,11 @@ class Configuration(CrawleeConfiguration): or it can be specific to each `Actor` instance on the `actor.config` property. """ + # Fields are validated from environment variables via their `validation_alias`, but serialized under a + # camelCase name derived from the Python field name. This keeps `model_dump(by_alias=True)` consistent + # (e.g. `is_at_home` -> `isAtHome`) instead of leaking the raw env-var names. + model_config = ConfigDict(alias_generator=AliasGenerator(serialization_alias=to_camel)) + actor_id: Annotated[ str | None, Field( @@ -119,7 +125,6 @@ class Configuration(CrawleeConfiguration): 'apify_actor_id', 'apify_act_id', ), - serialization_alias='actor_id', description='ID of the Actor', ), ] = None @@ -146,7 +151,6 @@ class Configuration(CrawleeConfiguration): 'apify_actor_run_id', 'apify_act_run_id', ), - serialization_alias='actor_run_id', description='ID of the Actor run', ), ] = None @@ -158,7 +162,6 @@ class Configuration(CrawleeConfiguration): 'actor_build_id', 'apify_actor_build_id', ), - serialization_alias='actor_build_id', description='ID of the Actor build used in the run', ), ] = None @@ -170,7 +173,6 @@ class Configuration(CrawleeConfiguration): 'actor_build_number', 'apify_actor_build_number', ), - serialization_alias='actor_build_number', description='Build number of the Actor build used in the run', ), ] = None @@ -190,7 +192,6 @@ class Configuration(CrawleeConfiguration): 'actor_task_id', 'apify_actor_task_id', ), - serialization_alias='actor_task_id', description='ID of the Actor task. Empty if Actor is run outside of any task, e.g. directly using the API', ), ] = None @@ -202,7 +203,6 @@ class Configuration(CrawleeConfiguration): 'actor_events_websocket_url', 'apify_actor_events_ws_url', ), - serialization_alias='actor_events_websocket_url', description='Websocket URL where Actor may listen for events from Actor platform', ), ] = None @@ -210,7 +210,7 @@ class Configuration(CrawleeConfiguration): api_base_url: Annotated[ str, Field( - alias='apify_api_base_url', + validation_alias='apify_api_base_url', description='Internal URL of the Apify API. May be used to interact with the platform programmatically', ), ] = 'https://api.apify.com' @@ -218,7 +218,7 @@ class Configuration(CrawleeConfiguration): api_public_base_url: Annotated[ str, Field( - alias='apify_api_public_base_url', + validation_alias='apify_api_public_base_url', description='Public URL of the Apify API. May be used to link to REST API resources', ), ] = 'https://api.apify.com' @@ -226,7 +226,7 @@ class Configuration(CrawleeConfiguration): dedicated_cpus: Annotated[ float | None, Field( - alias='apify_dedicated_cpus', + validation_alias='apify_dedicated_cpus', description='Number of CPU cores reserved for the actor, based on allocated memory', ), ] = None @@ -238,7 +238,6 @@ class Configuration(CrawleeConfiguration): 'actor_default_dataset_id', 'apify_default_dataset_id', ), - serialization_alias='actor_default_dataset_id', description='Default dataset ID used by the Apify storage client when no ID or name is provided.', ), ] = None @@ -250,7 +249,6 @@ class Configuration(CrawleeConfiguration): 'actor_default_key_value_store_id', 'apify_default_key_value_store_id', ), - serialization_alias='actor_default_key_value_store_id', description='Default key-value store ID for the Apify storage client when no ID or name is provided.', ), ] = None @@ -262,7 +260,6 @@ class Configuration(CrawleeConfiguration): 'actor_default_request_queue_id', 'apify_default_request_queue_id', ), - serialization_alias='actor_default_request_queue_id', description='Default request queue ID for the Apify storage client when no ID or name is provided.', ), ] = None @@ -270,13 +267,13 @@ class Configuration(CrawleeConfiguration): disable_outdated_warning: Annotated[ bool, Field( - alias='apify_disable_outdated_warning', + validation_alias='apify_disable_outdated_warning', description='Controls the display of outdated SDK version warnings', ), BeforeValidator(lambda val: val or False), ] = False - fact: Annotated[str | None, Field(alias='apify_fact')] = None + fact: Annotated[str | None, Field(validation_alias='apify_fact')] = None input_key: Annotated[ str, @@ -286,7 +283,6 @@ class Configuration(CrawleeConfiguration): 'apify_input_key', 'crawlee_input_key', ), - serialization_alias='actor_input_key', description='Key of the record in the default key-value store that holds the Actor input', ), ] = 'INPUT' @@ -294,7 +290,7 @@ class Configuration(CrawleeConfiguration): input_secrets_private_key_file: Annotated[ str | None, Field( - alias='apify_input_secrets_private_key_file', + validation_alias='apify_input_secrets_private_key_file', description='Path to the secret key used to decrypt Secret inputs.', ), ] = None @@ -302,7 +298,7 @@ class Configuration(CrawleeConfiguration): input_secrets_private_key_passphrase: Annotated[ str | None, Field( - alias='apify_input_secrets_private_key_passphrase', + validation_alias='apify_input_secrets_private_key_passphrase', description='Passphrase for the input secret key', ), ] = None @@ -310,7 +306,7 @@ class Configuration(CrawleeConfiguration): is_at_home: Annotated[ bool, Field( - alias='apify_is_at_home', + validation_alias='apify_is_at_home', description='True if the Actor is running on Apify servers', ), ] = False @@ -318,7 +314,7 @@ class Configuration(CrawleeConfiguration): max_paid_dataset_items: Annotated[ int | None, Field( - alias='actor_max_paid_dataset_items', + validation_alias='actor_max_paid_dataset_items', description='For paid-per-result Actors, the user-set limit on returned results. Do not exceed this limit', ), BeforeValidator(_default_if_empty(default=None)), @@ -327,7 +323,7 @@ class Configuration(CrawleeConfiguration): max_total_charge_usd: Annotated[ Decimal | None, Field( - alias='actor_max_total_charge_usd', + validation_alias='actor_max_total_charge_usd', description='For pay-per-event Actors, the user-set limit on total charges. Do not exceed this limit', ), BeforeValidator(_default_if_empty(default=None)), @@ -336,7 +332,7 @@ class Configuration(CrawleeConfiguration): test_pay_per_event: Annotated[ bool, Field( - alias='actor_test_pay_per_event', + validation_alias='actor_test_pay_per_event', description='Enable pay-per-event functionality for local development', ), ] = False @@ -344,7 +340,7 @@ class Configuration(CrawleeConfiguration): meta_origin: Annotated[ str | None, Field( - alias='apify_meta_origin', + validation_alias='apify_meta_origin', description='Specifies how an Actor run was started', ), ] = None @@ -352,7 +348,7 @@ class Configuration(CrawleeConfiguration): metamorph_after_sleep: Annotated[ timedelta_ms, Field( - alias='apify_metamorph_after_sleep_millis', + validation_alias='apify_metamorph_after_sleep_millis', description='How long the Actor needs to wait before exiting after triggering a metamorph', ), ] = timedelta(minutes=5) @@ -360,7 +356,7 @@ class Configuration(CrawleeConfiguration): proxy_hostname: Annotated[ str, Field( - alias='apify_proxy_hostname', + validation_alias='apify_proxy_hostname', description='Hostname of the Apify proxy', ), ] = 'proxy.apify.com' @@ -368,7 +364,7 @@ class Configuration(CrawleeConfiguration): proxy_password: Annotated[ str | None, Field( - alias='apify_proxy_password', + validation_alias='apify_proxy_password', description='Password to the Apify proxy', ), ] = None @@ -376,7 +372,7 @@ class Configuration(CrawleeConfiguration): proxy_port: Annotated[ int, Field( - alias='apify_proxy_port', + validation_alias='apify_proxy_port', description='Port to communicate with the Apify proxy', ), ] = 8000 @@ -384,7 +380,7 @@ class Configuration(CrawleeConfiguration): proxy_status_url: Annotated[ str, Field( - alias='apify_proxy_status_url', + validation_alias='apify_proxy_status_url', description='URL for retrieving proxy status information', ), ] = 'http://proxy.apify.com' @@ -396,7 +392,6 @@ class Configuration(CrawleeConfiguration): 'actor_started_at', 'apify_started_at', ), - serialization_alias='actor_started_at', description='Date when the Actor was started', ), ] = None @@ -408,7 +403,6 @@ class Configuration(CrawleeConfiguration): 'actor_timeout_at', 'apify_timeout_at', ), - serialization_alias='actor_timeout_at', description='Date when the Actor will time out', ), BeforeValidator(_default_if_empty(default=None)), @@ -418,7 +412,7 @@ class Configuration(CrawleeConfiguration): str, BeforeValidator(validate_http_url), Field( - alias='actor_standby_url', + validation_alias='actor_standby_url', description='URL for accessing web servers of Actor runs in Standby mode', ), ] = 'http://localhost' @@ -426,7 +420,7 @@ class Configuration(CrawleeConfiguration): token: Annotated[ str | None, Field( - alias='apify_token', + validation_alias='apify_token', description='API token of the user who started the Actor', ), ] = None @@ -434,7 +428,7 @@ class Configuration(CrawleeConfiguration): user_id: Annotated[ str | None, Field( - alias='apify_user_id', + validation_alias='apify_user_id', description='ID of the user who started the Actor. May differ from the Actor owner', ), ] = None @@ -442,7 +436,7 @@ class Configuration(CrawleeConfiguration): user_is_paying: Annotated[ bool, Field( - alias='apify_user_is_paying', + validation_alias='apify_user_is_paying', description='True if the user calling the Actor is paying user', ), BeforeValidator(_default_if_empty(default=False)), @@ -455,7 +449,6 @@ class Configuration(CrawleeConfiguration): 'actor_web_server_port', 'apify_container_port', ), - serialization_alias='actor_web_server_port', description='TCP port for the Actor to start an HTTP server on. ' 'This server can be used to receive external messages or expose monitoring and control interfaces', ), @@ -468,7 +461,6 @@ class Configuration(CrawleeConfiguration): 'actor_web_server_url', 'apify_container_url', ), - serialization_alias='actor_web_server_url', description='Unique public URL for accessing a specific Actor run web server from the outside world', ), ] = 'http://localhost:4321' @@ -476,7 +468,7 @@ class Configuration(CrawleeConfiguration): workflow_key: Annotated[ str | None, Field( - alias='apify_workflow_key', + validation_alias='apify_workflow_key', description='Identifier used for grouping related runs and API calls together', ), ] = None @@ -488,7 +480,7 @@ class Configuration(CrawleeConfiguration): | PayPerEventActorPricingInfo | None, Field( - alias='apify_actor_pricing_info', + validation_alias='apify_actor_pricing_info', description='JSON string with pricing info of the actor', discriminator='pricing_model', ), @@ -498,7 +490,7 @@ class Configuration(CrawleeConfiguration): charged_event_counts: Annotated[ dict[str, int] | None, Field( - alias='apify_charged_actor_event_counts', + validation_alias='apify_charged_actor_event_counts', description='Counts of events that were charged for the actor', ), BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) else data or None), @@ -507,7 +499,7 @@ class Configuration(CrawleeConfiguration): actor_storages: Annotated[ ActorStorages | None, Field( - alias='actor_storages_json', + validation_alias='actor_storages_json', description='Mapping of storage aliases to their platform-assigned IDs.', ), BeforeValidator(_load_storage_keys),