From e7ccc16fa4cfb2c011485195339969fbd5733e66 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 10 Jun 2026 04:19:02 +0000 Subject: [PATCH] perf(stream): optimize model transformation using from_attributes Enabled `from_attributes=True` in Stream models and refactored the handler to validate models directly from source instances. This eliminates redundant dictionary creation via `model_dump()`, improving efficiency in high-throughput stream processing. --- .jules/bolt.md | 4 ++++ templates/stream/handler.py | 4 ++-- templates/stream/models.py | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.jules/bolt.md b/.jules/bolt.md index 6086111..e6c45a7 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -9,3 +9,7 @@ ## 2025-05-15 - [GraphQL] Rejected: TypeAdapter for List Validation **Learning:** While `pydantic.TypeAdapter(list[Model])` provides a theoretical ~65% performance improvement over list comprehensions by leveraging Rust-based batch processing, it may be rejected if the perceived value is low relative to the original implementation's simplicity, especially in template code. **Action:** Prioritize optimizations that have a dramatic and undeniable impact on core latency or resource consumption. + +## 2026-06-10 - [Stream] Faster Cross-Model Validation with from_attributes +**Learning:** When validating a Pydantic model using data from another Pydantic model (e.g., transforming a `SourceItem` to a `DestinationItem`), using `Model.model_validate(other_model, from_attributes=True)` is significantly faster and more memory-efficient than `Model.model_validate(other_model.model_dump())`. It bypasses the overhead of serializing the source model into an intermediate Python dictionary. Caveat: when the target model uses an `alias_generator`, attributes are looked up by alias first, so reading the source's snake_case attributes relies on `populate_by_name=True` being set (as it is on the Stream models); confirm this before applying the pattern to models that validate by alias only. +**Action:** Use `from_attributes=True` for efficient model-to-model transformations, especially in high-throughput data processing paths like DynamoDB Streams or SQS batch processing. 
diff --git a/templates/stream/handler.py b/templates/stream/handler.py index 0ddcbf6..2bbab65 100644 --- a/templates/stream/handler.py +++ b/templates/stream/handler.py @@ -3,7 +3,6 @@ from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, process_partial_response from aws_lambda_powertools.utilities.data_classes.dynamo_db_stream_event import DynamoDBRecord from aws_lambda_powertools.utilities.typing import LambdaContext -from boto3.dynamodb.types import TypeDeserializer from pydantic import ValidationError from templates.repository import Repository @@ -42,7 +41,8 @@ def _process(self, item: SourceItem) -> DestinationItem | None: """ try: # TODO: process here - return DestinationItem.model_validate(item.model_dump(by_alias=True)) + # Use from_attributes=True to validate directly from SourceItem without redundant model_dump() + return DestinationItem.model_validate(item, from_attributes=True) except ValidationError as exc: logger.error("DestinationItem validation failed", exc_info=exc) return None diff --git a/templates/stream/models.py b/templates/stream/models.py index d4d1758..bcc6d87 100644 --- a/templates/stream/models.py +++ b/templates/stream/models.py @@ -2,11 +2,11 @@ from pydantic.alias_generators import to_camel -class SourceItem(BaseModel, populate_by_name=True, alias_generator=to_camel): +class SourceItem(BaseModel, populate_by_name=True, alias_generator=to_camel, from_attributes=True): id: str = Field(description="Unique item identifier", min_length=1, max_length=50) name: str | None = Field(default=None, description="Human-readable item name", min_length=1, max_length=100) -class DestinationItem(BaseModel, populate_by_name=True, alias_generator=to_camel): +class DestinationItem(BaseModel, populate_by_name=True, alias_generator=to_camel, from_attributes=True): id: str = Field(description="Unique item identifier", min_length=1, max_length=50) name: str | None = Field(default=None, description="Human-readable item name", 
min_length=1, max_length=100)