From c4e0e82ea40a6a2fa893443d6099ea38436ef89a Mon Sep 17 00:00:00 2001 From: Byron Williams Date: Fri, 9 Jan 2026 13:26:16 -0800 Subject: [PATCH 01/11] feat(gemini-image): refactor architecture with modular design (v0.2.0) Phase 0 - OpenSSF Compliance: - Add MIT LICENSE file - Add SECURITY.md with vulnerability reporting policy - Add CONTRIBUTING.md with contribution guidelines - Add CHANGELOG.md for version history tracking - Update pyproject.toml with new dependencies (tenacity, python-dotenv, structlog) Phase 1 - Architecture Refactoring: - Create exceptions.py with full error hierarchy (GeminiImageError base, ConfigurationError, APIError, RateLimitError, ServerError, ContentBlockedError, ValidationError, FormatDetectionError, FileOperationError, GenerationError) - Create io.py with magic byte format detection fixing JPEG/PNG mismatch, safe file saving with extension correction, and JSON sidecar metadata - Create client.py with GeminiClient class and retry logic using tenacity - Create registry.py for PROMPTS.md generation tracking - Create response_parser.py for API response parsing with ThoughtImage and GenerationResponse dataclasses - Refactor generator.py to use new modular components - Update cli.py with new flags (--no-document, --no-metadata, --registry, --resume) - Update __init__.py with all new exports - Add comprehensive tests for all new modules (86 tests passing) - Add package-specific ruff overrides in pyproject.toml BREAKING CHANGE: Internal module structure changed. Public API remains compatible. 
Co-Authored-By: Claude Opus 4.5 --- packages/gemini-image/CHANGELOG.md | 52 ++ packages/gemini-image/CONTRIBUTING.md | 170 +++++ packages/gemini-image/LICENSE | 21 + packages/gemini-image/SECURITY.md | 67 ++ packages/gemini-image/pyproject.toml | 32 +- .../gemini-image/src/gemini_image/__init__.py | 61 +- packages/gemini-image/src/gemini_image/cli.py | 80 +- .../gemini-image/src/gemini_image/client.py | 302 ++++++++ .../src/gemini_image/exceptions.py | 303 ++++++++ .../src/gemini_image/generator.py | 682 ++++++++++-------- packages/gemini-image/src/gemini_image/io.py | 336 +++++++++ .../gemini-image/src/gemini_image/registry.py | 282 ++++++++ .../src/gemini_image/response_parser.py | 261 +++++++ packages/gemini-image/tests/conftest.py | 7 + .../gemini-image/tests/test_exceptions.py | 166 +++++ packages/gemini-image/tests/test_generator.py | 74 +- packages/gemini-image/tests/test_io.py | 245 +++++++ packages/gemini-image/tests/test_registry.py | 163 +++++ .../tests/test_response_parser.py | 212 ++++++ uv.lock | 8 +- 20 files changed, 3172 insertions(+), 352 deletions(-) create mode 100644 packages/gemini-image/CHANGELOG.md create mode 100644 packages/gemini-image/CONTRIBUTING.md create mode 100644 packages/gemini-image/LICENSE create mode 100644 packages/gemini-image/SECURITY.md create mode 100644 packages/gemini-image/src/gemini_image/client.py create mode 100644 packages/gemini-image/src/gemini_image/exceptions.py create mode 100644 packages/gemini-image/src/gemini_image/io.py create mode 100644 packages/gemini-image/src/gemini_image/registry.py create mode 100644 packages/gemini-image/src/gemini_image/response_parser.py create mode 100644 packages/gemini-image/tests/test_exceptions.py create mode 100644 packages/gemini-image/tests/test_io.py create mode 100644 packages/gemini-image/tests/test_registry.py create mode 100644 packages/gemini-image/tests/test_response_parser.py diff --git a/packages/gemini-image/CHANGELOG.md b/packages/gemini-image/CHANGELOG.md new 
file mode 100644 index 0000000..846e5a9 --- /dev/null +++ b/packages/gemini-image/CHANGELOG.md @@ -0,0 +1,52 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +- OpenSSF-required files: LICENSE, SECURITY.md, CONTRIBUTING.md, CHANGELOG.md +- `exceptions.py` module with custom exception hierarchy +- `client.py` module with `GeminiClient` class and retry logic using tenacity +- `io.py` module with magic byte format detection +- `registry.py` module for PROMPTS.md documentation (custom location via the `--registry` CLI flag) +- `response_parser.py` module for API response handling +- Structured logging with structlog (replaces print statements) +- `--no-document` CLI flag to disable PROMPTS.md logging +- `--resume` flag for story sequence generation (skips existing parts) +- JSON sidecar metadata files for generated images (disable with the `--no-metadata` CLI flag) +- Python 3.10 compatibility (replaced `datetime.UTC` with `timezone.utc`) + +### Changed + +- Refactored `generator.py` into smaller, focused modules +- Improved error handling with specific exception types +- Output paths now honor absolute paths (no forced `output/` prefix) +- API key loading now uses python-dotenv for robust .env handling + +### Fixed + +- JPEG/PNG format mismatch from Gemini API (magic byte detection) +- Story mode can now resume from the last completed part (with `--resume`) instead of regenerating + +## [0.1.0] - 2024-12-15 + +### Added + +- Initial release +- `generate_image()` function for text-to-image generation +- `generate_story_sequence()` function for multi-part stories +- `finalize_draft()` function for draft-to-final workflow +- CLI interface (`gemini-image` command) +- Support for Gemini 2.5 Flash and Gemini 3 Pro models +- Aspect ratio and resolution configuration +- Reference image support for editing +- Thinking mode with intermediate image 
visualization +- Draft mode for cost-effective iteration + +[Unreleased]: https://github.com/ByronWilliamsCPA/python-libs/compare/gemini-image-v0.1.0...HEAD +[0.1.0]: https://github.com/ByronWilliamsCPA/python-libs/releases/tag/gemini-image-v0.1.0 diff --git a/packages/gemini-image/CONTRIBUTING.md b/packages/gemini-image/CONTRIBUTING.md new file mode 100644 index 0000000..600646e --- /dev/null +++ b/packages/gemini-image/CONTRIBUTING.md @@ -0,0 +1,170 @@ +# Contributing to Gemini Image + +Thank you for your interest in contributing to the Gemini Image library! + +## Getting Started + +### Prerequisites + +- Python 3.10+ +- [uv](https://docs.astral.sh/uv/) (recommended) or pip +- A Gemini API key from [Google AI Studio](https://aistudio.google.com/apikey) + +### Development Setup + +```bash +# Clone the repository +git clone https://github.com/ByronWilliamsCPA/python-libs.git +cd python-libs/packages/gemini-image + +# Create virtual environment and install dependencies +uv sync --all-extras + +# Or with pip +python -m venv .venv +source .venv/bin/activate +pip install -e ".[dev]" + +# Set up pre-commit hooks (from repo root) +cd ../.. 
+uv run pre-commit install +``` + +### Running Tests + +```bash +# Run all tests +uv run pytest + +# Run with coverage +uv run pytest --cov=src/gemini_image --cov-report=term-missing + +# Run specific test file +uv run pytest tests/test_utils.py -v +``` + +### Code Quality + +We use strict linting and type checking: + +```bash +# Format code +uv run ruff format src tests + +# Lint code +uv run ruff check src tests --fix + +# Type check +uv run basedpyright src +``` + +## Making Changes + +### Branch Naming + +Use conventional branch names: + +- `feat/feature-name` - New features +- `fix/bug-description` - Bug fixes +- `docs/topic` - Documentation updates +- `refactor/component` - Code refactoring +- `test/what-testing` - Test additions + +### Commit Messages + +Follow [Conventional Commits](https://www.conventionalcommits.org/): + +``` +feat(generator): add retry logic for API calls +fix(cli): handle missing API key gracefully +docs(readme): update installation instructions +test(utils): add format detection edge cases +``` + +### Code Style + +- **Line length**: 88 characters (Ruff default) +- **Type hints**: Required for all public functions +- **Docstrings**: Google style, required for public APIs +- **Tests**: Required for new functionality + +Example: + +```python +def generate_image( + prompt: str, + model_key: ModelKey = "pro", + *, + verbose: bool = False, +) -> Path | None: + """Generate an image using Gemini. + + Args: + prompt: Text description of the image to generate. + model_key: Model to use ('flash' or 'pro'). + verbose: Show detailed progress output. + + Returns: + Path to the generated image, or None on failure. + + Raises: + ValueError: If model_key is invalid. + APIError: If the Gemini API returns an error. + + """ +``` + +## Pull Request Process + +1. **Create a feature branch** from `main` +2. **Make your changes** with tests +3. **Run quality checks**: + ```bash + uv run ruff format . + uv run ruff check . 
+ uv run basedpyright src + uv run pytest --cov + ``` +4. **Update documentation** if needed +5. **Submit a pull request** with: + - Clear description of changes + - Link to related issue (if any) + - Test plan or evidence + +### PR Requirements + +- [ ] All tests pass +- [ ] Code coverage maintained (80%+) +- [ ] No linting errors +- [ ] Type checking passes +- [ ] Documentation updated (if applicable) +- [ ] CHANGELOG.md updated (for user-facing changes) + +## Reporting Issues + +### Bug Reports + +Include: + +- Python version +- Package version +- Minimal reproduction steps +- Expected vs actual behavior +- Error messages/stack traces + +### Feature Requests + +Include: + +- Use case description +- Proposed API (if applicable) +- Alternatives considered + +## Questions? + +- Open a [GitHub Discussion](https://github.com/ByronWilliamsCPA/python-libs/discussions) +- Check existing issues and discussions first + +## License + +By contributing, you agree that your contributions will be licensed under the MIT License. diff --git a/packages/gemini-image/LICENSE b/packages/gemini-image/LICENSE new file mode 100644 index 0000000..071f9d6 --- /dev/null +++ b/packages/gemini-image/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024-2026 Byron Williams + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/packages/gemini-image/SECURITY.md b/packages/gemini-image/SECURITY.md new file mode 100644 index 0000000..4539c82 --- /dev/null +++ b/packages/gemini-image/SECURITY.md @@ -0,0 +1,68 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +| ------- | ------------------ | +| 0.2.x | :white_check_mark: | +| 0.1.x | :white_check_mark: | +| < 0.1 | :x: | + +## Reporting a Vulnerability + +If you discover a security vulnerability in this project, please report it responsibly: + +1. **Do NOT** create a public GitHub issue for security vulnerabilities +2. Email the maintainer directly at: byronawilliams@gmail.com +3. 
Include: + - Description of the vulnerability + - Steps to reproduce + - Potential impact + - Suggested fix (if any) + +## Response Timeline + +- **Initial Response**: Within 48 hours +- **Status Update**: Within 7 days +- **Fix Timeline**: Depends on severity + - Critical: 24-48 hours + - High: 7 days + - Medium: 30 days + - Low: Next release + +## Security Best Practices + +When using this library: + +### API Key Security + +- **Never** commit your `GEMINI_API_KEY` to version control +- Use environment variables or `.env` files (kept out of version control via `.gitignore`) +- Rotate API keys periodically +- Use separate keys for development and production + +### Output Security + +- Generated images may contain embedded metadata +- Review generated content before public distribution +- Be aware of prompt injection risks in user-provided prompts + +### Dependencies + +This package depends on: + +- `google-genai`: Official Google Gemini SDK +- `tenacity`: Retry logic (no security-sensitive code) +- `python-dotenv`: Loads `GEMINI_API_KEY` from `.env` files +- `structlog`: Structured logging + +We monitor dependencies for known vulnerabilities using: + +- GitHub Dependabot +- Safety check in CI/CD + +## Acknowledgments + +We appreciate responsible disclosure of security issues. Contributors who report +valid security vulnerabilities will be acknowledged (unless they prefer to remain +anonymous). 
diff --git a/packages/gemini-image/pyproject.toml b/packages/gemini-image/pyproject.toml index 6493126..93bcbef 100644 --- a/packages/gemini-image/pyproject.toml +++ b/packages/gemini-image/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "byronwilliamscpa-gemini-image" -version = "0.1.0" +version = "0.2.0" description = "Image generation using Google Gemini models (Nano Banana / Nano Banana Pro)" readme = "README.md" requires-python = ">=3.10,<3.15" @@ -24,6 +24,9 @@ classifiers = [ dependencies = [ "google-genai>=1.0.0", + "tenacity>=8.2.0", + "python-dotenv>=1.0.0", + "structlog>=24.1.0", ] [project.optional-dependencies] @@ -64,3 +67,30 @@ changelog_file = "CHANGELOG.md" [tool.semantic_release.branches.main] match = "(main|master)" prerelease = false + +# Package-specific ruff overrides +[tool.ruff.lint] +ignore = [ + # Allow exception messages to use string literals and f-strings + # (common pattern for user-facing errors in libraries) + "EM101", # Exception must not use a string literal + "EM102", # Exception must not use an f-string literal + # Allow catching broad exceptions in specific cases (CLI error handling) + "BLE001", + # Allow dynamic imports for optional dependencies + "PLC0415", + # Allow try-except-return pattern + "TRY300", + # Allow logging.error in exception handlers + "TRY400", + # Complexity is acceptable for CLI main function and generators + "C901", + "PLR0912", + "PLR0915", +] + +[tool.ruff.lint.per-file-ignores] +# Tests can use assert and have simpler patterns +"tests/*" = ["S101", "PLR2004", "ANN"] +# CLI module can use print statements +"src/gemini_image/cli.py" = ["T201"] diff --git a/packages/gemini-image/src/gemini_image/__init__.py b/packages/gemini-image/src/gemini_image/__init__.py index b1e82b6..a122307 100644 --- a/packages/gemini-image/src/gemini_image/__init__.py +++ b/packages/gemini-image/src/gemini_image/__init__.py @@ -8,6 +8,9 @@ - Multi-part story sequence generation with visual continuity - Draft-then-finalize workflow 
for cost optimization - Thinking mode with intermediate image visualization + - Automatic PROMPTS.md registry for tracking generations + - Magic byte format detection for API mismatch correction + - Retry logic with exponential backoff for resilience Models: - flash: Gemini 2.5 Flash (fast generation) @@ -20,7 +23,30 @@ """ -from gemini_image.generator import generate_image, generate_story_sequence +from gemini_image.client import GeminiClient, get_api_key +from gemini_image.exceptions import ( + APIError, + ConfigurationError, + ContentBlockedError, + FileOperationError, + FormatDetectionError, + GeminiImageError, + GenerationError, + RateLimitError, + ServerError, + ValidationError, +) +from gemini_image.generator import ( + finalize_draft, + generate_image, + generate_story_sequence, +) +from gemini_image.io import ( + detect_image_format, + load_metadata, + save_image, + save_metadata, +) from gemini_image.models import ( ASPECT_RATIOS, DEFAULT_MODEL, @@ -31,18 +57,45 @@ ModelConfig, ModelKey, ) +from gemini_image.registry import PromptRegistry +from gemini_image.response_parser import ( + GenerationResponse, + ThoughtImage, + parse_response, +) __all__ = [ + "APIError", "ASPECT_RATIOS", + "AspectRatio", + "ConfigurationError", + "ContentBlockedError", "DEFAULT_MODEL", + "FileOperationError", + "FormatDetectionError", + "GeminiClient", + "GeminiImageError", + "GenerationError", + "GenerationResponse", "IMAGE_SIZES", - "MODELS", - "AspectRatio", "ImageSize", + "MODELS", "ModelConfig", "ModelKey", + "PromptRegistry", + "RateLimitError", + "ServerError", + "ThoughtImage", + "ValidationError", + "detect_image_format", + "finalize_draft", "generate_image", "generate_story_sequence", + "get_api_key", + "load_metadata", + "parse_response", + "save_image", + "save_metadata", ] -__version__ = "0.1.0" +__version__ = "0.2.0" diff --git a/packages/gemini-image/src/gemini_image/cli.py b/packages/gemini-image/src/gemini_image/cli.py index e6d7868..c1811f0 100644 --- 
a/packages/gemini-image/src/gemini_image/cli.py +++ b/packages/gemini-image/src/gemini_image/cli.py @@ -6,6 +6,8 @@ import sys from pathlib import Path +import structlog + from gemini_image.generator import ( finalize_draft, generate_image, @@ -13,12 +15,27 @@ ) from gemini_image.models import ASPECT_RATIOS, DEFAULT_MODEL, IMAGE_SIZES, MODELS +logger = structlog.get_logger(__name__) + + +def _configure_logging(verbose: bool) -> None: + """Configure structlog for CLI output.""" + if verbose: + structlog.configure( + wrapper_class=structlog.make_filtering_bound_logger(10), # DEBUG + ) + else: + structlog.configure( + wrapper_class=structlog.make_filtering_bound_logger(20), # INFO + ) + def list_models() -> None: """Print available models.""" print("Available models:\n") for key, config in MODELS.items(): - print(f" {key}:") + default_marker = " (default)" if key == DEFAULT_MODEL else "" + print(f" {key}{default_marker}:") print(f" Name: {config['name']}") print(f" ID: {config['id']}") print(f" Description: {config['description']}") @@ -53,6 +70,12 @@ def main() -> None: # Multi-part story generation (automatic continuity) %(prog)s "A 3-part journey through data governance" --story-parts 3 -o journey %(prog)s "Evolution of a data platform" --story-parts 4 --aspect 16:9 --size 2K -o evolution + + # Resume interrupted story generation + %(prog)s "Continue story" --story-parts 5 -o story --resume + + # Disable PROMPTS.md documentation + %(prog)s "Private prompt" --no-document -o private.png """, ) @@ -131,6 +154,12 @@ def main() -> None: help="Generate a multi-part story with N parts (uses previous image as reference)", ) + parser.add_argument( + "--resume", + action="store_true", + help="Resume story generation, skipping existing parts", + ) + parser.add_argument( "--draft-mode", action="store_true", @@ -144,6 +173,25 @@ def main() -> None: help="Finalize a draft image by regenerating at higher resolution (2K default)", ) + parser.add_argument( + "--no-document", + 
action="store_true", + help="Disable PROMPTS.md registry documentation (for privacy)", + ) + + parser.add_argument( + "--no-metadata", + action="store_true", + help="Disable JSON metadata sidecar file generation", + ) + + parser.add_argument( + "--registry", + type=Path, + metavar="PATH", + help="Path to PROMPTS.md registry file (default: output_dir/PROMPTS.md)", + ) + parser.add_argument( "--list-models", action="store_true", @@ -152,6 +200,9 @@ def main() -> None: args = parser.parse_args() + # Configure logging based on verbosity + _configure_logging(args.verbose) + if args.list_models: list_models() return @@ -168,7 +219,10 @@ def main() -> None: aspect_ratio=args.aspect, image_size=args.size, verbose=args.verbose, + document=not args.no_document, ) + if result: + print(f"Finalized image saved to: {result}") sys.exit(0 if result else 1) except FileNotFoundError as e: print(f"Error: {e}") @@ -193,8 +247,15 @@ def main() -> None: aspect_ratio=args.aspect, image_size=args.size, verbose=args.verbose, + resume=args.resume, + document=not args.no_document, ) + if results: + print(f"\nGenerated {len(results)} story parts:") + for i, path in enumerate(results, 1): + print(f" Part {i}: {path}") + sys.exit(0 if len(results) == args.story_parts else 1) # Single image mode @@ -211,17 +272,22 @@ def main() -> None: save_thoughts=args.save_thoughts, verbose=args.verbose, is_draft=args.draft_mode, + document=not args.no_document, + registry_path=args.registry, + save_metadata_file=not args.no_metadata, ) - if result and args.draft_mode: - print(f"\n{'=' * 60}") - print("Draft complete! To finalize at higher resolution:") - print(f" gemini-image --finalize {result} --size 2K") - print(f"{'=' * 60}") + if result: + print(f"Image saved to: {result}") + + if args.draft_mode: + print("\nDraft complete! 
To finalize at higher resolution:") + print(f" gemini-image --finalize {result} --size 2K") sys.exit(0 if result else 1) - except (ValueError, ImportError) as e: + except Exception as e: + logger.error("generation_failed", error=str(e)) print(f"Error: {e}") sys.exit(1) diff --git a/packages/gemini-image/src/gemini_image/client.py b/packages/gemini-image/src/gemini_image/client.py new file mode 100644 index 0000000..31ae617 --- /dev/null +++ b/packages/gemini-image/src/gemini_image/client.py @@ -0,0 +1,302 @@ +"""Gemini API client with retry logic and error handling. + +This module provides a wrapper around the google-genai client with: +- Automatic retry on transient failures (rate limits, server errors) +- Structured logging +- Custom exception mapping +""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import Any + +import structlog +from dotenv import load_dotenv +from tenacity import ( + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +from gemini_image.exceptions import ( + APIError, + ConfigurationError, + ContentBlockedError, + RateLimitError, + ServerError, +) + +logger = structlog.get_logger(__name__) + +# Lazy import for google.genai +_genai = None +_types = None + + +def _get_genai() -> tuple[Any, Any]: + """Lazy import google.genai to avoid import errors when not installed.""" + global _genai, _types # noqa: PLW0603 + if _genai is None: + try: + from google import genai + from google.genai import types + + _genai = genai + _types = types + except ImportError as e: + msg = ( + "google-genai package not installed. " + "Install with: pip install google-genai" + ) + raise ImportError(msg) from e + return _genai, _types + + +def get_api_key(env_file: Path | None = None) -> str: + """Get the Gemini API key from environment or .env file. + + This function uses python-dotenv for robust .env file parsing, + supporting quoted values, comments, and multiline values. 
+ + Args: + env_file: Optional path to .env file. If not provided, searches + for .env in current directory and parent directories. + + Returns: + The API key string. + + Raises: + ConfigurationError: If no API key is found. + + """ + # Load .env file if it exists + if env_file: + load_dotenv(env_file) + else: + # Search for .env in current and parent directories + load_dotenv() + + api_key = os.environ.get("GEMINI_API_KEY") + + if not api_key: + raise ConfigurationError( + "GEMINI_API_KEY environment variable not set", + details={ + "hint": "Set it with: export GEMINI_API_KEY='your-api-key'", + "alternative": "Create a .env file with GEMINI_API_KEY=your-key", + }, + ) + + return api_key + + +# Custom exception for retry logic +class _RetryableError(Exception): + """Internal marker for retryable errors.""" + + +def _is_retryable_exception(exc: BaseException) -> bool: + """Check if an exception should trigger a retry.""" + # Check our custom exceptions + if isinstance(exc, (RateLimitError, ServerError, _RetryableError)): + return True + + # Check google-genai exceptions + exc_type = type(exc).__name__ + exc_str = str(exc).lower() + + # Rate limit indicators + if "429" in exc_str or ("rate" in exc_str and "limit" in exc_str): + return True + + # Server error indicators + if any(code in exc_str for code in ["500", "502", "503", "504"]): + return True + + # Connection errors + if exc_type in ("ConnectionError", "TimeoutError", "ConnectionResetError"): + return True + + return False + + +class GeminiClient: + """Wrapper around google-genai with retry logic and error handling. + + This client provides: + - Automatic retry on transient failures (rate limits, server errors) + - Exponential backoff between retries + - Structured logging of requests and responses + - Custom exception mapping for better error handling + + Example: + >>> client = GeminiClient() + >>> response = client.generate_content( + ... model="gemini-3-pro-image-preview", + ... 
contents=["A beautiful sunset"], + ... config=generate_config, + ... ) + + """ + + def __init__( + self, + api_key: str | None = None, + *, + max_retries: int = 3, + min_retry_wait: float = 4.0, + max_retry_wait: float = 60.0, + ) -> None: + """Initialize the Gemini client. + + Args: + api_key: API key for authentication. If not provided, reads from + GEMINI_API_KEY environment variable. + max_retries: Maximum number of retry attempts for transient failures. + min_retry_wait: Minimum wait time between retries in seconds. + max_retry_wait: Maximum wait time between retries in seconds. + + Raises: + ConfigurationError: If no API key is available. + ImportError: If google-genai is not installed. + + """ + self._api_key = api_key or get_api_key() + self._max_retries = max_retries + self._min_retry_wait = min_retry_wait + self._max_retry_wait = max_retry_wait + + genai, _ = _get_genai() + self._client = genai.Client(api_key=self._api_key) + + logger.debug( + "gemini_client_initialized", + max_retries=max_retries, + ) + + @property + def client(self) -> Any: + """Access the underlying google-genai client.""" + return self._client + + def generate_content( + self, + *, + model: str, + contents: list[Any], + config: Any, + ) -> Any: + """Generate content with automatic retry on transient failures. + + This method wraps the underlying API call with retry logic using + exponential backoff. It automatically retries on: + - HTTP 429 (rate limit exceeded) + - HTTP 5xx (server errors) + - Connection errors and timeouts + + Args: + model: Model ID to use for generation. + contents: List of content parts (text, images). + config: Generation configuration. + + Returns: + The API response. + + Raises: + RateLimitError: If rate limit exceeded after all retries. + ServerError: If server error persists after all retries. + ContentBlockedError: If content is blocked by safety filters. + APIError: For other API errors. 
+ + """ + # Create the retry decorator dynamically to use instance settings + @retry( + stop=stop_after_attempt(self._max_retries), + wait=wait_exponential( + multiplier=1, + min=self._min_retry_wait, + max=self._max_retry_wait, + ), + retry=retry_if_exception_type(_RetryableError), + reraise=True, + ) + def _generate_with_retry() -> Any: + try: + logger.debug( + "generating_content", + model=model, + content_parts=len(contents), + ) + + response = self._client.models.generate_content( + model=model, + contents=contents, + config=config, + ) + + logger.debug( + "generation_complete", + has_candidates=bool(response.candidates), + ) + + return response + + except Exception as e: + # Map to our exception types + mapped_exc = self._map_exception(e) + + # Re-raise retryable exceptions wrapped for tenacity + if isinstance(mapped_exc, (RateLimitError, ServerError)): + logger.warning( + "retryable_error", + error_type=type(mapped_exc).__name__, + message=str(mapped_exc), + ) + raise _RetryableError(str(mapped_exc)) from mapped_exc + + # Non-retryable exceptions + raise mapped_exc from e + + try: + return _generate_with_retry() + except _RetryableError as e: + # If we exhausted retries, raise the underlying exception + if e.__cause__: + raise e.__cause__ from None + raise APIError(f"Generation failed after retries: {e}") from e + + def _map_exception(self, exc: Exception) -> Exception: + """Map google-genai exceptions to our exception types.""" + exc_str = str(exc).lower() + exc_type = type(exc).__name__ + + # Check for rate limit + if "429" in exc_str or ("rate" in exc_str and "limit" in exc_str): + return RateLimitError( + f"API rate limit exceeded: {exc}", + status_code=429, + ) + + # Check for server errors + for code in ["500", "502", "503", "504"]: + if code in exc_str: + return ServerError( + f"API server error: {exc}", + status_code=int(code), + ) + + # Check for content blocked + if "blocked" in exc_str or "safety" in exc_str: + return ContentBlockedError( + 
class GeminiImageError(Exception):
    """Base exception for all gemini-image errors.

    All custom exceptions in this package inherit from this class,
    allowing users to catch all package-specific errors with a single
    except clause.

    Attributes:
        message: Human-readable error description.
        details: Dictionary with additional context (empty when none given).

    """

    def __init__(self, message: str, *, details: dict[str, Any] | None = None) -> None:
        """Initialize the exception.

        Args:
            message: Human-readable error description.
            details: Optional dictionary with additional error context.
                The mapping is copied, so the caller's dict is never
                shared with (or modified through) the exception.

        """
        super().__init__(message)
        self.message = message
        # Copy instead of aliasing: the caller may reuse or mutate its dict.
        self.details: dict[str, Any] = dict(details) if details else {}

    def __str__(self) -> str:
        """Return the message, followed by ``key=value`` details if present."""
        if self.details:
            detail_str = ", ".join(f"{k}={v!r}" for k, v in self.details.items())
            return f"{self.message} ({detail_str})"
        return self.message


class ConfigurationError(GeminiImageError):
    """Raised when there is a configuration problem.

    Examples:
        - Missing API key
        - Invalid model configuration
        - Missing required environment variables

    """


class APIError(GeminiImageError):
    """Base class for API-related errors.

    Attributes:
        status_code: HTTP status code if available.
        response: Raw API response if available.

    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int | None = None,
        response: Any = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """Initialize the API error.

        Args:
            message: Human-readable error description.
            status_code: HTTP status code if available. When given it is
                also recorded under ``details["status_code"]``.
            response: Raw API response if available.
            details: Optional dictionary with additional error context.

        """
        # Work on a copy so the caller's dict does not gain a status_code key.
        details = dict(details) if details else {}
        if status_code is not None:
            details["status_code"] = status_code
        super().__init__(message, details=details)
        self.status_code = status_code
        self.response = response


class RateLimitError(APIError):
    """Raised when API rate limit is exceeded.

    This error indicates the request should be retried after a delay.
    The retry logic in GeminiClient handles this automatically.

    """

    def __init__(
        self,
        message: str = "API rate limit exceeded",
        *,
        retry_after: float | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the rate limit error.

        Args:
            message: Human-readable error description.
            retry_after: Suggested wait time in seconds. When given it is
                also recorded under ``details["retry_after"]``.
            **kwargs: Additional arguments passed to APIError.

        """
        # Copy the caller-supplied details before adding our own key.
        details = dict(kwargs.pop("details", None) or {})
        if retry_after is not None:
            details["retry_after"] = retry_after
        super().__init__(message, details=details, **kwargs)
        self.retry_after = retry_after


class ServerError(APIError):
    """Raised when the API returns a server error (5xx).

    This error indicates a transient failure that may succeed on retry.

    """


class ContentBlockedError(APIError):
    """Raised when content is blocked by safety filters.

    This error indicates the prompt or generated content violated
    content safety policies. Retrying will not help.

    Attributes:
        safety_ratings: Safety ratings from the API if available.

    """

    def __init__(
        self,
        message: str = "Content blocked by safety filters",
        *,
        safety_ratings: list[dict[str, Any]] | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the content blocked error.

        Args:
            message: Human-readable error description.
            safety_ratings: Safety ratings from the API. A non-empty list
                is also recorded under ``details["safety_ratings"]``.
            **kwargs: Additional arguments passed to APIError.

        """
        details = dict(kwargs.pop("details", None) or {})
        if safety_ratings:
            details["safety_ratings"] = safety_ratings
        super().__init__(message, details=details, **kwargs)
        self.safety_ratings = safety_ratings


class ValidationError(GeminiImageError):
    """Raised when input validation fails.

    Examples:
        - Invalid model key
        - Invalid aspect ratio
        - Invalid image size

    """

    def __init__(
        self,
        message: str,
        *,
        field: str | None = None,
        value: Any = None,
        valid_options: list[Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the validation error.

        Args:
            message: Human-readable error description.
            field: Name of the field that failed validation.
            value: The invalid value that was provided.
            valid_options: List of valid options if applicable.
            **kwargs: Additional arguments passed to GeminiImageError.

        """
        details = dict(kwargs.pop("details", None) or {})
        if field is not None:
            details["field"] = field
        # ``is not None`` (not truthiness) so falsy values such as 0 are kept.
        if value is not None:
            details["value"] = value
        if valid_options is not None:
            details["valid_options"] = valid_options
        super().__init__(message, details=details, **kwargs)
        self.field = field
        self.value = value
        self.valid_options = valid_options


class FormatDetectionError(GeminiImageError):
    """Raised when image format cannot be detected.

    This error indicates the image data does not match any known
    format based on magic bytes.

    """

    def __init__(
        self,
        message: str = "Unable to detect image format",
        *,
        magic_bytes: bytes | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the format detection error.

        Args:
            message: Human-readable error description.
            magic_bytes: First few bytes of the unrecognized data. Stored
                in ``details`` as a hex string capped at 32 characters.
            **kwargs: Additional arguments passed to GeminiImageError.

        """
        details = dict(kwargs.pop("details", None) or {})
        if magic_bytes is not None:
            details["magic_bytes"] = magic_bytes.hex()[:32]
        super().__init__(message, details=details, **kwargs)
        self.magic_bytes = magic_bytes


class FileOperationError(GeminiImageError):
    """Raised when a file operation fails.

    Examples:
        - File not found
        - Permission denied
        - Disk full

    """

    def __init__(
        self,
        message: str,
        *,
        path: str | None = None,
        operation: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the file operation error.

        Args:
            message: Human-readable error description.
            path: Path to the file involved.
            operation: Type of operation that failed (read, write, etc.).
            **kwargs: Additional arguments passed to GeminiImageError.

        """
        details = dict(kwargs.pop("details", None) or {})
        if path is not None:
            details["path"] = path
        if operation is not None:
            details["operation"] = operation
        super().__init__(message, details=details, **kwargs)
        self.path = path
        self.operation = operation


class GenerationError(GeminiImageError):
    """Raised when image generation fails.

    This is a general error for failures during the generation process
    that don't fit into more specific categories.

    """

    def __init__(
        self,
        message: str = "Image generation failed",
        *,
        prompt: str | None = None,
        model: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the generation error.

        Args:
            message: Human-readable error description.
            prompt: The prompt that was used. ``details`` holds a copy
                truncated to 100 characters; the attribute keeps it whole.
            model: The model that was used.
            **kwargs: Additional arguments passed to GeminiImageError.

        """
        details = dict(kwargs.pop("details", None) or {})
        if prompt is not None:
            # Truncate prompt for logging
            details["prompt"] = (prompt[:100] + "...") if len(prompt) > 100 else prompt
        if model is not None:
            details["model"] = model
        super().__init__(message, details=details, **kwargs)
        self.prompt = prompt
        self.model = model
import GenerationResponse, parse_response -# Lazy import for google.genai -_genai = None -_types = None - - -def _get_genai() -> tuple[Any, Any]: - """Lazy import google.genai to avoid import errors when not installed.""" - global _genai, _types # noqa: PLW0603 - if _genai is None: - try: - from google import genai - from google.genai import types - - _genai = genai - _types = types - except ImportError as e: - msg = ( - "google-genai package not installed. " - "Install with: pip install google-genai" - ) - raise ImportError(msg) from e - return _genai, _types +if TYPE_CHECKING: + from gemini_image.models import AspectRatio, ImageSize, ModelKey + +logger = structlog.get_logger(__name__) def generate_image( @@ -64,6 +49,10 @@ def generate_image( save_thoughts: bool = False, verbose: bool = False, is_draft: bool = False, + *, + document: bool = True, + registry_path: Path | None = None, + save_metadata_file: bool = True, ) -> Path | None: """Generate an image using Gemini. @@ -80,229 +69,115 @@ def generate_image( save_thoughts: Save intermediate thought images (pro model only). verbose: Show detailed thinking process and thought signatures. is_draft: Generate at 1K resolution for fast iteration. + document: If True, adds entry to PROMPTS.md registry. + registry_path: Path to PROMPTS.md file (default: output_dir/PROMPTS.md). + save_metadata_file: If True, saves JSON metadata sidecar file. Returns: Path to the generated image, or None on failure. Raises: - ValueError: If model_key is invalid or API key is missing. - ImportError: If google-genai is not installed. + ValidationError: If model_key, aspect_ratio, or image_size is invalid. + ConfigurationError: If API key is missing. + GenerationError: If image generation fails. """ - genai, types = _get_genai() - api_key = get_api_key() - - if model_key not in MODELS: - msg = f"Unknown model '{model_key}'. 
Valid options: {list(MODELS.keys())}" - raise ValueError(msg) + # Validate inputs + _validate_model_key(model_key) + _validate_aspect_ratio(aspect_ratio) + _validate_image_size(image_size) model_config = MODELS[model_key] - model_id = model_config["id"] - - if verbose: - print(f"Using model: {model_config['name']}") # noqa: T201 - print(f"Prompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}") # noqa: T201 - - # Initialize client - client = genai.Client(api_key=api_key) - # Build the content parts - contents: list = [] - - # Add reference images if provided - if reference_images: - for img_path in reference_images: - if not img_path.exists(): - if verbose: - print(f"Warning: Reference image not found: {img_path}") # noqa: T201 - continue - - if verbose: - print(f"Including reference image: {img_path}") # noqa: T201 - img_data, mime_type = load_image_as_base64(img_path) - contents.append( - types.Part.from_bytes( - data=base64.standard_b64decode(img_data), - mime_type=mime_type, - ) - ) + logger.info( + "generating_image", + model=model_config["name"], + prompt_preview=prompt[:50] + "..." 
if len(prompt) > 50 else prompt, + is_draft=is_draft, + ) - # Add the text prompt - contents.append(prompt) + # Initialize client and types + client = GeminiClient() + _, types = _get_genai() - # Build config kwargs - config_kwargs = { - "response_modalities": ["IMAGE", "TEXT"], - } + # Build content parts + contents = _build_contents(reference_images, prompt, types, verbose=verbose) - # Override size to 1K if draft mode + # Build generation config effective_size = "1K" if is_draft else image_size + config = _build_generation_config( + model_config=model_config, + aspect_ratio=aspect_ratio, + image_size=effective_size, + use_search=use_search, + types=types, + verbose=verbose, + ) - # Add image config for pro model - if model_config.get("supports_image_config"): - image_config_kwargs = {} - if aspect_ratio: - if aspect_ratio not in ASPECT_RATIOS: - if verbose: - print( # noqa: T201 - f"Warning: Invalid aspect ratio '{aspect_ratio}'. " - f"Valid: {ASPECT_RATIOS}" - ) - else: - image_config_kwargs["aspect_ratio"] = aspect_ratio - if verbose: - print(f"Aspect ratio: {aspect_ratio}") # noqa: T201 - if effective_size: - if effective_size not in IMAGE_SIZES: - if verbose: - print( # noqa: T201 - f"Warning: Invalid image size '{effective_size}'. 
" - f"Valid: {IMAGE_SIZES}" - ) - else: - image_config_kwargs["image_size"] = effective_size - if verbose: - print(f"Image size: {effective_size}") # noqa: T201 - - if image_config_kwargs: - config_kwargs["image_config"] = types.ImageConfig(**image_config_kwargs) - - # Add Google Search grounding if requested - if use_search: - # Google API accepts dict format for tools - config_kwargs["tools"] = [{"google_search": {}}] # type: ignore[typeddict-item] - if verbose: - print("Google Search grounding: enabled") # noqa: T201 - - # Configure generation - generate_config = types.GenerateContentConfig(**config_kwargs) - - if verbose: - print("Generating image...") # noqa: T201 - - response = client.models.generate_content( - model=model_id, + # Generate image + response = client.generate_content( + model=model_config["id"], contents=contents, - config=generate_config, + config=config, ) - # Process response - if not response.candidates: - if verbose: - print("Error: No response candidates returned.") # noqa: T201 - if hasattr(response, "prompt_feedback"): - print(f"Feedback: {response.prompt_feedback}") # noqa: T201 - return None + # Parse response + parsed = parse_response(response, verbose=verbose) - # Track thoughts and final images - thought_count = 0 - final_image_data = None - final_mime_type = None - final_signature = None + if not parsed.has_image: + logger.error("generation_failed_no_image") + return None - # Determine output directory + # Determine output path if output_dir is None: output_dir = Path.cwd() - # Process all parts in response - for part in response.candidates[0].content.parts: - # Check if this is a thought (intermediate reasoning step) - is_thought = hasattr(part, "thought") and part.thought - - if is_thought: - thought_count += 1 - if verbose: - print(f"\n[Thought {thought_count}]") # noqa: T201 - - # Handle thought text - if part.text is not None and verbose: - print(f"Reasoning: {part.text}") # noqa: T201 - - # Handle thought image - if 
part.inline_data is not None and save_thoughts: - thought_data = part.inline_data.data - thought_mime = part.inline_data.mime_type - thought_ext = get_file_extension(thought_mime) - - # Save thought image - if output_path: - thought_path = ( - output_dir - / f"{output_path.stem}_thought{thought_count}{thought_ext}" - ) - else: - timestamp = datetime.now(tz=UTC).strftime("%Y%m%d_%H%M%S") - thought_path = ( - output_dir / f"thought{thought_count}_{timestamp}{thought_ext}" - ) - - thought_path.parent.mkdir(parents=True, exist_ok=True) - with open(thought_path, "wb") as f: - f.write(thought_data) - - if verbose: - print(f"Thought image {thought_count} saved to: {thought_path}") # noqa: T201 - - # Non-thought content (final output) - elif part.inline_data is not None: - # Final image - final_image_data = part.inline_data.data - final_mime_type = part.inline_data.mime_type - - # Extract thought signature if available - if hasattr(part, "thought_signature") and part.thought_signature: - final_signature = part.thought_signature - if verbose: - print(f"\n[Thought Signature]: {final_signature[:100]}...") # noqa: T201 - - elif part.text is not None and verbose: - # Final text response - print(f"\nModel response: {part.text}") # noqa: T201 - - # Extract thought signature from text part if available - if hasattr(part, "thought_signature") and part.thought_signature: - final_signature = part.thought_signature - if verbose: - print(f"[Thought Signature]: {final_signature[:100]}...") # noqa: T201 - - # Save final image - if final_image_data is not None: - # Determine output filename - if output_path is None: - timestamp = datetime.now(tz=UTC).strftime("%Y%m%d_%H%M%S") - ext = get_file_extension(final_mime_type or "image/png") - prefix = "draft_" if is_draft else "generated_" - output_path = output_dir / f"{prefix}{timestamp}{ext}" - elif not output_path.is_absolute(): - output_path = output_dir / output_path - - # Ensure output directory exists - 
output_path.parent.mkdir(parents=True, exist_ok=True) - - # Write image - with open(output_path, "wb") as f: - f.write(final_image_data) + final_path = _determine_output_path( + output_path=output_path, + output_dir=output_dir, + image_format=parsed.image_format or "png", + is_draft=is_draft, + ) - if verbose: - if thought_count > 0: - print(f"\nProcessed {thought_count} thought step(s)") # noqa: T201 - print(f"Final image saved to: {output_path}") # noqa: T201 + # Save the image (with format correction) + assert parsed.image_data is not None # We checked has_image above + saved_path = save_image(parsed.image_data, final_path, correct_extension=True) - # Optionally save thought signature to sidecar file - if final_signature and verbose: - sig_path = output_path.with_suffix(".signature.bin") - with open(sig_path, "wb") as f: - if isinstance(final_signature, bytes): - f.write(final_signature) - else: - f.write(str(final_signature).encode()) - print(f"Thought signature saved to: {sig_path}") # noqa: T201 + # Save thought images if requested + if save_thoughts and parsed.thought_images: + _save_thought_images(parsed, saved_path, output_dir, verbose=verbose) - return output_path + # Save metadata sidecar + if save_metadata_file: + save_metadata( + saved_path, + prompt=prompt, + model=model_key, + aspect_ratio=aspect_ratio, + image_size=effective_size, + reference_images=reference_images, + thought_signature=( + str(parsed.thought_signature) if parsed.thought_signature else None + ), + is_draft=is_draft, + ) - if verbose: - print("Error: No image data in response.") # noqa: T201 - return None + # Add to registry + if document: + reg_path = registry_path or (output_dir / "PROMPTS.md") + registry = PromptRegistry(reg_path) + registry.add_entry( + image_path=saved_path, + prompt=prompt, + model=model_key, + aspect_ratio=aspect_ratio, + image_size=effective_size, + reference_images=reference_images, + is_draft=is_draft, + ) + + logger.info("image_generated", 
path=str(saved_path)) + return saved_path def generate_story_sequence( @@ -314,8 +189,11 @@ def generate_story_sequence( aspect_ratio: AspectRatio | None = None, image_size: ImageSize | None = None, verbose: bool = False, + *, + resume: bool = True, + document: bool = True, ) -> list[Path]: - """Generate a multi-part story sequence using conversational refinement. + """Generate a multi-part story sequence with visual continuity. Each subsequent image uses the previous image as a reference for visual continuity. @@ -330,71 +208,61 @@ def generate_story_sequence( aspect_ratio: Aspect ratio for all images. image_size: Image size for all images. verbose: Show detailed process. + resume: If True, skips parts that already exist. + document: If True, adds entries to PROMPTS.md registry. Returns: List of paths to generated images. Raises: - ValueError: If num_parts < 1. + ValidationError: If num_parts < 1. """ if num_parts < 1: - msg = "Number of story parts must be at least 1" - raise ValueError(msg) + raise ValidationError( + "Number of story parts must be at least 1", + field="num_parts", + value=num_parts, + ) if output_dir is None: output_dir = Path.cwd() if output_prefix is None: - timestamp = datetime.now(tz=UTC).strftime("%Y%m%d_%H%M%S") + timestamp = datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S") output_prefix = Path(f"story_{timestamp}") generated_images: list[Path] = [] previous_image_path: Path | None = None - if verbose: - print(f"Generating {num_parts}-part story sequence...") # noqa: T201 - print(f"Base prompt: {base_prompt}\n") # noqa: T201 + logger.info( + "generating_story_sequence", + num_parts=num_parts, + base_prompt=base_prompt[:50] + "..." 
if len(base_prompt) > 50 else base_prompt, + ) for part_num in range(1, num_parts + 1): - if verbose: - print(f"\n{'=' * 60}") # noqa: T201 - print(f"PART {part_num}/{num_parts}") # noqa: T201 - print(f"{'=' * 60}") # noqa: T201 + # Build output path for this part + output_path = output_dir / f"{output_prefix.stem}_part{part_num}.png" + + # Check if we should resume from existing + if resume and output_path.exists(): + logger.info("skipping_existing_part", part=part_num, path=str(output_path)) + generated_images.append(output_path) + previous_image_path = output_path + continue # Build prompt for this part - if part_num == 1: - prompt = ( - f"{base_prompt}\n\n" - f"This is part 1 of {num_parts}. Create the opening scene that " - "establishes the context and visual style for the entire sequence." - ) - elif part_num == num_parts: - prompt = ( - f"This is part {part_num} of {num_parts}, the final scene. " - "Building on the previous image, create a concluding scene that " - "resolves the narrative. Maintain visual consistency with the " - "established style." - ) - else: - prompt = ( - f"This is part {part_num} of {num_parts}. Building on the previous " - "image, advance the narrative while maintaining visual consistency " - "with the established style." 
- ) + part_prompt = _build_story_prompt(base_prompt, part_num, num_parts) - # Build output path - output_path = Path(f"{output_prefix.stem}_part{part_num}.png") + logger.info("generating_story_part", part=part_num, total=num_parts) # Build reference images list reference_images = [previous_image_path] if previous_image_path else None - if verbose: - print(f"Prompt: {prompt[:100]}...") # noqa: T201 - # Generate this part result = generate_image( - prompt=prompt, + prompt=part_prompt, model_key=model_key, reference_images=reference_images, output_path=output_path, @@ -404,27 +272,22 @@ def generate_story_sequence( use_search=False, save_thoughts=False, verbose=verbose, + document=document, ) if result: generated_images.append(result) previous_image_path = result - if verbose: - print(f"Part {part_num} complete: {result}") # noqa: T201 + logger.info("story_part_complete", part=part_num, path=str(result)) else: - if verbose: - print(f"Failed to generate part {part_num}") # noqa: T201 + logger.error("story_part_failed", part=part_num) break - if verbose: - print(f"\n{'=' * 60}") # noqa: T201 - print( # noqa: T201 - f"Story sequence complete: {len(generated_images)}/{num_parts} parts generated" - ) - print(f"{'=' * 60}\n") # noqa: T201 - - for i, path in enumerate(generated_images, 1): - print(f" Part {i}: {path}") # noqa: T201 + logger.info( + "story_sequence_complete", + generated=len(generated_images), + total=num_parts, + ) return generated_images @@ -438,41 +301,65 @@ def finalize_draft( aspect_ratio: AspectRatio | None = None, image_size: ImageSize | None = None, verbose: bool = False, + *, + document: bool = True, ) -> Path | None: """Finalize a draft image by regenerating at higher resolution. + This function attempts to read the original prompt from the draft's + metadata sidecar file. If available, it uses that prompt for better + reproduction fidelity. + Args: draft_path: Path to the draft image. - prompt: Optional refinement prompt. 
If not provided, uses a - default upscaling prompt. + prompt: Optional refinement prompt. If not provided, uses the + original prompt from metadata or a default upscaling prompt. model_key: Model to use. output_path: Output path for the final image. output_dir: Output directory. aspect_ratio: Aspect ratio (default: "16:9"). image_size: Target resolution (default: "2K"). verbose: Show detailed process. + document: If True, adds entry to PROMPTS.md registry. Returns: Path to the finalized image, or None on failure. Raises: - FileNotFoundError: If the draft image doesn't exist. + FileOperationError: If the draft image doesn't exist. """ + from gemini_image.exceptions import FileOperationError + if not draft_path.exists(): - msg = f"Draft image not found: {draft_path}" - raise FileNotFoundError(msg) + raise FileOperationError( + f"Draft image not found: {draft_path}", + path=str(draft_path), + operation="read", + ) - # Determine final resolution - final_size = image_size if image_size else "2K" - final_aspect = aspect_ratio if aspect_ratio else "16:9" + # Try to load original metadata + metadata = load_metadata(draft_path) + original_prompt = None - if verbose: - print(f"Finalizing draft image: {draft_path}") # noqa: T201 - print(f"Target resolution: {final_size} ({final_aspect})") # noqa: T201 + if metadata: + original_prompt = metadata.get("prompt") + if verbose and original_prompt: + logger.info("using_original_prompt", preview=original_prompt[:50]) - # Use provided prompt or default upscaling prompt - final_prompt = prompt or ( + # Determine final resolution + final_size = image_size or "2K" + final_aspect = aspect_ratio or "16:9" + + logger.info( + "finalizing_draft", + draft=str(draft_path), + target_size=final_size, + target_aspect=final_aspect, + ) + + # Use provided prompt, original prompt, or default + final_prompt = prompt or original_prompt or ( "Recreate this image at higher resolution with the same " "composition, style, and details" ) @@ -490,13 +377,204 
def _validate_model_key(model_key: str) -> None:
    """Raise ValidationError unless ``model_key`` is a key of ``MODELS``."""
    if model_key not in MODELS:
        raise ValidationError(
            f"Unknown model '{model_key}'",
            field="model_key",
            value=model_key,
            valid_options=list(MODELS.keys()),
        )


def _validate_aspect_ratio(aspect_ratio: str | None) -> None:
    """Raise ValidationError for an aspect ratio not in ``ASPECT_RATIOS``.

    ``None`` means "not specified" and is always accepted.
    """
    if aspect_ratio is not None and aspect_ratio not in ASPECT_RATIOS:
        raise ValidationError(
            f"Invalid aspect ratio '{aspect_ratio}'",
            field="aspect_ratio",
            value=aspect_ratio,
            valid_options=ASPECT_RATIOS,
        )


def _validate_image_size(image_size: str | None) -> None:
    """Raise ValidationError for an image size not in ``IMAGE_SIZES``.

    ``None`` means "not specified" and is always accepted.
    """
    if image_size is not None and image_size not in IMAGE_SIZES:
        raise ValidationError(
            f"Invalid image size '{image_size}'",
            field="image_size",
            value=image_size,
            valid_options=IMAGE_SIZES,
        )


def _build_contents(
    reference_images: list[Path] | None,
    prompt: str,
    types: object,
    *,
    verbose: bool = False,
) -> list[object]:
    """Build the contents list for the API request.

    Args:
        reference_images: Optional image files to send ahead of the prompt.
            Paths that do not exist are logged and skipped.
        prompt: Text prompt; always appended as the last element.
        types: The ``google.genai.types`` module (only ``Part.from_bytes``
            is used here).
        verbose: Accepted for signature compatibility; not used.

    Returns:
        One ``Part`` per existing reference image, followed by the prompt.

    """
    contents: list[object] = []

    # MIME type is derived from the file extension only (the API validates
    # the payload); unknown extensions fall back to PNG.
    mime_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    for img_path in reference_images or ():
        if not img_path.exists():
            logger.warning("reference_image_not_found", path=str(img_path))
            continue

        logger.debug("including_reference_image", path=str(img_path))

        contents.append(
            types.Part.from_bytes(  # type: ignore[union-attr]
                data=img_path.read_bytes(),
                mime_type=mime_types.get(img_path.suffix.lower(), "image/png"),
            )
        )

    # Add the text prompt
    contents.append(prompt)
    return contents


def _build_generation_config(
    model_config: dict[str, object],
    aspect_ratio: str | None,
    image_size: str | None,
    use_search: bool,
    types: object,
    *,
    verbose: bool = False,
) -> object:
    """Build the generation config for the API request.

    Args:
        model_config: Model entry; only its ``supports_image_config`` flag
            is read here.
        aspect_ratio: Aspect ratio to request, if any.
        image_size: Image size to request, if any.
        use_search: Whether to enable the Google Search tool.
        types: The ``google.genai.types`` module.
        verbose: Accepted for signature compatibility; not used.

    Returns:
        A ``types.GenerateContentConfig`` instance.

    """
    config_kwargs: dict[str, object] = {
        "response_modalities": ["IMAGE", "TEXT"],
    }

    # Aspect ratio / size are only sent to models that accept an image config.
    if model_config.get("supports_image_config"):
        image_config_kwargs: dict[str, str] = {}

        if aspect_ratio:
            image_config_kwargs["aspect_ratio"] = aspect_ratio
            logger.debug("config_aspect_ratio", value=aspect_ratio)

        if image_size:
            image_config_kwargs["image_size"] = image_size
            logger.debug("config_image_size", value=image_size)

        if image_config_kwargs:
            config_kwargs["image_config"] = types.ImageConfig(**image_config_kwargs)  # type: ignore[union-attr]

    # Add Google Search grounding if requested
    if use_search:
        config_kwargs["tools"] = [{"google_search": {}}]  # type: ignore[typeddict-item]
        logger.debug("config_search_enabled")

    return types.GenerateContentConfig(**config_kwargs)  # type: ignore[union-attr]


def _determine_output_path(
    output_path: Path | None,
    output_dir: Path,
    image_format: str,
    is_draft: bool,
) -> Path:
    """Determine the final output path.

    An absolute ``output_path`` is returned unchanged and a relative one is
    joined onto ``output_dir``. Without ``output_path`` a UTC-timestamped
    name is generated, prefixed ``draft_`` or ``generated_``.
    """
    if output_path is not None:
        # Honor absolute paths as-is
        if output_path.is_absolute():
            return output_path
        return output_dir / output_path

    # Generate timestamped filename
    timestamp = datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")
    ext = get_extension_for_format(image_format)
    prefix = "draft_" if is_draft else "generated_"
    return output_dir / f"{prefix}{timestamp}{ext}"


def _save_thought_images(
    parsed: GenerationResponse,
    main_image_path: Path,
    output_dir: Path,
    *,
    verbose: bool = False,
) -> list[Path]:
    """Save thought images alongside the main image.

    Files are named ``<main stem>_thought<index><ext>`` inside ``output_dir``.

    Returns:
        The paths actually written. ``save_image`` may correct the extension
        to match the detected format, so its return value is what gets
        recorded, not the path that was requested.

    """
    saved_paths: list[Path] = []

    for thought in parsed.thought_images:
        ext = get_extension_for_format(thought.format)
        requested_path = output_dir / f"{main_image_path.stem}_thought{thought.index}{ext}"

        # Keep the returned path: it differs from the requested one whenever
        # the extension was corrected.
        actual_path = save_image(thought.data, requested_path, correct_extension=True)
        saved_paths.append(actual_path)

        if verbose:
            logger.info(
                "thought_image_saved",
                index=thought.index,
                path=str(actual_path),
            )

    return saved_paths


def _build_story_prompt(base_prompt: str, part_num: int, total_parts: int) -> str:
    """Build the prompt for a specific story part.

    Only part 1 embeds ``base_prompt``; later parts rely on the previous
    image for context. The opening-scene branch is checked first, so a
    one-part story gets the opening prompt.
    """
    if part_num == 1:
        return (
            f"{base_prompt}\n\n"
            f"This is part 1 of {total_parts}. Create the opening scene that "
            "establishes the context and visual style for the entire sequence."
        )
    if part_num == total_parts:
        return (
            f"This is part {part_num} of {total_parts}, the final scene. "
            "Building on the previous image, create a concluding scene that "
            "resolves the narrative. Maintain visual consistency with the "
            "established style."
        )
    return (
        f"This is part {part_num} of {total_parts}. Building on the previous "
        "image, advance the narrative while maintaining visual consistency "
        "with the established style."
    )
# Magic byte signatures for image format detection
# Reference: https://en.wikipedia.org/wiki/List_of_file_signatures
IMAGE_SIGNATURES: dict[bytes, str] = {
    b"\x89PNG\r\n\x1a\n": "png",
    b"\xff\xd8\xff": "jpeg",
    b"GIF87a": "gif",
    b"GIF89a": "gif",
    b"RIFF": "webp",  # WebP starts with RIFF....WEBP
}

# MIME type to extension mapping
MIME_TO_EXTENSION: dict[str, str] = {
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/gif": ".gif",
    "image/webp": ".webp",
}

# Extension to MIME type mapping
EXTENSION_TO_MIME: dict[str, str] = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
}


def detect_image_format(data: bytes) -> str:
    """Detect image format from magic bytes.

    This function examines the first few bytes of image data to determine
    the actual format, regardless of filename extension. This is critical
    because the Gemini API sometimes returns JPEG data with a PNG extension.

    Args:
        data: Raw image bytes.

    Returns:
        Format string: 'png', 'jpeg', 'gif', or 'webp'.

    Raises:
        FormatDetectionError: If ``data`` is empty or matches no known
            signature.

    Example:
        >>> with open("image.png", "rb") as f:
        ...     data = f.read()
        >>> format = detect_image_format(data)
        >>> print(format)  # 'png' or 'jpeg' based on actual content

    """
    if not data:
        raise FormatDetectionError("Empty image data", magic_bytes=b"")

    # Check for PNG (8-byte signature)
    if data.startswith(b"\x89PNG\r\n\x1a\n"):
        return "png"

    # Check for JPEG (3-byte signature)
    if data.startswith(b"\xff\xd8\xff"):
        return "jpeg"

    # Check for GIF (6-byte signature, either version)
    if data.startswith((b"GIF87a", b"GIF89a")):
        return "gif"

    # WebP is a RIFF container: "RIFF", 4 size bytes, then "WEBP".
    # RIFF alone is not enough, so bytes 8-11 are checked as well.
    if data.startswith(b"RIFF") and len(data) >= 12 and data[8:12] == b"WEBP":
        return "webp"

    # Unknown format: attach the leading bytes for diagnostics
    raise FormatDetectionError(
        "Unknown image format",
        magic_bytes=data[:16],
    )


def get_extension_for_format(image_format: str) -> str:
    """Get the correct file extension for an image format.

    Args:
        image_format: Format string ('png', 'jpeg', 'gif', 'webp');
            matched case-insensitively.

    Returns:
        File extension including the dot (e.g., '.png'). Unrecognized
        formats fall back to '.png'.

    """
    extensions = {
        "png": ".png",
        "jpeg": ".jpg",
        "gif": ".gif",
        "webp": ".webp",
    }
    return extensions.get(image_format.lower(), ".png")


def get_mime_type_for_format(image_format: str) -> str:
    """Get the MIME type for an image format.

    Args:
        image_format: Format string ('png', 'jpeg', 'gif', 'webp');
            matched case-insensitively.

    Returns:
        MIME type string (e.g., 'image/png'). Unrecognized formats fall
        back to 'image/png'.

    """
    mime_types = {
        "png": "image/png",
        "jpeg": "image/jpeg",
        "gif": "image/gif",
        "webp": "image/webp",
    }
    return mime_types.get(image_format.lower(), "image/png")


def save_image(
    data: bytes,
    path: Path,
    *,
    correct_extension: bool = True,
    create_parents: bool = True,
) -> Path:
    """Save image data to a file with optional extension correction.

    This function detects the actual image format from the data and
    optionally corrects the file extension to match. This prevents
    issues where the API returns JPEG data but the user specifies a .png
    extension.

    Args:
        data: Raw image bytes to save.
        path: Target file path.
        correct_extension: If True, corrects the extension based on actual format.
        create_parents: If True, creates parent directories as needed.

    Returns:
        Path where the image was actually saved (may differ from input if
        extension was corrected).

    Raises:
        FormatDetectionError: If the image format cannot be detected.
        FileOperationError: If the parent directory cannot be created or
            the file cannot be written.

    Example:
        >>> # User specifies .png but data is actually JPEG
        >>> actual_path = save_image(jpeg_data, Path("image.png"))
        >>> print(actual_path)  # Path("image.jpg")

    """
    # Detect actual format
    actual_format = detect_image_format(data)
    actual_extension = get_extension_for_format(actual_format)

    # Determine final path
    if correct_extension and path.suffix.lower() != actual_extension:
        corrected_path = path.with_suffix(actual_extension)
        logger.info(
            "correcting_file_extension",
            original=str(path),
            corrected=str(corrected_path),
            detected_format=actual_format,
        )
        path = corrected_path

    # Directory creation and the write share one handler so that every
    # filesystem failure surfaces as FileOperationError.
    try:
        if create_parents:
            path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(data)
    except OSError as e:
        raise FileOperationError(
            f"Failed to save image: {e}",
            path=str(path),
            operation="write",
        ) from e

    logger.debug("image_saved", path=str(path), size=len(data))
    return path


def save_metadata(
    image_path: Path,
    *,
    prompt: str,
    model: ModelKey,
    aspect_ratio: AspectRatio | None = None,
    image_size: ImageSize | None = None,
    reference_images: list[Path] | None = None,
    thought_signature: str | None = None,
    is_draft: bool = False,
    extra: dict[str, Any] | None = None,
) -> Path:
    """Save metadata sidecar JSON file for a generated image.

    Creates a JSON file alongside the image with generation metadata.
    This enables tracking of generation parameters and supports the
    finalize workflow.

    Args:
        image_path: Path to the generated image; the sidecar is this path
            with its suffix replaced by ``.json``.
        prompt: The prompt used for generation.
        model: The model key used.
        aspect_ratio: The aspect ratio used (if any).
        image_size: The image size used (if any).
        reference_images: List of reference image paths used.
        thought_signature: The thought signature from the API (if any).
        is_draft: Whether this is a draft image.
        extra: Additional metadata to include. Applied last, so its keys
            override the standard ones on collision.

    Returns:
        Path to the saved metadata file.

    Raises:
        FileOperationError: If the metadata file cannot be written.

    """
    metadata: dict[str, Any] = {
        "prompt": prompt,
        "model": model,
        "created_at": datetime.now(tz=timezone.utc).isoformat(),
        "is_draft": is_draft,
    }

    # Optional fields are written only when set.
    if aspect_ratio:
        metadata["aspect_ratio"] = aspect_ratio
    if image_size:
        metadata["image_size"] = image_size
    if reference_images:
        metadata["reference_images"] = [str(p) for p in reference_images]
    if thought_signature:
        metadata["thought_signature"] = thought_signature
    if extra:
        metadata.update(extra)

    # Write metadata file
    metadata_path = image_path.with_suffix(".json")
    try:
        # Explicit encoding keeps the sidecar independent of the platform default.
        with open(metadata_path, "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)
    except OSError as e:
        raise FileOperationError(
            f"Failed to save metadata: {e}",
            path=str(metadata_path),
            operation="write",
        ) from e

    logger.debug("metadata_saved", path=str(metadata_path))
    return metadata_path
+ + """ + metadata_path = image_path.with_suffix(".json") + + if not metadata_path.exists(): + return None + + try: + with open(metadata_path) as f: + return json.load(f) + except (OSError, json.JSONDecodeError) as e: + raise FileOperationError( + f"Failed to load metadata: {e}", + path=str(metadata_path), + operation="read", + ) from e + + +def validate_image_file(path: Path) -> tuple[str, int]: + """Validate that a file is a readable image and return its format. + + Args: + path: Path to the image file. + + Returns: + Tuple of (format string, file size in bytes). + + Raises: + FileOperationError: If the file doesn't exist or can't be read. + FormatDetectionError: If the file is not a valid image. + + """ + if not path.exists(): + raise FileOperationError( + f"Image file not found: {path}", + path=str(path), + operation="read", + ) + + try: + with open(path, "rb") as f: + data = f.read() + except OSError as e: + raise FileOperationError( + f"Cannot read image file: {e}", + path=str(path), + operation="read", + ) from e + + image_format = detect_image_format(data) + return image_format, len(data) diff --git a/packages/gemini-image/src/gemini_image/registry.py b/packages/gemini-image/src/gemini_image/registry.py new file mode 100644 index 0000000..d6687e8 --- /dev/null +++ b/packages/gemini-image/src/gemini_image/registry.py @@ -0,0 +1,282 @@ +"""PROMPTS.md registry for documenting generated images. + +This module manages a markdown file that documents all generated images, +their prompts, parameters, and metadata. 
class PromptRegistry:
    """Manages a PROMPTS.md file for documenting image generations.

    The registry maintains a markdown file with:
    - An index table for quick reference
    - Detailed entries for each generation with full metadata
    - Idempotent updates (won't duplicate entries)

    The file is always read and written as UTF-8, because prompts are
    arbitrary user text and must not depend on the platform's locale
    encoding.

    Example:
        >>> registry = PromptRegistry(Path("output/PROMPTS.md"))
        >>> registry.add_entry(
        ...     image_path=Path("output/image.png"),
        ...     prompt="A beautiful sunset",
        ...     model="pro",
        ...     aspect_ratio="16:9",
        ...     image_size="2K",
        ... )

    """

    def __init__(self, registry_path: Path | None = None) -> None:
        """Initialize the registry.

        Args:
            registry_path: Path to the PROMPTS.md file. If not provided,
                uses PROMPTS.md in the current directory.

        """
        self.path = registry_path or Path(DEFAULT_REGISTRY_NAME)

    def _ensure_exists(self) -> None:
        """Create the registry file from the template if it is missing.

        Raises:
            FileOperationError: If the parent directory or the file cannot
                be created.

        """
        if self.path.exists():
            return

        try:
            # mkdir is inside the try so directory-creation failures are
            # reported as FileOperationError too.
            self.path.parent.mkdir(parents=True, exist_ok=True)
            self.path.write_text(REGISTRY_TEMPLATE, encoding="utf-8")
        except OSError as e:
            raise FileOperationError(
                f"Failed to create registry: {e}",
                path=str(self.path),
                operation="write",
            ) from e

        logger.info("registry_created", path=str(self.path))

    def _read_content(self) -> str:
        """Return the registry text, creating the file first if needed.

        Raises:
            FileOperationError: If the file cannot be read or decoded.

        """
        self._ensure_exists()
        try:
            return self.path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            raise FileOperationError(
                f"Failed to read registry: {e}",
                path=str(self.path),
                operation="read",
            ) from e

    def _write_content(self, content: str) -> None:
        """Overwrite the registry file with ``content``.

        Raises:
            FileOperationError: If the file cannot be written.

        """
        try:
            self.path.write_text(content, encoding="utf-8")
        except OSError as e:
            raise FileOperationError(
                f"Failed to write registry: {e}",
                path=str(self.path),
                operation="write",
            ) from e

    def _entry_exists(self, content: str, filename: str) -> bool:
        """Check whether ``content`` has a ``### <filename>`` header line."""
        pattern = rf"^### {re.escape(filename)}$"
        return bool(re.search(pattern, content, re.MULTILINE))

    @staticmethod
    def _parse_entry(content: str, filename: str) -> dict[str, str] | None:
        """Parse one entry out of already-loaded registry text.

        Args:
            content: Full registry text.
            filename: Name of the image file whose entry to parse.

        Returns:
            Dictionary with ``filename``, one lower-cased / underscored key
            per property row (``file``, ``model``, ``date``, ``type``,
            ``aspect_ratio``, ``size``) and ``prompt`` when present, or None
            if there is no entry for ``filename``.

        """
        # Anchored at line start, like _entry_exists. The entry runs up to
        # the next entry header, the closing "---" of the file, or the end.
        pattern = rf"^### {re.escape(filename)}\n(.*?)(?=\n### |\n---\n*\Z|\Z)"
        match = re.search(pattern, content, re.DOTALL | re.MULTILINE)
        if not match:
            return None

        entry_text = match.group(1)
        result: dict[str, str] = {"filename": filename}

        # Only look at the property table, i.e. the text before the prompt,
        # so table-like lines inside a prompt cannot overwrite metadata.
        table_text = entry_text.split("**Prompt:**", 1)[0]

        # Property names may contain spaces ("Aspect Ratio").
        table_pattern = r"\| \*\*([\w ]+)\*\* \| (.+?) \|"
        for prop_match in re.finditer(table_pattern, table_text):
            key = prop_match.group(1).strip().lower().replace(" ", "_")
            result[key] = prop_match.group(2).strip().strip("`")

        prompt_pattern = r"\*\*Prompt:\*\*\n```\n(.*?)\n```"
        prompt_match = re.search(prompt_pattern, entry_text, re.DOTALL)
        if prompt_match:
            result["prompt"] = prompt_match.group(1)

        return result

    def add_entry(
        self,
        image_path: Path,
        prompt: str,
        model: ModelKey,
        *,
        aspect_ratio: AspectRatio | None = None,
        image_size: ImageSize | None = None,
        reference_images: list[Path] | None = None,
        is_draft: bool = False,
    ) -> bool:
        """Add an entry to the registry.

        This method is idempotent - it won't add duplicate entries for
        the same filename.

        Args:
            image_path: Path to the generated image.
            prompt: The prompt used for generation.
            model: The model key used.
            aspect_ratio: The aspect ratio used (recorded as "default" if
                not given).
            image_size: The image size used (recorded as "default" if not
                given).
            reference_images: List of reference image paths used.
            is_draft: Whether this is a draft image.

        Returns:
            True if entry was added, False if it already existed.

        Raises:
            FileOperationError: If the registry cannot be read or written.

        """
        content = self._read_content()
        filename = image_path.name

        if self._entry_exists(content, filename):
            logger.debug("registry_entry_exists", filename=filename)
            return False

        image_type = "Draft" if is_draft else "Final"
        date_str = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

        references_section = ""
        if reference_images:
            ref_list = "\n".join(f"- `{p.name}`" for p in reference_images)
            references_section = f"**Reference Images:**\n{ref_list}\n\n"

        entry = ENTRY_TEMPLATE.format(
            filename=filename,
            filepath=str(image_path),
            model=model,
            date=date_str,
            image_type=image_type,
            aspect_ratio=aspect_ratio or "default",
            image_size=image_size or "default",
            prompt=prompt,
            references_section=references_section,
        )

        # The index only shows the date part of the timestamp.
        index_row = f"| {filename} | {model} | {date_str.split()[0]} | {image_type} |\n"

        # Insert the row right below the index table header (newest first).
        index_pattern = r"(\| Image \| Model \| Date \| Type \|\n\|[-|]+\|\n)"
        index_match = re.search(index_pattern, content)

        if index_match:
            insert_pos = index_match.end()
            content = content[:insert_pos] + index_row + content[insert_pos:]
        else:
            logger.warning("registry_index_not_found", path=str(self.path))

        # The entry template ends with its own "---", so a trailing "---"
        # already in the file is replaced rather than doubled.
        stripped = content.rstrip()
        if stripped.endswith("---"):
            content = stripped[:-3] + entry
        else:
            content = stripped + "\n\n" + entry

        self._write_content(content)
        logger.info("registry_entry_added", filename=filename, model=model)
        return True

    def find_entry(self, filename: str) -> dict[str, str] | None:
        """Find an entry by filename.

        Args:
            filename: Name of the image file to find.

        Returns:
            Dictionary with entry metadata, or None if not found.

        Raises:
            FileOperationError: If the registry cannot be read.

        """
        return self._parse_entry(self._read_content(), filename)

    def get_all_entries(self) -> list[dict[str, str]]:
        """Get all entries from the registry.

        The file is read once; every ``### <name>.<image extension>`` header
        line is parsed, in file order, and each filename is returned at most
        once.

        Returns:
            List of dictionaries with entry metadata.

        Raises:
            FileOperationError: If the registry cannot be read.

        """
        content = self._read_content()
        entries: list[dict[str, str]] = []
        seen: set[str] = set()

        header_pattern = r"^### (.+?\.(?:png|jpg|jpeg|gif|webp))$"
        flags = re.IGNORECASE | re.MULTILINE
        for match in re.finditer(header_pattern, content, flags):
            filename = match.group(1).strip()
            if filename in seen:
                continue
            seen.add(filename)

            entry = self._parse_entry(content, filename)
            if entry:
                entries.append(entry)

        return entries
+""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + +import structlog + +from gemini_image.exceptions import GenerationError +from gemini_image.io import detect_image_format, get_mime_type_for_format + +logger = structlog.get_logger(__name__) + + +@dataclass +class ThoughtImage: + """An intermediate thought image from the generation process. + + Attributes: + data: Raw image bytes. + mime_type: MIME type of the image. + index: Index of this thought in the sequence. + text: Optional reasoning text associated with this thought. + + """ + + data: bytes + mime_type: str + index: int + text: str | None = None + + @property + def format(self) -> str: + """Get the image format from the data.""" + return detect_image_format(self.data) + + +@dataclass +class GenerationResponse: + """Parsed response from Gemini image generation API. + + This class provides a structured representation of the API response, + separating the final image from intermediate thoughts. + + Attributes: + image_data: Raw bytes of the final generated image. + image_format: Detected format of the image ('png', 'jpeg', etc.). + mime_type: MIME type of the image. + thought_images: List of intermediate thought images (if thinking mode). + thought_signature: Signature for thought continuity (if available). + text_response: Any text in the response (model comments). 
+ + """ + + image_data: bytes | None = None + image_format: str | None = None + mime_type: str | None = None + thought_images: list[ThoughtImage] = field(default_factory=list) + thought_signature: str | bytes | None = None + text_response: str | None = None + + @property + def has_image(self) -> bool: + """Check if the response contains a final image.""" + return self.image_data is not None + + @property + def thought_count(self) -> int: + """Get the number of thought images.""" + return len(self.thought_images) + + +def parse_response(response: Any, *, verbose: bool = False) -> GenerationResponse: + """Parse a Gemini API response into a structured format. + + This function extracts all relevant data from the API response: + - Final image data and format + - Intermediate thought images (for thinking mode) + - Thought signatures for continuity + - Text responses and reasoning + + Args: + response: Raw response from the Gemini API. + verbose: If True, logs detailed parsing information. + + Returns: + GenerationResponse with parsed data. + + Raises: + GenerationError: If the response has no candidates or is blocked. 
+ + """ + result = GenerationResponse() + + # Check for valid response + if not response.candidates: + _handle_empty_response(response) + raise GenerationError( + "No response candidates returned", + details={"feedback": _get_feedback(response)}, + ) + + # Process all parts in the response + thought_index = 0 + + for part in response.candidates[0].content.parts: + is_thought = hasattr(part, "thought") and part.thought + + if is_thought: + thought_index += 1 + _process_thought_part(part, thought_index, result, verbose=verbose) + else: + _process_final_part(part, result, verbose=verbose) + + # Detect format for final image + if result.image_data: + try: + result.image_format = detect_image_format(result.image_data) + result.mime_type = get_mime_type_for_format(result.image_format) + except Exception as e: + logger.warning("format_detection_failed", error=str(e)) + result.image_format = "png" + result.mime_type = "image/png" + + if verbose and result.thought_count > 0: + logger.info( + "response_parsed", + thought_count=result.thought_count, + has_image=result.has_image, + has_signature=result.thought_signature is not None, + ) + + return result + + +def _handle_empty_response(response: Any) -> None: + """Log details about an empty response.""" + feedback = _get_feedback(response) + if feedback: + logger.error("generation_blocked", feedback=feedback) + else: + logger.error("generation_failed_no_candidates") + + +def _get_feedback(response: Any) -> dict[str, Any] | None: + """Extract feedback from response if available.""" + if hasattr(response, "prompt_feedback"): + feedback = response.prompt_feedback + if hasattr(feedback, "__dict__"): + return dict(feedback.__dict__) + return {"raw": str(feedback)} + return None + + +def _process_thought_part( + part: Any, + index: int, + result: GenerationResponse, + *, + verbose: bool = False, +) -> None: + """Process a thought (intermediate reasoning) part.""" + if verbose: + logger.debug("processing_thought", index=index) + + # 
Extract thought text + thought_text = None + if part.text is not None: + thought_text = part.text + if verbose: + logger.debug("thought_reasoning", index=index, text=thought_text[:100]) + + # Extract thought image + if part.inline_data is not None: + thought_image = ThoughtImage( + data=part.inline_data.data, + mime_type=part.inline_data.mime_type, + index=index, + text=thought_text, + ) + result.thought_images.append(thought_image) + + if verbose: + logger.debug( + "thought_image_extracted", + index=index, + size=len(thought_image.data), + ) + + +def _process_final_part( + part: Any, + result: GenerationResponse, + *, + verbose: bool = False, +) -> None: + """Process a final (non-thought) part.""" + # Handle image data + if part.inline_data is not None: + result.image_data = part.inline_data.data + result.mime_type = part.inline_data.mime_type + + if verbose: + logger.debug( + "final_image_extracted", + size=len(result.image_data), + mime_type=result.mime_type, + ) + + # Extract thought signature if available + if hasattr(part, "thought_signature") and part.thought_signature: + result.thought_signature = part.thought_signature + if verbose: + sig_preview = str(result.thought_signature)[:50] + logger.debug("thought_signature_extracted", preview=sig_preview) + + # Handle text response + elif part.text is not None: + result.text_response = part.text + + if verbose: + logger.debug("text_response", text=part.text[:100]) + + # Check for thought signature in text part + if hasattr(part, "thought_signature") and part.thought_signature: + result.thought_signature = part.thought_signature + + +def extract_safety_ratings(response: Any) -> list[dict[str, Any]]: + """Extract safety ratings from a response. + + Args: + response: Raw response from the Gemini API. + + Returns: + List of safety rating dictionaries. 
+ + """ + ratings: list[dict[str, Any]] = [] + + if not response.candidates: + return ratings + + candidate = response.candidates[0] + if hasattr(candidate, "safety_ratings") and candidate.safety_ratings: + for rating in candidate.safety_ratings: + rating_dict: dict[str, Any] = {} + if hasattr(rating, "category"): + rating_dict["category"] = str(rating.category) + if hasattr(rating, "probability"): + rating_dict["probability"] = str(rating.probability) + if hasattr(rating, "blocked"): + rating_dict["blocked"] = rating.blocked + ratings.append(rating_dict) + + return ratings diff --git a/packages/gemini-image/tests/conftest.py b/packages/gemini-image/tests/conftest.py index 8d3e09a..c4e0208 100644 --- a/packages/gemini-image/tests/conftest.py +++ b/packages/gemini-image/tests/conftest.py @@ -22,6 +22,13 @@ def sample_image_bytes() -> bytes: ) +@pytest.fixture +def sample_jpeg_bytes() -> bytes: + """Return minimal JPEG magic bytes for testing.""" + # JPEG magic bytes followed by padding + return b"\xff\xd8\xff\xe0\x00\x10JFIF\x00" + b"\x00" * 100 + + @pytest.fixture def sample_image_path(tmp_path: Path, sample_image_bytes: bytes) -> Path: """Create a temporary sample image file.""" diff --git a/packages/gemini-image/tests/test_exceptions.py b/packages/gemini-image/tests/test_exceptions.py new file mode 100644 index 0000000..e10e52e --- /dev/null +++ b/packages/gemini-image/tests/test_exceptions.py @@ -0,0 +1,166 @@ +"""Tests for custom exceptions module.""" + +from __future__ import annotations + +from gemini_image.exceptions import ( + APIError, + ConfigurationError, + ContentBlockedError, + FileOperationError, + FormatDetectionError, + GeminiImageError, + GenerationError, + RateLimitError, + ServerError, + ValidationError, +) + + +class TestGeminiImageError: + """Tests for base GeminiImageError.""" + + def test_basic_message(self) -> None: + """Test exception with basic message.""" + exc = GeminiImageError("Test error") + assert str(exc) == "Test error" + assert 
class TestConfigurationError:
    """ConfigurationError carries its message and belongs to the hierarchy."""

    def test_basic(self) -> None:
        """The message is rendered and the base class is GeminiImageError."""
        err = ConfigurationError("Missing API key")
        rendered = str(err)
        assert "Missing API key" in rendered
        assert isinstance(err, GeminiImageError)


class TestAPIError:
    """APIError exposes the HTTP status and the raw response."""

    def test_api_error_with_status(self) -> None:
        """The status code is stored and shown in the string form."""
        err = APIError("API failed", status_code=500)
        assert err.status_code == 500
        assert "status_code=500" in str(err)

    def test_api_error_with_response(self) -> None:
        """The response object is kept as given."""
        payload = {"error": "details"}
        err = APIError("API failed", response=payload)
        assert err.response == {"error": "details"}


class TestRateLimitError:
    """RateLimitError is an APIError with an optional retry delay."""

    def test_rate_limit_with_retry_after(self) -> None:
        """retry_after is stored and shown in the string form."""
        err = RateLimitError(retry_after=30.0)
        assert err.retry_after == 30.0
        assert "retry_after=30.0" in str(err)
        assert isinstance(err, APIError)

    def test_default_message(self) -> None:
        """Without arguments the message mentions the rate limit."""
        rendered = str(RateLimitError()).lower()
        assert "rate limit" in rendered


class TestServerError:
    """ServerError is an APIError for server-side failures."""

    def test_server_error(self) -> None:
        """The status code is stored and the type derives from APIError."""
        err = ServerError("Internal error", status_code=503)
        assert err.status_code == 503
        assert isinstance(err, APIError)


class TestContentBlockedError:
    """ContentBlockedError is an APIError carrying safety ratings."""

    def test_with_safety_ratings(self) -> None:
        """Safety ratings are stored as given."""
        ratings = [{"category": "HARM", "probability": "HIGH"}]
        err = ContentBlockedError(safety_ratings=ratings)
        assert err.safety_ratings == ratings
        assert isinstance(err, APIError)

    def test_default_message(self) -> None:
        """Without arguments the message mentions blocking or safety."""
        rendered = str(ContentBlockedError()).lower()
        assert any(word in rendered for word in ("blocked", "safety"))


class TestValidationError:
    """ValidationError records which field failed and why."""

    def test_with_field_info(self) -> None:
        """Field, value and valid options are stored and the field is shown."""
        options = ["flash", "pro"]
        err = ValidationError(
            "Invalid value",
            field="model_key",
            value="invalid",
            valid_options=options,
        )
        assert err.field == "model_key"
        assert err.value == "invalid"
        assert err.valid_options == ["flash", "pro"]
        assert "field='model_key'" in str(err)


class TestFormatDetectionError:
    """FormatDetectionError keeps the offending magic bytes."""

    def test_with_magic_bytes(self) -> None:
        """Magic bytes are stored raw and also appear in the details."""
        header = b"\x00\x01\x02\x03"
        err = FormatDetectionError(magic_bytes=header)
        assert err.magic_bytes == b"\x00\x01\x02\x03"
        # The details dictionary carries an entry for the magic bytes.
        assert "magic_bytes" in err.details


class TestFileOperationError:
    """FileOperationError records the path and the failed operation."""

    def test_with_path_and_operation(self) -> None:
        """Path and operation are stored and the path is shown."""
        err = FileOperationError(
            "Cannot write file",
            path="/tmp/test.png",
            operation="write",
        )
        assert err.path == "/tmp/test.png"
        assert err.operation == "write"
        assert "path='/tmp/test.png'" in str(err)
exc.prompt is not None + + def test_default_message(self) -> None: + """Test default error message.""" + exc = GenerationError() + assert "generation" in str(exc).lower() or "failed" in str(exc).lower() diff --git a/packages/gemini-image/tests/test_generator.py b/packages/gemini-image/tests/test_generator.py index 8c6d027..ad39822 100644 --- a/packages/gemini-image/tests/test_generator.py +++ b/packages/gemini-image/tests/test_generator.py @@ -1,7 +1,5 @@ """Tests for image generation functions.""" -# Bandit B101 (assert_used) is expected in test files - pytest uses assert statements - from __future__ import annotations import os @@ -10,8 +8,13 @@ import pytest -from gemini_image import generator -from gemini_image.generator import finalize_draft, generate_story_sequence +from gemini_image import client as client_module +from gemini_image.exceptions import FileOperationError, ValidationError +from gemini_image.generator import ( + finalize_draft, + generate_image, + generate_story_sequence, +) if TYPE_CHECKING: from pathlib import Path @@ -21,33 +24,26 @@ class TestGenerateImage: """Tests for generate_image function.""" def test_generate_image_invalid_model_raises(self) -> None: - """Test that invalid model key raises ValueError.""" - # Mock genai to avoid ImportError - mock_genai = MagicMock() - mock_types = MagicMock() - - with ( - patch.object(generator, "_genai", mock_genai), - patch.object(generator, "_types", mock_types), - patch.dict(os.environ, {"GEMINI_API_KEY": "test-key"}), - pytest.raises(ValueError, match="Unknown model"), - ): - generator.generate_image("test prompt", model_key="invalid") # type: ignore[arg-type] + """Test that invalid model key raises ValidationError.""" + with pytest.raises(ValidationError, match="Unknown model"): + generate_image("test prompt", model_key="invalid") # type: ignore[arg-type] def test_generate_image_missing_api_key_raises(self) -> None: - """Test that missing API key raises ValueError.""" + """Test that missing API 
key raises ConfigurationError.""" + from gemini_image.exceptions import ConfigurationError + # Mock genai to avoid ImportError mock_genai = MagicMock() mock_types = MagicMock() with ( - patch.object(generator, "_genai", mock_genai), - patch.object(generator, "_types", mock_types), + patch.object(client_module, "_genai", mock_genai), + patch.object(client_module, "_types", mock_types), patch.dict(os.environ, {}, clear=True), ): os.environ.pop("GEMINI_API_KEY", None) - with pytest.raises(ValueError, match="GEMINI_API_KEY"): - generator.generate_image("test prompt") + with pytest.raises(ConfigurationError, match="GEMINI_API_KEY"): + generate_image("test prompt") def test_generate_image_with_mock_client( self, @@ -65,14 +61,15 @@ def test_generate_image_with_mock_client( # Patch the lazy-loaded modules with ( - patch.object(generator, "_genai", mock_genai), - patch.object(generator, "_types", mock_types), + patch.object(client_module, "_genai", mock_genai), + patch.object(client_module, "_types", mock_types), patch.dict(os.environ, {"GEMINI_API_KEY": "test-key"}), ): - result = generator.generate_image( + result = generate_image( prompt="A test image", output_dir=tmp_path, verbose=False, + document=False, # Disable registry for test ) assert result is not None @@ -93,14 +90,15 @@ def test_generate_image_draft_mode_uses_1k( mock_genai.Client.return_value = mock_client with ( - patch.object(generator, "_genai", mock_genai), - patch.object(generator, "_types", mock_types), + patch.object(client_module, "_genai", mock_genai), + patch.object(client_module, "_types", mock_types), patch.dict(os.environ, {"GEMINI_API_KEY": "test-key"}), ): - result = generator.generate_image( + result = generate_image( prompt="A test draft", output_dir=tmp_path, is_draft=True, + document=False, ) assert result is not None @@ -111,8 +109,8 @@ class TestGenerateStorySequence: """Tests for generate_story_sequence function.""" def test_story_sequence_invalid_parts_raises(self) -> None: - """Test 
that num_parts < 1 raises ValueError.""" - with pytest.raises(ValueError, match="at least 1"): + """Test that num_parts < 1 raises ValidationError.""" + with pytest.raises(ValidationError, match="at least 1"): generate_story_sequence("test story", num_parts=0) def test_story_sequence_generates_multiple_images( @@ -129,15 +127,16 @@ def test_story_sequence_generates_multiple_images( mock_genai.Client.return_value = mock_client with ( - patch.object(generator, "_genai", mock_genai), - patch.object(generator, "_types", mock_types), + patch.object(client_module, "_genai", mock_genai), + patch.object(client_module, "_types", mock_types), patch.dict(os.environ, {"GEMINI_API_KEY": "test-key"}), ): - results = generator.generate_story_sequence( + results = generate_story_sequence( base_prompt="A test story", num_parts=3, output_dir=tmp_path, output_prefix=tmp_path / "story", + document=False, ) assert len(results) == 3 @@ -150,10 +149,10 @@ class TestFinalizeDraft: """Tests for finalize_draft function.""" def test_finalize_missing_draft_raises(self, tmp_path: Path) -> None: - """Test that missing draft image raises FileNotFoundError.""" + """Test that missing draft image raises FileOperationError.""" missing_path = tmp_path / "nonexistent.png" - with pytest.raises(FileNotFoundError): + with pytest.raises(FileOperationError): finalize_draft(missing_path) def test_finalize_draft_uses_2k_by_default( @@ -171,13 +170,14 @@ def test_finalize_draft_uses_2k_by_default( mock_genai.Client.return_value = mock_client with ( - patch.object(generator, "_genai", mock_genai), - patch.object(generator, "_types", mock_types), + patch.object(client_module, "_genai", mock_genai), + patch.object(client_module, "_types", mock_types), patch.dict(os.environ, {"GEMINI_API_KEY": "test-key"}), ): - result = generator.finalize_draft( + result = finalize_draft( draft_path=sample_image_path, output_dir=tmp_path, + document=False, ) assert result is not None diff --git 
class TestDetectImageFormat:
    """detect_image_format recognises each supported signature."""

    def test_detect_png(self, sample_image_bytes: bytes) -> None:
        """The PNG fixture is reported as 'png'."""
        detected = detect_image_format(sample_image_bytes)
        assert detected == "png"

    def test_detect_jpeg(self) -> None:
        """Data starting with FF D8 FF is reported as 'jpeg'."""
        payload = b"\xff\xd8\xff\xe0\x00\x10JFIF\x00" + b"\x00" * 100
        assert detect_image_format(payload) == "jpeg"

    def test_detect_gif87a(self) -> None:
        """A GIF87a header is reported as 'gif'."""
        payload = b"GIF87a" + b"\x00" * 100
        assert detect_image_format(payload) == "gif"

    def test_detect_gif89a(self) -> None:
        """A GIF89a header is reported as 'gif'."""
        payload = b"GIF89a" + b"\x00" * 100
        assert detect_image_format(payload) == "gif"

    def test_detect_webp(self) -> None:
        """A RIFF container tagged WEBP is reported as 'webp'."""
        payload = b"RIFF\x00\x00\x00\x00WEBP" + b"\x00" * 100
        assert detect_image_format(payload) == "webp"

    def test_empty_data_raises(self) -> None:
        """Empty input is rejected with FormatDetectionError."""
        with pytest.raises(FormatDetectionError, match="Empty image data"):
            detect_image_format(b"")

    def test_unknown_format_raises(self) -> None:
        """Unrecognised bytes are rejected with FormatDetectionError."""
        garbage = b"NOT AN IMAGE FORMAT DATA"
        with pytest.raises(FormatDetectionError, match="Unknown image format"):
            detect_image_format(garbage)


class TestGetExtensionForFormat:
    """get_extension_for_format maps formats to dotted extensions."""

    def test_png_extension(self) -> None:
        """'png' maps to '.png'."""
        extension = get_extension_for_format("png")
        assert extension == ".png"

    def test_jpeg_extension(self) -> None:
        """'jpeg' maps to '.jpg'."""
        extension = get_extension_for_format("jpeg")
        assert extension == ".jpg"

    def test_gif_extension(self) -> None:
        """'gif' maps to '.gif'."""
        extension = get_extension_for_format("gif")
        assert extension == ".gif"

    def test_webp_extension(self) -> None:
        """'webp' maps to '.webp'."""
        extension = get_extension_for_format("webp")
        assert extension == ".webp"

    def test_unknown_defaults_to_png(self) -> None:
        """An unrecognised format falls back to '.png'."""
        extension = get_extension_for_format("unknown")
        assert extension == ".png"


class TestGetMimeTypeForFormat:
    """get_mime_type_for_format maps formats to MIME types."""

    def test_png_mime(self) -> None:
        """'png' maps to 'image/png'."""
        mime = get_mime_type_for_format("png")
        assert mime == "image/png"

    def test_jpeg_mime(self) -> None:
        """'jpeg' maps to 'image/jpeg'."""
        mime = get_mime_type_for_format("jpeg")
        assert mime == "image/jpeg"
correct_extension=True) + + # Should be corrected to .png + assert result.suffix == ".png" + assert result.exists() + + def test_save_image_no_correction( + self, tmp_path: Path, sample_image_bytes: bytes + ) -> None: + """Test saving without extension correction.""" + output_path = tmp_path / "output.jpg" + result = save_image(sample_image_bytes, output_path, correct_extension=False) + + # Should keep .jpg even though data is PNG + assert result.suffix == ".jpg" + + def test_save_image_creates_parents( + self, tmp_path: Path, sample_image_bytes: bytes + ) -> None: + """Test that parent directories are created.""" + output_path = tmp_path / "subdir" / "nested" / "output.png" + result = save_image(sample_image_bytes, output_path, create_parents=True) + + assert result.exists() + assert result.parent.exists() + + +class TestSaveMetadata: + """Tests for save_metadata function.""" + + def test_save_metadata_basic( + self, tmp_path: Path, sample_image_bytes: bytes + ) -> None: + """Test basic metadata saving.""" + image_path = tmp_path / "image.png" + image_path.write_bytes(sample_image_bytes) + + metadata_path = save_metadata( + image_path, + prompt="Test prompt", + model="pro", + ) + + assert metadata_path.exists() + assert metadata_path.suffix == ".json" + + with open(metadata_path) as f: + data = json.load(f) + + assert data["prompt"] == "Test prompt" + assert data["model"] == "pro" + assert "created_at" in data + + def test_save_metadata_with_options( + self, tmp_path: Path, sample_image_bytes: bytes + ) -> None: + """Test metadata saving with all options.""" + image_path = tmp_path / "image.png" + image_path.write_bytes(sample_image_bytes) + ref_path = tmp_path / "ref.png" + + metadata_path = save_metadata( + image_path, + prompt="Test prompt", + model="pro", + aspect_ratio="16:9", + image_size="2K", + reference_images=[ref_path], + thought_signature="sig123", + is_draft=True, + ) + + with open(metadata_path) as f: + data = json.load(f) + + assert 
data["aspect_ratio"] == "16:9" + assert data["image_size"] == "2K" + assert data["is_draft"] is True + assert data["thought_signature"] == "sig123" + assert str(ref_path) in data["reference_images"] + + +class TestLoadMetadata: + """Tests for load_metadata function.""" + + def test_load_metadata_exists( + self, tmp_path: Path, sample_image_bytes: bytes + ) -> None: + """Test loading existing metadata.""" + image_path = tmp_path / "image.png" + image_path.write_bytes(sample_image_bytes) + + # Create metadata file + metadata = {"prompt": "Test", "model": "pro"} + metadata_path = image_path.with_suffix(".json") + with open(metadata_path, "w") as f: + json.dump(metadata, f) + + result = load_metadata(image_path) + assert result is not None + assert result["prompt"] == "Test" + + def test_load_metadata_not_exists(self, tmp_path: Path) -> None: + """Test loading non-existent metadata returns None.""" + image_path = tmp_path / "image.png" + result = load_metadata(image_path) + assert result is None + + +class TestValidateImageFile: + """Tests for validate_image_file function.""" + + def test_validate_png_file( + self, tmp_path: Path, sample_image_bytes: bytes + ) -> None: + """Test validating a PNG file.""" + image_path = tmp_path / "image.png" + image_path.write_bytes(sample_image_bytes) + + format_str, size = validate_image_file(image_path) + assert format_str == "png" + assert size == len(sample_image_bytes) + + def test_validate_missing_file_raises(self, tmp_path: Path) -> None: + """Test that missing file raises FileOperationError.""" + missing_path = tmp_path / "nonexistent.png" + with pytest.raises(FileOperationError, match="not found"): + validate_image_file(missing_path) diff --git a/packages/gemini-image/tests/test_registry.py b/packages/gemini-image/tests/test_registry.py new file mode 100644 index 0000000..29f466c --- /dev/null +++ b/packages/gemini-image/tests/test_registry.py @@ -0,0 +1,163 @@ +"""Tests for PROMPTS.md registry module.""" + +from __future__ 
import annotations + +from pathlib import Path + +from gemini_image.registry import PromptRegistry + + +class TestPromptRegistry: + """Tests for PromptRegistry class.""" + + def test_create_registry(self, tmp_path: Path) -> None: + """Test registry file is created when it doesn't exist.""" + registry_path = tmp_path / "PROMPTS.md" + registry = PromptRegistry(registry_path) + + # Access should create the file + registry._ensure_exists() + + assert registry_path.exists() + content = registry_path.read_text() + assert "Image Generation Registry" in content + + def test_add_entry(self, tmp_path: Path) -> None: + """Test adding an entry to the registry.""" + registry_path = tmp_path / "PROMPTS.md" + image_path = tmp_path / "test_image.png" + image_path.touch() + + registry = PromptRegistry(registry_path) + result = registry.add_entry( + image_path=image_path, + prompt="Test prompt for image generation", + model="pro", + aspect_ratio="16:9", + image_size="2K", + ) + + assert result is True + content = registry_path.read_text() + assert "test_image.png" in content + assert "Test prompt for image generation" in content + assert "pro" in content + assert "16:9" in content + assert "2K" in content + + def test_add_entry_idempotent(self, tmp_path: Path) -> None: + """Test that adding same entry twice is idempotent.""" + registry_path = tmp_path / "PROMPTS.md" + image_path = tmp_path / "test_image.png" + image_path.touch() + + registry = PromptRegistry(registry_path) + + # Add first time + result1 = registry.add_entry( + image_path=image_path, + prompt="Test prompt", + model="pro", + ) + assert result1 is True + + # Add second time (same filename) + result2 = registry.add_entry( + image_path=image_path, + prompt="Different prompt", + model="flash", + ) + assert result2 is False + + # Should only have one entry + content = registry_path.read_text() + assert content.count("### test_image.png") == 1 + + def test_add_entry_with_references(self, tmp_path: Path) -> None: + """Test 
adding entry with reference images.""" + registry_path = tmp_path / "PROMPTS.md" + image_path = tmp_path / "output.png" + ref1 = tmp_path / "ref1.png" + ref2 = tmp_path / "ref2.png" + image_path.touch() + + registry = PromptRegistry(registry_path) + registry.add_entry( + image_path=image_path, + prompt="Test with refs", + model="pro", + reference_images=[ref1, ref2], + ) + + content = registry_path.read_text() + assert "Reference Images" in content + assert "ref1.png" in content + assert "ref2.png" in content + + def test_add_draft_entry(self, tmp_path: Path) -> None: + """Test adding a draft entry.""" + registry_path = tmp_path / "PROMPTS.md" + image_path = tmp_path / "draft.png" + image_path.touch() + + registry = PromptRegistry(registry_path) + registry.add_entry( + image_path=image_path, + prompt="Draft prompt", + model="pro", + is_draft=True, + ) + + content = registry_path.read_text() + assert "Draft" in content + + def test_find_entry(self, tmp_path: Path) -> None: + """Test finding an entry by filename.""" + registry_path = tmp_path / "PROMPTS.md" + image_path = tmp_path / "findme.png" + image_path.touch() + + registry = PromptRegistry(registry_path) + registry.add_entry( + image_path=image_path, + prompt="Find this prompt", + model="flash", + ) + + result = registry.find_entry("findme.png") + assert result is not None + assert result["filename"] == "findme.png" + assert result["prompt"] == "Find this prompt" + + def test_find_entry_not_found(self, tmp_path: Path) -> None: + """Test finding non-existent entry returns None.""" + registry_path = tmp_path / "PROMPTS.md" + registry = PromptRegistry(registry_path) + registry._ensure_exists() + + result = registry.find_entry("nonexistent.png") + assert result is None + + def test_index_table_updated(self, tmp_path: Path) -> None: + """Test that the index table is updated when adding entries.""" + registry_path = tmp_path / "PROMPTS.md" + image_path = tmp_path / "indexed.png" + image_path.touch() + + registry = 
PromptRegistry(registry_path) + registry.add_entry( + image_path=image_path, + prompt="Indexed entry", + model="pro", + ) + + content = registry_path.read_text() + # Check index table has the entry + lines = content.split("\n") + index_found = False + for line in lines: + if "| indexed.png |" in line: + index_found = True + assert "pro" in line + break + assert index_found, "Entry not found in index table" diff --git a/packages/gemini-image/tests/test_response_parser.py b/packages/gemini-image/tests/test_response_parser.py new file mode 100644 index 0000000..81723ca --- /dev/null +++ b/packages/gemini-image/tests/test_response_parser.py @@ -0,0 +1,212 @@ +"""Tests for response parser module.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest + +from gemini_image.exceptions import GenerationError +from gemini_image.response_parser import ( + GenerationResponse, + ThoughtImage, + extract_safety_ratings, + parse_response, +) + + +class TestThoughtImage: + """Tests for ThoughtImage dataclass.""" + + def test_format_detection(self, sample_image_bytes: bytes) -> None: + """Test that format is detected from data.""" + thought = ThoughtImage( + data=sample_image_bytes, + mime_type="image/png", + index=1, + ) + assert thought.format == "png" + + def test_with_text(self) -> None: + """Test thought image with reasoning text.""" + # Create minimal PNG data + png_data = b"\x89PNG\r\n\x1a\n" + b"\x00" * 100 + thought = ThoughtImage( + data=png_data, + mime_type="image/png", + index=1, + text="Thinking about composition...", + ) + assert thought.text == "Thinking about composition..." 
+ + +class TestGenerationResponse: + """Tests for GenerationResponse dataclass.""" + + def test_has_image_true(self, sample_image_bytes: bytes) -> None: + """Test has_image returns True when image data present.""" + response = GenerationResponse( + image_data=sample_image_bytes, + image_format="png", + ) + assert response.has_image is True + + def test_has_image_false(self) -> None: + """Test has_image returns False when no image data.""" + response = GenerationResponse() + assert response.has_image is False + + def test_thought_count(self, sample_image_bytes: bytes) -> None: + """Test thought_count returns correct number.""" + response = GenerationResponse( + thought_images=[ + ThoughtImage(data=sample_image_bytes, mime_type="image/png", index=1), + ThoughtImage(data=sample_image_bytes, mime_type="image/png", index=2), + ] + ) + assert response.thought_count == 2 + + +class TestParseResponse: + """Tests for parse_response function.""" + + def test_parse_basic_response( + self, mock_genai_response: MagicMock, sample_image_bytes: bytes + ) -> None: + """Test parsing a basic response with image.""" + result = parse_response(mock_genai_response) + + assert result.has_image + assert result.image_data == sample_image_bytes + assert result.image_format == "png" + + def test_parse_empty_response_raises(self) -> None: + """Test that empty response raises GenerationError.""" + mock_response = MagicMock() + mock_response.candidates = [] + + with pytest.raises(GenerationError, match="No response candidates"): + parse_response(mock_response) + + def test_parse_response_with_thoughts( + self, sample_image_bytes: bytes + ) -> None: + """Test parsing response with thought images.""" + mock_response = MagicMock() + + # Create thought part + thought_part = MagicMock() + thought_part.thought = True + thought_part.text = "Analyzing composition..." 
+ thought_part.inline_data = MagicMock() + thought_part.inline_data.data = sample_image_bytes + thought_part.inline_data.mime_type = "image/png" + + # Create final part + final_part = MagicMock() + final_part.thought = False + final_part.text = None + final_part.inline_data = MagicMock() + final_part.inline_data.data = sample_image_bytes + final_part.inline_data.mime_type = "image/png" + final_part.thought_signature = None + + mock_candidate = MagicMock() + mock_candidate.content.parts = [thought_part, final_part] + mock_response.candidates = [mock_candidate] + + result = parse_response(mock_response) + + assert result.has_image + assert result.thought_count == 1 + assert result.thought_images[0].text == "Analyzing composition..." + + def test_parse_response_with_text(self, sample_image_bytes: bytes) -> None: + """Test parsing response with text response.""" + mock_response = MagicMock() + + # Create text part + text_part = MagicMock() + text_part.thought = False + text_part.text = "Here is your image" + text_part.inline_data = None + + # Create image part + image_part = MagicMock() + image_part.thought = False + image_part.text = None + image_part.inline_data = MagicMock() + image_part.inline_data.data = sample_image_bytes + image_part.inline_data.mime_type = "image/png" + image_part.thought_signature = None + + mock_candidate = MagicMock() + mock_candidate.content.parts = [text_part, image_part] + mock_response.candidates = [mock_candidate] + + result = parse_response(mock_response) + + assert result.has_image + assert result.text_response == "Here is your image" + + def test_parse_response_with_thought_signature( + self, sample_image_bytes: bytes + ) -> None: + """Test parsing response with thought signature.""" + mock_response = MagicMock() + + mock_part = MagicMock() + mock_part.thought = False + mock_part.text = None + mock_part.inline_data = MagicMock() + mock_part.inline_data.data = sample_image_bytes + mock_part.inline_data.mime_type = "image/png" + 
mock_part.thought_signature = "signature_bytes_here" + + mock_candidate = MagicMock() + mock_candidate.content.parts = [mock_part] + mock_response.candidates = [mock_candidate] + + result = parse_response(mock_response) + + assert result.thought_signature == "signature_bytes_here" + + +class TestExtractSafetyRatings: + """Tests for extract_safety_ratings function.""" + + def test_extract_with_ratings(self) -> None: + """Test extracting safety ratings from response.""" + mock_response = MagicMock() + mock_rating = MagicMock() + mock_rating.category = "HARM_CATEGORY_DANGEROUS" + mock_rating.probability = "LOW" + mock_rating.blocked = False + + mock_candidate = MagicMock() + mock_candidate.safety_ratings = [mock_rating] + mock_response.candidates = [mock_candidate] + + ratings = extract_safety_ratings(mock_response) + + assert len(ratings) == 1 + assert "category" in ratings[0] + assert "probability" in ratings[0] + + def test_extract_empty_response(self) -> None: + """Test extracting from response with no candidates.""" + mock_response = MagicMock() + mock_response.candidates = [] + + ratings = extract_safety_ratings(mock_response) + assert ratings == [] + + def test_extract_no_ratings(self) -> None: + """Test extracting when no safety ratings present.""" + mock_response = MagicMock() + mock_candidate = MagicMock() + mock_candidate.safety_ratings = None + mock_response.candidates = [mock_candidate] + + ratings = extract_safety_ratings(mock_response) + assert ratings == [] diff --git a/uv.lock b/uv.lock index cdbaef7..08e4c08 100644 --- a/uv.lock +++ b/uv.lock @@ -436,10 +436,13 @@ provides-extras = ["async", "dev"] [[package]] name = "byronwilliamscpa-gemini-image" -version = "0.1.0" +version = "0.2.0" source = { editable = "packages/gemini-image" } dependencies = [ { name = "google-genai" }, + { name = "python-dotenv" }, + { name = "structlog" }, + { name = "tenacity" }, ] [package.optional-dependencies] @@ -455,6 +458,9 @@ requires-dist = [ { name = "pytest", 
marker = "extra == 'dev'", specifier = ">=7.4.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.21.0" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.1.0" }, + { name = "python-dotenv", specifier = ">=1.0.0" }, + { name = "structlog", specifier = ">=24.1.0" }, + { name = "tenacity", specifier = ">=8.2.0" }, ] provides-extras = ["dev"] From e7867211851f24a190bb4131618f4ef2914c70f5 Mon Sep 17 00:00:00 2001 From: Byron Williams Date: Fri, 9 Jan 2026 13:29:36 -0800 Subject: [PATCH 02/11] feat(gemini-image): add batch processing and rich progress indicators Phase 2 - Feature Backport: - Add generate_batch() function for processing multiple prompts - Support JSON file input via --batch CLI flag - Add resume capability to skip existing output files - Add progress bar with rich library for batch/story operations Phase 3 - Production Hardening: - Add rich dependency for CLI progress indicators - Add spinner for single image generation (via _show_spinner helper) - Add --no-progress flag to disable progress display - Add batch mode summary output CLI Enhancements: - New --batch JSON_FILE option for batch processing - New --no-progress flag for script-friendly output - Batch file format: [{"prompt": "...", "aspect_ratio": "16:9", ...}, ...] Test additions: - test_batch_empty_list_raises: Validates empty prompt list - test_batch_generates_multiple_images: Verifies batch generation - test_batch_handles_invalid_prompt: Graceful error handling - test_batch_resume_skips_existing: Resume functionality All 90 tests passing. 
Co-Authored-By: Claude Opus 4.5 --- packages/gemini-image/pyproject.toml | 1 + .../gemini-image/src/gemini_image/__init__.py | 2 + packages/gemini-image/src/gemini_image/cli.py | 79 +++++++ .../src/gemini_image/generator.py | 192 ++++++++++++++++++ packages/gemini-image/tests/test_generator.py | 124 +++++++++++ uv.lock | 2 + 6 files changed, 400 insertions(+) diff --git a/packages/gemini-image/pyproject.toml b/packages/gemini-image/pyproject.toml index 93bcbef..05093cd 100644 --- a/packages/gemini-image/pyproject.toml +++ b/packages/gemini-image/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "tenacity>=8.2.0", "python-dotenv>=1.0.0", "structlog>=24.1.0", + "rich>=13.0.0", ] [project.optional-dependencies] diff --git a/packages/gemini-image/src/gemini_image/__init__.py b/packages/gemini-image/src/gemini_image/__init__.py index a122307..5632729 100644 --- a/packages/gemini-image/src/gemini_image/__init__.py +++ b/packages/gemini-image/src/gemini_image/__init__.py @@ -38,6 +38,7 @@ ) from gemini_image.generator import ( finalize_draft, + generate_batch, generate_image, generate_story_sequence, ) @@ -89,6 +90,7 @@ "ValidationError", "detect_image_format", "finalize_draft", + "generate_batch", "generate_image", "generate_story_sequence", "get_api_key", diff --git a/packages/gemini-image/src/gemini_image/cli.py b/packages/gemini-image/src/gemini_image/cli.py index c1811f0..05caf73 100644 --- a/packages/gemini-image/src/gemini_image/cli.py +++ b/packages/gemini-image/src/gemini_image/cli.py @@ -3,6 +3,7 @@ from __future__ import annotations import argparse +import json import sys from pathlib import Path @@ -10,6 +11,7 @@ from gemini_image.generator import ( finalize_draft, + generate_batch, generate_image, generate_story_sequence, ) @@ -18,6 +20,26 @@ logger = structlog.get_logger(__name__) +def _show_spinner(message: str) -> object: + """Create a spinner context manager for single operations.""" + try: + from rich.console import Console + from rich.status import 
Status + + console = Console() + return Status(message, console=console, spinner="dots") + except ImportError: + # Fallback to a simple context manager that does nothing + from contextlib import contextmanager + + @contextmanager + def noop(): + print(message + "...") + yield + + return noop() + + def _configure_logging(verbose: bool) -> None: """Configure structlog for CLI output.""" if verbose: @@ -76,6 +98,9 @@ def main() -> None: # Disable PROMPTS.md documentation %(prog)s "Private prompt" --no-document -o private.png + + # Batch processing from JSON file + %(prog)s --batch prompts.json -d ./output """, ) @@ -198,6 +223,19 @@ def main() -> None: help="List available models and exit", ) + parser.add_argument( + "--batch", + type=Path, + metavar="JSON_FILE", + help="Process batch of prompts from JSON file. Format: [{\"prompt\": \"...\", ...}, ...]", + ) + + parser.add_argument( + "--no-progress", + action="store_true", + help="Disable progress bar display (for batch/story modes)", + ) + args = parser.parse_args() # Configure logging based on verbosity @@ -207,6 +245,47 @@ def main() -> None: list_models() return + # Batch mode + if args.batch: + if not args.batch.exists(): + print(f"Error: Batch file not found: {args.batch}") + sys.exit(1) + + try: + with open(args.batch) as f: + prompts = json.load(f) + + if not isinstance(prompts, list): + print("Error: Batch file must contain a JSON array of prompt objects") + sys.exit(1) + + results = generate_batch( + prompts=prompts, + output_dir=args.output_dir, + resume=args.resume, + document=not args.no_document, + show_progress=not args.no_progress, + ) + + successful = sum(1 for r in results if r is not None) + print(f"\nBatch complete: {successful}/{len(prompts)} images generated") + + if successful > 0: + print("\nGenerated images:") + for i, result in enumerate(results, 1): + status = str(result) if result else "[FAILED]" + print(f" {i}. 
{status}") + + sys.exit(0 if successful == len(prompts) else 1) + + except json.JSONDecodeError as e: + print(f"Error: Invalid JSON in batch file: {e}") + sys.exit(1) + except Exception as e: + logger.error("batch_failed", error=str(e)) + print(f"Error: {e}") + sys.exit(1) + # Finalize mode if args.finalize: try: diff --git a/packages/gemini-image/src/gemini_image/generator.py b/packages/gemini-image/src/gemini_image/generator.py index d991e71..b45a05c 100644 --- a/packages/gemini-image/src/gemini_image/generator.py +++ b/packages/gemini-image/src/gemini_image/generator.py @@ -2,6 +2,7 @@ This module provides the main entry points for image generation: - generate_image(): Single image generation +- generate_batch(): Batch processing multiple prompts - generate_story_sequence(): Multi-part story generation - finalize_draft(): Draft-to-final upscaling """ @@ -180,6 +181,197 @@ def generate_image( return saved_path +def generate_batch( + prompts: list[dict[str, object]], + output_dir: Path | None = None, + *, + parallel: int = 1, + resume: bool = True, + document: bool = True, + show_progress: bool = True, +) -> list[Path | None]: + """Generate multiple images from a list of prompts. + + Each item in the prompts list should be a dictionary with at least a + 'prompt' key. Other supported keys match generate_image() parameters: + - prompt (required): Text description + - output_path: Specific output path + - model_key: Model to use (default: flash) + - aspect_ratio: Aspect ratio for pro model + - image_size: Image size + - reference_images: List of reference image paths + + Args: + prompts: List of prompt dictionaries. + output_dir: Output directory for generated images. + parallel: Number of concurrent generations (currently only 1 supported). + resume: If True, skip prompts that already have output files. + document: If True, adds entries to PROMPTS.md registry. + show_progress: If True, displays a progress bar. 
+ + Returns: + List of paths to generated images (None for failed generations). + + Raises: + ValidationError: If prompts list is empty or a prompt is invalid. + + Example: + >>> prompts = [ + ... {"prompt": "A sunset over mountains", "aspect_ratio": "16:9"}, + ... {"prompt": "A forest in autumn", "model_key": "pro"}, + ... ] + >>> results = generate_batch(prompts, output_dir=Path("./images")) + + """ + if not prompts: + raise ValidationError( + "Prompts list cannot be empty", + field="prompts", + value=[], + ) + + if parallel > 1: + logger.warning( + "parallel_not_implemented", + requested=parallel, + using=1, + message="Parallel batch processing not yet implemented", + ) + parallel = 1 + + if output_dir is None: + output_dir = Path.cwd() + output_dir.mkdir(parents=True, exist_ok=True) + + results: list[Path | None] = [] + + logger.info( + "batch_generation_started", + total_prompts=len(prompts), + output_dir=str(output_dir), + ) + + # Import progress bar conditionally + progress_context: object + if show_progress: + try: + from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn + + progress_context = Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + ) + except ImportError: + logger.debug("rich_not_available_using_simple_progress") + progress_context = None + else: + progress_context = None + + def process_prompts(progress: object | None) -> list[Path | None]: + """Process prompts with optional progress tracking.""" + batch_results: list[Path | None] = [] + + task_id = None + if progress is not None: + task_id = progress.add_task( # type: ignore[union-attr] + f"Generating {len(prompts)} images...", + total=len(prompts), + ) + + for idx, prompt_config in enumerate(prompts, 1): + # Validate prompt config + if not isinstance(prompt_config, dict): + logger.error( + "invalid_prompt_config", + index=idx, + 
type=type(prompt_config).__name__, + ) + batch_results.append(None) + if progress is not None and task_id is not None: + progress.advance(task_id) # type: ignore[union-attr] + continue + + prompt_text = prompt_config.get("prompt") + if not prompt_text or not isinstance(prompt_text, str): + logger.error("missing_prompt_text", index=idx) + batch_results.append(None) + if progress is not None and task_id is not None: + progress.advance(task_id) # type: ignore[union-attr] + continue + + # Check if output already exists (resume support) + output_path = prompt_config.get("output_path") + if output_path is not None: + output_path = Path(output_path) # type: ignore[arg-type] + if resume and output_path is not None and output_path.exists(): + logger.info("skipping_existing", index=idx, path=str(output_path)) + batch_results.append(output_path) + if progress is not None and task_id is not None: + progress.advance(task_id) # type: ignore[union-attr] + continue + + # Extract other parameters + model_key = prompt_config.get("model_key", DEFAULT_MODEL) + aspect_ratio = prompt_config.get("aspect_ratio") + image_size = prompt_config.get("image_size") + reference_images = prompt_config.get("reference_images") + + if reference_images: + reference_images = [Path(p) for p in reference_images] # type: ignore[union-attr] + + logger.info( + "batch_generating", + index=idx, + total=len(prompts), + prompt_preview=prompt_text[:30] + "..." 
if len(prompt_text) > 30 else prompt_text, + ) + + try: + result = generate_image( + prompt=prompt_text, + model_key=model_key, # type: ignore[arg-type] + reference_images=reference_images, # type: ignore[arg-type] + output_path=output_path, + output_dir=output_dir, + aspect_ratio=aspect_ratio, # type: ignore[arg-type] + image_size=image_size, # type: ignore[arg-type] + document=document, + ) + batch_results.append(result) + except Exception as e: + logger.error( + "batch_item_failed", + index=idx, + error=str(e), + ) + batch_results.append(None) + + if progress is not None and task_id is not None: + progress.advance(task_id) # type: ignore[union-attr] + + return batch_results + + # Run with or without progress bar + if progress_context is not None: + with progress_context: # type: ignore[union-attr] + results = process_prompts(progress_context) + else: + results = process_prompts(None) + + # Summary + successful = sum(1 for r in results if r is not None) + logger.info( + "batch_generation_complete", + successful=successful, + failed=len(prompts) - successful, + total=len(prompts), + ) + + return results + + def generate_story_sequence( base_prompt: str, num_parts: int, diff --git a/packages/gemini-image/tests/test_generator.py b/packages/gemini-image/tests/test_generator.py index ad39822..094a783 100644 --- a/packages/gemini-image/tests/test_generator.py +++ b/packages/gemini-image/tests/test_generator.py @@ -12,6 +12,7 @@ from gemini_image.exceptions import FileOperationError, ValidationError from gemini_image.generator import ( finalize_draft, + generate_batch, generate_image, generate_story_sequence, ) @@ -182,3 +183,126 @@ def test_finalize_draft_uses_2k_by_default( assert result is not None assert "_final" in result.name + + +class TestGenerateBatch: + """Tests for generate_batch function.""" + + def test_batch_empty_list_raises(self) -> None: + """Test that empty prompts list raises ValidationError.""" + with pytest.raises(ValidationError, match="cannot be 
empty"): + generate_batch([]) + + def test_batch_generates_multiple_images( + self, + tmp_path: Path, + mock_genai_response: MagicMock, + ) -> None: + """Test that batch generates multiple images.""" + mock_genai = MagicMock() + mock_types = MagicMock() + + mock_client = MagicMock() + mock_client.models.generate_content.return_value = mock_genai_response + mock_genai.Client.return_value = mock_client + + prompts = [ + {"prompt": "A sunset over mountains"}, + {"prompt": "A forest in autumn"}, + ] + + with ( + patch.object(client_module, "_genai", mock_genai), + patch.object(client_module, "_types", mock_types), + patch.dict(os.environ, {"GEMINI_API_KEY": "test-key"}), + ): + results = generate_batch( + prompts=prompts, + output_dir=tmp_path, + document=False, + show_progress=False, + ) + + assert len(results) == 2 + for result in results: + assert result is not None + assert result.exists() + + def test_batch_handles_invalid_prompt( + self, + tmp_path: Path, + mock_genai_response: MagicMock, + ) -> None: + """Test that batch handles invalid prompt configs gracefully.""" + mock_genai = MagicMock() + mock_types = MagicMock() + + mock_client = MagicMock() + mock_client.models.generate_content.return_value = mock_genai_response + mock_genai.Client.return_value = mock_client + + prompts = [ + {"prompt": "Valid prompt"}, + {"no_prompt_key": "Invalid"}, # Missing 'prompt' key + {"prompt": "Another valid prompt"}, + ] + + with ( + patch.object(client_module, "_genai", mock_genai), + patch.object(client_module, "_types", mock_types), + patch.dict(os.environ, {"GEMINI_API_KEY": "test-key"}), + ): + results = generate_batch( + prompts=prompts, + output_dir=tmp_path, + document=False, + show_progress=False, + ) + + # Should have 3 results: 2 successful, 1 None for invalid + assert len(results) == 3 + assert results[0] is not None + assert results[1] is None # Invalid prompt + assert results[2] is not None + + def test_batch_resume_skips_existing( + self, + tmp_path: Path, + 
sample_image_bytes: bytes, + mock_genai_response: MagicMock, + ) -> None: + """Test that batch resume skips existing output files.""" + mock_genai = MagicMock() + mock_types = MagicMock() + + mock_client = MagicMock() + mock_client.models.generate_content.return_value = mock_genai_response + mock_genai.Client.return_value = mock_client + + # Create existing file + existing = tmp_path / "existing.png" + existing.write_bytes(sample_image_bytes) + + prompts = [ + {"prompt": "New image"}, + {"prompt": "Should skip", "output_path": str(existing)}, + ] + + with ( + patch.object(client_module, "_genai", mock_genai), + patch.object(client_module, "_types", mock_types), + patch.dict(os.environ, {"GEMINI_API_KEY": "test-key"}), + ): + results = generate_batch( + prompts=prompts, + output_dir=tmp_path, + resume=True, + document=False, + show_progress=False, + ) + + assert len(results) == 2 + # The existing file should be returned as-is + assert results[1] == existing + # The API should only be called once (for the new image) + assert mock_client.models.generate_content.call_count == 1 diff --git a/uv.lock b/uv.lock index 08e4c08..98f473d 100644 --- a/uv.lock +++ b/uv.lock @@ -441,6 +441,7 @@ source = { editable = "packages/gemini-image" } dependencies = [ { name = "google-genai" }, { name = "python-dotenv" }, + { name = "rich" }, { name = "structlog" }, { name = "tenacity" }, ] @@ -459,6 +460,7 @@ requires-dist = [ { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.21.0" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.1.0" }, { name = "python-dotenv", specifier = ">=1.0.0" }, + { name = "rich", specifier = ">=13.0.0" }, { name = "structlog", specifier = ">=24.1.0" }, { name = "tenacity", specifier = ">=8.2.0" }, ] From d1e7eca75332dfda97c704d18b7d1f04508b927c Mon Sep 17 00:00:00 2001 From: Byron Williams Date: Fri, 9 Jan 2026 13:42:54 -0800 Subject: [PATCH 03/11] test(gemini-image): add functional tests with real Gemini API Add 
comprehensive functional tests that make real API calls to verify the image generation system works end-to-end: Functional Tests (10 tests): - test_generate_simple_image: Basic image generation - test_generate_image_with_aspect_ratio: Pro model with 16:9 aspect - test_generate_draft_image: Draft mode at 1K resolution - test_generate_image_with_metadata: JSON sidecar file creation - test_generate_image_with_registry: PROMPTS.md registry entry - test_batch_two_images: Batch processing two prompts - test_batch_with_different_models: Batch with mixed model settings - test_story_sequence_two_parts: 2-part story generation - test_story_sequence_resume: Resume skips existing parts - test_format_detection_matches_extension: Format detection validation Bug Fix: - Fix story resume logic to find existing images with any extension - Previously looked for .png only, but files may be saved as .jpg due to magic byte format detection and correction - Add _find_existing_image() helper to check all image extensions Test Infrastructure: - Add @functional pytest marker for integration tests - Add --run-functional CLI option - Skip functional tests when GEMINI_API_KEY not configured - Add pytest.ini_options to pyproject.toml Co-Authored-By: Claude Opus 4.5 --- packages/gemini-image/pyproject.toml | 8 + .../src/gemini_image/generator.py | 43 ++- packages/gemini-image/tests/conftest.py | 18 ++ .../gemini-image/tests/test_functional.py | 305 ++++++++++++++++++ packages/gemini-image/tests/test_generator.py | 3 +- 5 files changed, 368 insertions(+), 9 deletions(-) create mode 100644 packages/gemini-image/tests/test_functional.py diff --git a/packages/gemini-image/pyproject.toml b/packages/gemini-image/pyproject.toml index 05093cd..474b5fd 100644 --- a/packages/gemini-image/pyproject.toml +++ b/packages/gemini-image/pyproject.toml @@ -95,3 +95,11 @@ ignore = [ "tests/*" = ["S101", "PLR2004", "ANN"] # CLI module can use print statements "src/gemini_image/cli.py" = ["T201"] + 
+[tool.pytest.ini_options] +markers = [ + "functional: marks tests as functional (requiring GEMINI_API_KEY)", +] +filterwarnings = [ + "ignore::DeprecationWarning", +] diff --git a/packages/gemini-image/src/gemini_image/generator.py b/packages/gemini-image/src/gemini_image/generator.py index b45a05c..53ac2b4 100644 --- a/packages/gemini-image/src/gemini_image/generator.py +++ b/packages/gemini-image/src/gemini_image/generator.py @@ -434,16 +434,20 @@ def generate_story_sequence( ) for part_num in range(1, num_parts + 1): - # Build output path for this part - output_path = output_dir / f"{output_prefix.stem}_part{part_num}.png" - - # Check if we should resume from existing - if resume and output_path.exists(): - logger.info("skipping_existing_part", part=part_num, path=str(output_path)) - generated_images.append(output_path) - previous_image_path = output_path + # Build output path for this part (base name, extension may change) + base_output_path = output_dir / f"{output_prefix.stem}_part{part_num}" + + # Check if we should resume from existing (check multiple extensions) + existing_path = _find_existing_image(base_output_path) + if resume and existing_path is not None: + logger.info("skipping_existing_part", part=part_num, path=str(existing_path)) + generated_images.append(existing_path) + previous_image_path = existing_path continue + # Default to .png for output (will be corrected if needed) + output_path = base_output_path.with_suffix(".png") + # Build prompt for this part part_prompt = _build_story_prompt(base_prompt, part_num, num_parts) @@ -586,6 +590,29 @@ def finalize_draft( # --- Helper Functions --- +# Supported image extensions for resume checks +_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".gif", ".webp") + + +def _find_existing_image(base_path: Path) -> Path | None: + """Find an existing image file with any supported extension. 
+ + This handles the case where a file was saved with a different extension + than expected (e.g., .jpg instead of .png due to magic byte detection). + + Args: + base_path: Base path without extension (e.g., /path/to/image_part1). + + Returns: + Path to the existing file if found, None otherwise. + + """ + for ext in _IMAGE_EXTENSIONS: + candidate = base_path.with_suffix(ext) + if candidate.exists(): + return candidate + return None + def _validate_model_key(model_key: str) -> None: """Validate that the model key is valid.""" diff --git a/packages/gemini-image/tests/conftest.py b/packages/gemini-image/tests/conftest.py index c4e0208..158acef 100644 --- a/packages/gemini-image/tests/conftest.py +++ b/packages/gemini-image/tests/conftest.py @@ -12,6 +12,24 @@ from pathlib import Path +def pytest_configure(config: pytest.Config) -> None: + """Register custom pytest markers.""" + config.addinivalue_line( + "markers", + "functional: marks tests as functional (requiring GEMINI_API_KEY)", + ) + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Add custom command line options.""" + parser.addoption( + "--run-functional", + action="store_true", + default=False, + help="Run functional tests that require GEMINI_API_KEY", + ) + + @pytest.fixture def sample_image_bytes() -> bytes: """Return sample PNG image bytes (1x1 red pixel).""" diff --git a/packages/gemini-image/tests/test_functional.py b/packages/gemini-image/tests/test_functional.py new file mode 100644 index 0000000..279c914 --- /dev/null +++ b/packages/gemini-image/tests/test_functional.py @@ -0,0 +1,305 @@ +"""Functional tests with real Gemini API calls. + +These tests make real API calls and are skipped by default. 
+To run them, ensure GEMINI_API_KEY is set and use: + + pytest tests/test_functional.py -v --run-functional + +Or set the GEMINI_API_KEY environment variable and use: + + pytest tests/test_functional.py -v -m functional + +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +import pytest + +from gemini_image import generate_batch, generate_image, generate_story_sequence +from gemini_image.client import get_api_key +from gemini_image.exceptions import ConfigurationError +from gemini_image.io import detect_image_format, validate_image_file + +if TYPE_CHECKING: + pass + +# Check if API key is available +def _has_api_key() -> bool: + """Check if Gemini API key is configured.""" + try: + get_api_key() + return True + except ConfigurationError: + return False + + +# Skip marker for functional tests +requires_api_key = pytest.mark.skipif( + not _has_api_key(), + reason="GEMINI_API_KEY not configured - skipping functional tests", +) + +# Custom marker for functional tests +functional = pytest.mark.functional + + +@requires_api_key +@functional +class TestGenerateImageFunctional: + """Functional tests for generate_image with real API calls.""" + + def test_generate_simple_image(self, tmp_path: Path) -> None: + """Test generating a simple image with default settings.""" + result = generate_image( + prompt="A simple red square on white background", + output_dir=tmp_path, + document=False, + save_metadata_file=False, + ) + + assert result is not None, "Image generation returned None" + assert result.exists(), f"Image file not created at {result}" + assert result.stat().st_size > 0, "Image file is empty" + + # Verify it's a valid image + image_format, size = validate_image_file(result) + assert image_format in ("png", "jpeg"), f"Unexpected format: {image_format}" + assert size > 100, f"Image too small: {size} bytes" + + print(f"\n✓ Generated image: {result} ({size} bytes, {image_format})") + + def 
test_generate_image_with_aspect_ratio(self, tmp_path: Path) -> None: + """Test generating an image with specific aspect ratio.""" + result = generate_image( + prompt="A horizontal landscape with mountains", + model_key="pro", + aspect_ratio="16:9", + output_dir=tmp_path, + document=False, + save_metadata_file=False, + ) + + assert result is not None, "Image generation returned None" + assert result.exists(), f"Image file not created at {result}" + + image_format, size = validate_image_file(result) + print(f"\n✓ Generated 16:9 image: {result} ({size} bytes, {image_format})") + + def test_generate_draft_image(self, tmp_path: Path) -> None: + """Test generating a draft image at 1K resolution.""" + result = generate_image( + prompt="A draft sketch of a house", + is_draft=True, + output_dir=tmp_path, + document=False, + save_metadata_file=False, + ) + + assert result is not None, "Draft generation returned None" + assert result.exists(), f"Draft file not created at {result}" + assert "draft_" in result.name, f"Draft filename should contain 'draft_': {result.name}" + + image_format, size = validate_image_file(result) + print(f"\n✓ Generated draft: {result} ({size} bytes, {image_format})") + + def test_generate_image_with_metadata(self, tmp_path: Path) -> None: + """Test that metadata sidecar file is created.""" + result = generate_image( + prompt="A test image for metadata", + output_dir=tmp_path, + document=False, + save_metadata_file=True, + ) + + assert result is not None, "Image generation returned None" + + # Check for metadata file + metadata_path = result.with_suffix(".json") + assert metadata_path.exists(), f"Metadata file not created at {metadata_path}" + + import json + + with open(metadata_path) as f: + metadata = json.load(f) + + assert "prompt" in metadata, "Metadata missing 'prompt' field" + assert "model" in metadata, "Metadata missing 'model' field" + assert "created_at" in metadata, "Metadata missing 'created_at' field" + assert metadata["prompt"] == "A 
test image for metadata" + + print(f"\n✓ Generated with metadata: {result}") + print(f" Metadata: {metadata_path}") + + def test_generate_image_with_registry(self, tmp_path: Path) -> None: + """Test that PROMPTS.md registry entry is created.""" + result = generate_image( + prompt="A test image for registry", + output_dir=tmp_path, + document=True, + save_metadata_file=False, + ) + + assert result is not None, "Image generation returned None" + + # Check for registry file + registry_path = tmp_path / "PROMPTS.md" + assert registry_path.exists(), f"Registry file not created at {registry_path}" + + content = registry_path.read_text() + assert "A test image for registry" in content, "Prompt not found in registry" + assert result.name in content, "Image filename not found in registry" + + print(f"\n✓ Generated with registry: {result}") + print(f" Registry: {registry_path}") + + +@requires_api_key +@functional +class TestGenerateBatchFunctional: + """Functional tests for batch generation with real API calls.""" + + def test_batch_two_images(self, tmp_path: Path) -> None: + """Test generating a batch of two images.""" + prompts = [ + {"prompt": "A red circle"}, + {"prompt": "A blue triangle"}, + ] + + results = generate_batch( + prompts=prompts, + output_dir=tmp_path, + document=False, + show_progress=False, + ) + + assert len(results) == 2, f"Expected 2 results, got {len(results)}" + + successful = [r for r in results if r is not None] + assert len(successful) == 2, f"Expected 2 successful, got {len(successful)}" + + for i, result in enumerate(results): + assert result is not None, f"Result {i} is None" + assert result.exists(), f"Image {i} not created at {result}" + image_format, size = validate_image_file(result) + print(f"\n✓ Batch image {i + 1}: {result} ({size} bytes, {image_format})") + + def test_batch_with_different_models(self, tmp_path: Path) -> None: + """Test batch with different model settings.""" + prompts = [ + {"prompt": "A simple star shape", 
"model_key": "flash"}, + {"prompt": "A detailed flower", "model_key": "pro", "aspect_ratio": "1:1"}, + ] + + results = generate_batch( + prompts=prompts, + output_dir=tmp_path, + document=False, + show_progress=False, + ) + + assert len(results) == 2, f"Expected 2 results, got {len(results)}" + + for i, result in enumerate(results): + if result is not None: + image_format, size = validate_image_file(result) + print(f"\n✓ Batch image {i + 1}: {result} ({size} bytes)") + + +@requires_api_key +@functional +class TestGenerateStorySequenceFunctional: + """Functional tests for story sequence generation with real API calls.""" + + def test_story_sequence_two_parts(self, tmp_path: Path) -> None: + """Test generating a 2-part story sequence.""" + results = generate_story_sequence( + base_prompt="A simple shape that transforms: first a circle, then becoming a square", + num_parts=2, + output_dir=tmp_path, + document=False, + ) + + assert len(results) == 2, f"Expected 2 parts, got {len(results)}" + + for i, result in enumerate(results, 1): + assert result.exists(), f"Part {i} not created at {result}" + assert f"part{i}" in result.name, f"Part {i} filename missing 'part{i}': {result.name}" + image_format, size = validate_image_file(result) + print(f"\n✓ Story part {i}: {result} ({size} bytes, {image_format})") + + def test_story_sequence_resume(self, tmp_path: Path) -> None: + """Test that story resume skips existing parts.""" + # First, generate part 1 + results1 = generate_story_sequence( + base_prompt="A growing tree sequence", + num_parts=1, + output_prefix=tmp_path / "tree", + output_dir=tmp_path, + document=False, + ) + + assert len(results1) == 1, "First generation should produce 1 part" + part1_path = results1[0] + part1_size = part1_path.stat().st_size + + # Now try to generate 2 parts with resume - should skip part 1 + results2 = generate_story_sequence( + base_prompt="A growing tree sequence", + num_parts=2, + output_prefix=tmp_path / "tree", + output_dir=tmp_path, 
+ resume=True, + document=False, + ) + + assert len(results2) == 2, f"Expected 2 parts, got {len(results2)}" + + # Part 1 should be the same file (not regenerated) + assert results2[0] == part1_path, "Part 1 path changed" + assert results2[0].stat().st_size == part1_size, "Part 1 was regenerated" + + print("\n✓ Story resume worked: Part 1 was reused, Part 2 was generated") + + +@requires_api_key +@functional +class TestFormatDetectionFunctional: + """Functional tests for format detection with real generated images.""" + + def test_format_detection_matches_extension(self, tmp_path: Path) -> None: + """Test that detected format matches the saved extension.""" + result = generate_image( + prompt="A colorful abstract pattern", + output_dir=tmp_path, + document=False, + save_metadata_file=False, + ) + + assert result is not None, "Image generation returned None" + + # Read the image and detect format + with open(result, "rb") as f: + data = f.read() + + detected_format = detect_image_format(data) + expected_ext = ".png" if detected_format == "png" else ".jpg" + + # The extension should match the detected format (after correction) + actual_ext = result.suffix.lower() + if detected_format == "jpeg": + assert actual_ext in (".jpg", ".jpeg"), f"JPEG should have .jpg/.jpeg extension, got {actual_ext}" + else: + assert actual_ext == expected_ext, f"Extension mismatch: expected {expected_ext}, got {actual_ext}" + + print(f"\n✓ Format detection: {detected_format} -> {actual_ext}") + + +# Pytest configuration for custom markers +def pytest_configure(config): + """Register custom markers.""" + config.addinivalue_line( + "markers", "functional: marks tests as functional (requiring API key)" + ) diff --git a/packages/gemini-image/tests/test_generator.py b/packages/gemini-image/tests/test_generator.py index 094a783..7ab6b2e 100644 --- a/packages/gemini-image/tests/test_generator.py +++ b/packages/gemini-image/tests/test_generator.py @@ -37,12 +37,13 @@ def 
test_generate_image_missing_api_key_raises(self) -> None: mock_genai = MagicMock() mock_types = MagicMock() + # Need to mock load_dotenv to prevent it from loading .env file with ( patch.object(client_module, "_genai", mock_genai), patch.object(client_module, "_types", mock_types), + patch("gemini_image.client.load_dotenv"), # Prevent .env loading patch.dict(os.environ, {}, clear=True), ): - os.environ.pop("GEMINI_API_KEY", None) with pytest.raises(ConfigurationError, match="GEMINI_API_KEY"): generate_image("test prompt") From ca1044ddaf9cf19d1e68296050ad7c390684cde7 Mon Sep 17 00:00:00 2001 From: Byron Williams Date: Fri, 9 Jan 2026 14:04:34 -0800 Subject: [PATCH 04/11] docs(gemini-image): update README with v0.2.0 features - Add batch processing documentation and JSON file format - Document PROMPTS.md registry feature - Add exception handling examples - Update API reference with all function signatures - Add development section with setup commands - Document resolution options and aspect ratios Co-Authored-By: Claude Opus 4.5 --- packages/gemini-image/README.md | 137 +++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 3 deletions(-) diff --git a/packages/gemini-image/README.md b/packages/gemini-image/README.md index 3a87faf..156d759 100644 --- a/packages/gemini-image/README.md +++ b/packages/gemini-image/README.md @@ -9,6 +9,10 @@ A comprehensive image generation library built on Google's Gemini models (Nano B - **Multi-part story generation** - sequential images with visual continuity - **Draft-then-finalize workflow** - 75% cost reduction during iteration - **Thinking mode** - visualize model reasoning with intermediate images +- **Batch processing** - generate multiple images from a JSON file +- **PROMPTS.md registry** - automatic documentation of all generations +- **Magic byte format detection** - automatic extension correction for API mismatches +- **Retry logic** - automatic retry with exponential backoff on transient failures ## 
Installation @@ -24,6 +28,14 @@ pip install byronwilliamscpa-gemini-image ### Set API Key +Create a `.env` file in your project directory: + +```bash +GEMINI_API_KEY=your-api-key-here +``` + +Or set the environment variable: + ```bash export GEMINI_API_KEY='your-api-key' ``` @@ -31,7 +43,7 @@ export GEMINI_API_KEY='your-api-key' ### Python API ```python -from gemini_image import generate_image, generate_story_sequence +from gemini_image import generate_image, generate_story_sequence, generate_batch # Basic text-to-image result = generate_image("A futuristic city at sunset") @@ -59,13 +71,19 @@ edited = generate_image( ) # Multi-part story sequence -from gemini_image import generate_story_sequence - images = generate_story_sequence( "A journey through data governance evolution", num_parts=3, aspect_ratio="16:9", ) + +# Batch processing +prompts = [ + {"prompt": "A sunset over mountains", "aspect_ratio": "16:9"}, + {"prompt": "A forest in autumn", "model_key": "pro"}, + {"prompt": "A city at night", "image_size": "2K"}, +] +results = generate_batch(prompts, output_dir=Path("./images")) ``` ### Command Line @@ -89,6 +107,15 @@ gemini-image "Make the building taller" -r blueprint.png # Multi-part story gemini-image "Evolution of a data platform" --story-parts 4 -o evolution +# Resume interrupted story generation +gemini-image "Continue story" --story-parts 5 -o story --resume + +# Batch processing from JSON file +gemini-image --batch prompts.json -d ./output + +# Disable PROMPTS.md documentation (for privacy) +gemini-image "Private prompt" --no-document -o private.png + # Show thinking process gemini-image "Complex blueprint design" --save-thoughts --verbose @@ -134,6 +161,49 @@ gemini-image "Add more detail to the header" -r draft.png --draft-mode -o draft_ gemini-image --finalize draft_v2.png --size 2K -o final.png ``` +## Batch Processing + +Create a JSON file with prompts: + +```json +[ + {"prompt": "A sunset over mountains", "aspect_ratio": "16:9"}, + {"prompt": 
"A forest in autumn", "model_key": "pro", "image_size": "2K"}, + {"prompt": "A city at night", "output_path": "city.png"} +] +``` + +Then process: + +```bash +gemini-image --batch prompts.json -d ./output +``` + +Supported fields per prompt: + +- `prompt` (required): Text description +- `output_path`: Specific output filename +- `model_key`: "flash" or "pro" +- `aspect_ratio`: "1:1", "3:4", "4:3", "9:16", "16:9" +- `image_size`: "1K", "2K", "4K" +- `reference_images`: List of reference image paths + +## PROMPTS.md Registry + +Every generation is automatically logged to `PROMPTS.md` in the output directory: + +```markdown +## Generation Log + +### 2026-01-09 13:45:22 - generated_20260109_134522.jpg +- **Prompt**: A futuristic city at sunset +- **Model**: pro +- **Size**: 2K +- **Aspect**: 16:9 +``` + +Disable with `--no-document` flag or `document=False` parameter. + ## API Reference ### `generate_image()` @@ -151,9 +221,25 @@ def generate_image( save_thoughts: bool = False, verbose: bool = False, is_draft: bool = False, + document: bool = True, # Log to PROMPTS.md + registry_path: Path | None = None, + save_metadata_file: bool = True, # Save JSON sidecar ) -> Path | None: ``` +### `generate_batch()` + +```python +def generate_batch( + prompts: list[dict[str, object]], + output_dir: Path | None = None, + parallel: int = 1, + resume: bool = True, + document: bool = True, + show_progress: bool = True, +) -> list[Path | None]: +``` + ### `generate_story_sequence()` ```python @@ -166,6 +252,8 @@ def generate_story_sequence( aspect_ratio: AspectRatio | None = None, image_size: ImageSize | None = None, verbose: bool = False, + resume: bool = True, # Skip existing parts + document: bool = True, ) -> list[Path]: ``` @@ -181,9 +269,52 @@ def finalize_draft( aspect_ratio: AspectRatio | None = None, image_size: ImageSize | None = None, verbose: bool = False, + document: bool = True, ) -> Path | None: ``` +## Exception Handling + +```python +from gemini_image import ( + 
GeminiImageError, # Base exception + ConfigurationError, # Missing API key + ValidationError, # Invalid parameters + APIError, # API errors + RateLimitError, # Rate limiting + ContentBlockedError, # Safety filter + FileOperationError, # File I/O errors + FormatDetectionError, # Unknown image format +) + +try: + result = generate_image("A sunset") +except RateLimitError: + print("Rate limited; automatic retries were exhausted") +except ContentBlockedError: + print("Content blocked by safety filters") +except GeminiImageError as e: + print(f"Generation failed: {e}") +``` + +## Development + +```bash +# Clone and install +git clone https://github.com/ByronWilliamsCPA/python-libs.git +cd python-libs/packages/gemini-image +uv sync --all-extras + +# Run tests +uv run pytest tests -v + +# Run functional tests (requires API key) +uv run pytest tests/test_functional.py -v + +# Lint +uv run ruff check src tests +``` + ## License MIT From 228f081201e07689562a09af03cfdfd98a8b789a Mon Sep 17 00:00:00 2001 From: Byron Williams Date: Fri, 9 Jan 2026 14:05:30 -0800 Subject: [PATCH 05/11] docs(gemini-image): add comprehensive USAGE_GUIDE.md - Installation and configuration instructions - Model selection and resolution options - Reference-based editing examples - Multi-part story sequence workflow - Batch processing with JSON format - Draft-then-finalize cost optimization pattern - PROMPTS.md registry documentation - Exception handling and troubleshooting guide - Best practices for production use Co-Authored-By: Claude Opus 4.5 --- packages/gemini-image/docs/USAGE_GUIDE.md | 438 ++++++++++++++++++++++ 1 file changed, 438 insertions(+) create mode 100644 packages/gemini-image/docs/USAGE_GUIDE.md diff --git a/packages/gemini-image/docs/USAGE_GUIDE.md b/packages/gemini-image/docs/USAGE_GUIDE.md new file mode 100644 index 0000000..882dc8b --- /dev/null +++ b/packages/gemini-image/docs/USAGE_GUIDE.md @@ -0,0 +1,438 @@ +# Gemini Image Usage Guide + +A comprehensive guide to using the
gemini-image library for AI-powered image generation. + +## Table of Contents + +1. [Installation](#installation) +2. [Configuration](#configuration) +3. [Basic Usage](#basic-usage) +4. [Advanced Features](#advanced-features) +5. [Workflow Patterns](#workflow-patterns) +6. [Troubleshooting](#troubleshooting) + +## Installation + +### Using uv (recommended) + +```bash +uv add byronwilliamscpa-gemini-image +``` + +### Using pip + +```bash +pip install byronwilliamscpa-gemini-image +``` + +## Configuration + +### API Key Setup + +The library requires a Google Gemini API key. You can obtain one from +[Google AI Studio](https://aistudio.google.com/app/apikey). + +**Option 1: Environment variable** + +```bash +export GEMINI_API_KEY='your-api-key-here' +``` + +**Option 2: .env file** + +Create a `.env` file in your project directory: + +```bash +GEMINI_API_KEY=your-api-key-here +``` + +The library automatically loads `.env` files using python-dotenv. + +## Basic Usage + +### Python API + +```python +from gemini_image import generate_image + +# Simple text-to-image generation +result = generate_image("A futuristic city at sunset") +print(f"Image saved to: {result}") +``` + +### Command Line + +```bash +# Basic generation +gemini-image "A serene mountain landscape at dawn" + +# With custom output path +gemini-image "A technical blueprint" -o blueprint.png +``` + +## Advanced Features + +### Model Selection + +Two models are available: + +| Key | Model | Best For | +|-----|-------|----------| +| `flash` | Gemini 2.5 Flash | Fast iterations, drafts | +| `pro` | Gemini 3 Pro | High quality, text rendering, 4K | + +```python +# Using Flash model for speed +result = generate_image("Quick sketch", model_key="flash") + +# Using Pro model for quality (default) +result = generate_image("Detailed illustration", model_key="pro") +``` + +### Resolution Control + +The Pro model supports multiple resolutions: + +| Size | 16:9 Dimensions | Cost Factor |
+|------|-----------------|-------------| +| 1K | ~1408 x 768 | 1x (draft) | +| 2K | 2752 x 1536 | 2x | +| 4K | 5504 x 3072 | 4x | + +```python +# Generate at 4K resolution +result = generate_image( + "A detailed landscape", + image_size="4K", + aspect_ratio="16:9", +) +``` + +### Aspect Ratios + +Supported aspect ratios: + +- `1:1` - Square (social media, profile images) +- `3:4` - Portrait (documents, posters) +- `4:3` - Standard landscape (presentations) +- `9:16` - Vertical/mobile (stories, mobile apps) +- `16:9` - Widescreen (documents, videos) + +```python +# Square image for social media +result = generate_image("Profile avatar", aspect_ratio="1:1") + +# Vertical for mobile +result = generate_image("Mobile wallpaper", aspect_ratio="9:16") +``` + +### Reference-Based Editing + +Modify existing images with prompts: + +```python +from pathlib import Path + +# Edit an existing image +edited = generate_image( + "Make the sky more dramatic", + reference_images=[Path("original.png")], +) +``` + +CLI: + +```bash +gemini-image "Add more clouds" -r original.png -o edited.png +``` + +### Multi-Part Story Sequences + +Generate a series of related images with visual continuity: + +```python +from gemini_image import generate_story_sequence + +# Generate a 4-part story +images = generate_story_sequence( + "The evolution of a seed growing into a tree", + num_parts=4, + aspect_ratio="16:9", +) + +for i, path in enumerate(images, 1): + print(f"Part {i}: {path}") +``` + +CLI: + +```bash +# Generate story sequence +gemini-image "A day in the life of a city" --story-parts 4 -o city_story + +# Resume interrupted sequence +gemini-image "Continue the story" --story-parts 6 -o city_story --resume +``` + +### Batch Processing + +Process multiple prompts from a JSON file: + +**prompts.json:** + +```json +[ + { + "prompt": "A sunrise over mountains", + "aspect_ratio": "16:9", + "image_size": "2K" + }, + { + "prompt": "A forest path in autumn", + "model_key": "pro", + "output_path": 
"forest.png" + }, + { + "prompt": "A city skyline at night", + "aspect_ratio": "1:1" + } +] +``` + +**Python:** + +```python +import json +from pathlib import Path +from gemini_image import generate_batch + +with open("prompts.json") as f: + prompts = json.load(f) + +results = generate_batch( + prompts=prompts, + output_dir=Path("./output"), + show_progress=True, +) + +for prompt, result in zip(prompts, results): + if result: + print(f"Generated: {result}") + else: + print(f"Failed: {prompt['prompt'][:30]}...") +``` + +**CLI:** + +```bash +gemini-image --batch prompts.json -d ./output +``` + +### Batch Fields Reference + +| Field | Type | Description | +|-------|------|-------------| +| `prompt` | str | Text description (required) | +| `output_path` | str | Specific output filename | +| `model_key` | str | "flash" or "pro" | +| `aspect_ratio` | str | "1:1", "3:4", "4:3", "9:16", "16:9" | +| `image_size` | str | "1K", "2K", "4K" | +| `reference_images` | list | Paths to reference images | + +## Workflow Patterns + +### Draft-Then-Finalize Workflow + +Save ~75% on costs during iteration: + +```python +from pathlib import Path +from gemini_image import generate_image, finalize_draft + +# 1. Generate draft at 1K (fast, cheap) +draft = generate_image( + "A technical architecture diagram", + is_draft=True, +) + +# 2. Review and iterate on draft +draft_v2 = generate_image( + "Add more detail to the database section", + reference_images=[draft], + is_draft=True, +) + +# 3. 
Finalize at higher resolution when satisfied +final = finalize_draft( + draft_v2, + image_size="2K", +) +``` + +CLI workflow: + +```bash +# Draft mode +gemini-image "Technical blueprint" --draft-mode -o draft.png + +# Iterate +gemini-image "Add annotations" -r draft.png --draft-mode -o draft_v2.png + +# Finalize +gemini-image --finalize draft_v2.png --size 2K -o final.png +``` + +### PROMPTS.md Registry + +Every generation is automatically logged to `PROMPTS.md`: + +```markdown +## Generation Log + +### 2026-01-09 14:30:22 - architecture_diagram.png +- **Prompt**: A technical architecture diagram showing microservices +- **Model**: pro +- **Size**: 2K +- **Aspect**: 16:9 +``` + +**Disable for privacy:** + +```python +result = generate_image("Private prompt", document=False) +``` + +```bash +gemini-image "Private prompt" --no-document -o private.png +``` + +### Metadata Sidecar Files + +Each image can have a JSON metadata file: + +```python +# Enable metadata file (default) +result = generate_image( + "Test prompt", + save_metadata_file=True, # Creates result.json alongside result.png +) +``` + +The metadata file contains: + +```json +{ + "prompt": "Test prompt", + "model": "pro", + "aspect_ratio": "16:9", + "image_size": "2K", + "created_at": "2026-01-09T14:30:22Z" +} +``` + +### Thinking Mode + +Visualize the model's reasoning process: + +```bash +gemini-image "Complex technical diagram" --save-thoughts --verbose +``` + +This saves intermediate thinking images alongside the final result. + +## Troubleshooting + +### Common Errors + +**ConfigurationError: GEMINI_API_KEY not found** + +```python +# Solution: Set the environment variable +import os +os.environ["GEMINI_API_KEY"] = "your-key" +``` + +**RateLimitError** + +The library automatically retries with exponential backoff. 
If you still hit limits: + +```python +import time + +for prompt in prompts: + try: + result = generate_image(prompt) + except RateLimitError: + time.sleep(60) # Wait before retrying + result = generate_image(prompt) +``` + +**ContentBlockedError** + +The prompt triggered safety filters. Try rephrasing: + +```python +from gemini_image.exceptions import ContentBlockedError + +try: + result = generate_image("potentially sensitive prompt") +except ContentBlockedError: + print("Prompt was blocked by safety filters. Try rephrasing.") +``` + +**FormatDetectionError** + +The API returned an unknown image format: + +```python +from gemini_image.exceptions import FormatDetectionError + +try: + result = generate_image("test") +except FormatDetectionError as e: + print(f"Unknown format: {e}") +``` + +### Debug Logging + +Enable verbose output for debugging: + +```python +result = generate_image( + "Test prompt", + verbose=True, # Shows detailed progress +) +``` + +CLI: + +```bash +gemini-image "Test prompt" --verbose +``` + +### Checking Available Models + +```bash +gemini-image --list-models +``` + +## Exception Reference + +| Exception | Cause | Solution | +|-----------|-------|----------| +| `ConfigurationError` | Missing API key | Set GEMINI_API_KEY | +| `ValidationError` | Invalid parameters | Check model_key, aspect_ratio, etc. | +| `APIError` | API returned error | Check prompt, retry | +| `RateLimitError` | Too many requests | Wait and retry | +| `ContentBlockedError` | Safety filter triggered | Rephrase prompt | +| `FileOperationError` | File I/O error | Check permissions | +| `FormatDetectionError` | Unknown image format | Report bug | + +## Best Practices + +1. **Use draft mode** for iteration - save costs and time +2. **Be specific** in prompts - detailed prompts yield better results +3. **Use reference images** for consistent style +4. **Enable registry** for prompt history tracking +5. **Handle exceptions** gracefully in production code +6. 
**Use batch processing** for multiple generations From f2b579318735ffe3e13a67983317c954668b4dc0 Mon Sep 17 00:00:00 2001 From: Byron Williams Date: Fri, 9 Jan 2026 14:06:56 -0800 Subject: [PATCH 06/11] feat(gemini-image): add Claude agent integration and validation Phase 5 Implementation: - Add agents/diagram-specialist.md for technical diagram generation - Add scripts/validate_installation.py for installation verification - Include prompt engineering guidelines and workflow examples - Document batch documentation generation patterns Co-Authored-By: Claude Opus 4.5 --- .../gemini-image/agents/diagram-specialist.md | 287 ++++++++++++++++++ .../scripts/validate_installation.py | 209 +++++++++++++ 2 files changed, 496 insertions(+) create mode 100644 packages/gemini-image/agents/diagram-specialist.md create mode 100755 packages/gemini-image/scripts/validate_installation.py diff --git a/packages/gemini-image/agents/diagram-specialist.md b/packages/gemini-image/agents/diagram-specialist.md new file mode 100644 index 0000000..2303c15 --- /dev/null +++ b/packages/gemini-image/agents/diagram-specialist.md @@ -0,0 +1,287 @@ +# Diagram Specialist Agent + +A specialized Claude Code agent for generating professional technical diagrams, blueprints, +and visual documentation using the gemini-image library. + +## Agent Profile + +```yaml +name: diagram-specialist +description: Generates professional technical diagrams and visual documentation +tools: + - gemini-image CLI + - Read/Write for prompt refinement + - Bash for image management +``` + +## Capabilities + +1. **Architecture Diagrams** - System architecture, microservices, cloud infrastructure +2. **Data Flow Diagrams** - ETL pipelines, data governance flows, integration patterns +3. **Process Diagrams** - Business processes, workflows, decision trees +4. **Technical Blueprints** - API designs, database schemas, network topologies +5. **Organizational Charts** - Team structures, reporting hierarchies +6. 
**Timeline Visualizations** - Project roadmaps, implementation phases + +## Usage Patterns + +### Single Diagram Generation + +```bash +# Generate a simple architecture diagram +gemini-image "A microservices architecture diagram showing API gateway, auth service, user service, and database layer with clear labels" \ + --model pro \ + --aspect 16:9 \ + --size 2K \ + -o architecture.png +``` + +### Draft-Then-Finalize Workflow + +For complex diagrams requiring iteration: + +```bash +# 1. Quick draft for structure validation +gemini-image "Data governance lifecycle diagram with data collection, classification, storage, and archival stages" \ + --draft-mode \ + -o governance_draft.png + +# 2. Review and refine +gemini-image "Refine the diagram: add compliance checkpoints between each stage, use blue color scheme" \ + -r governance_draft.png \ + --draft-mode \ + -o governance_v2.png + +# 3. Finalize at production quality +gemini-image --finalize governance_v2.png --size 2K -o governance_final.png +``` + +### Multi-Part Documentation Set + +Generate a series of related diagrams: + +```bash +gemini-image "A data platform architecture evolution: from monolithic to microservices to serverless" \ + --story-parts 3 \ + --aspect 16:9 \ + -o platform_evolution +``` + +### Batch Documentation Generation + +Create a JSON file for systematic documentation: + +```json +[ + { + "prompt": "System context diagram showing external users, APIs, and third-party integrations", + "aspect_ratio": "16:9", + "output_path": "01_context.png" + }, + { + "prompt": "Container diagram showing web app, API layer, message queue, and databases", + "aspect_ratio": "16:9", + "output_path": "02_containers.png" + }, + { + "prompt": "Component diagram showing authentication, authorization, and user management modules", + "aspect_ratio": "16:9", + "output_path": "03_components.png" + }, + { + "prompt": "Deployment diagram showing Kubernetes pods, load balancers, and cloud services", + "aspect_ratio": 
"16:9", + "output_path": "04_deployment.png" + } +] +``` + +```bash +gemini-image --batch c4_diagrams.json -d ./docs/diagrams +``` + +## Prompt Engineering Guidelines + +### Effective Diagram Prompts + +**DO:** + +- Specify diagram type explicitly: "architecture diagram", "flowchart", "sequence diagram" +- Include all components and their relationships +- Request clear labels and annotations +- Specify color schemes or visual styles +- Mention target audience (technical, executive, etc.) + +**DON'T:** + +- Use vague descriptions like "a nice diagram" +- Omit relationship types between components +- Forget to specify text readability requirements +- Request too many elements in one diagram + +### Prompt Templates + +**Architecture Diagram:** + +```text +A [type] architecture diagram showing: +- [Component 1] with [responsibilities] +- [Component 2] connected to [Component 1] via [connection type] +- [Component 3] for [purpose] +Use [color scheme] color scheme with clear labels and arrows indicating data flow. +Style: professional, clean, suitable for [audience]. +``` + +**Data Flow Diagram:** + +```text +A data flow diagram illustrating [process name]: +1. [Source] produces [data type] +2. [Processor] transforms data using [method] +3. [Destination] stores/consumes the result +Include validation checkpoints and error handling paths. +Use icons where appropriate. +``` + +**Process Flowchart:** + +```text +A business process flowchart for [process name]: +- Start: [trigger event] +- Decision points: [list key decisions] +- Endpoints: [success and failure outcomes] +Use standard flowchart symbols with clear decision labels. 
+``` + +## Integration Examples + +### Python Script for Documentation Generation + +```python +#!/usr/bin/env python3 +"""Generate project documentation diagrams.""" + +from pathlib import Path +from gemini_image import generate_batch, generate_image + +# Define documentation structure +docs_dir = Path("./docs/diagrams") +docs_dir.mkdir(parents=True, exist_ok=True) + +# Single high-priority diagram +overview = generate_image( + prompt=""" + System overview diagram showing: + - Web frontend (React) + - API Gateway (Kong) + - Microservices (User, Order, Payment) + - Message Queue (RabbitMQ) + - Databases (PostgreSQL, Redis) + Professional style with technology logos and data flow arrows. + """, + model_key="pro", + aspect_ratio="16:9", + image_size="2K", + output_dir=docs_dir, + output_path=docs_dir / "system_overview.png", +) +print(f"Generated: {overview}") + +# Batch generate supporting diagrams +supporting_diagrams = [ + { + "prompt": "Authentication flow: user login, JWT generation, token validation, session management", + "aspect_ratio": "16:9", + "output_path": "auth_flow.png", + }, + { + "prompt": "Database schema: users, orders, products, payments tables with relationships", + "aspect_ratio": "4:3", + "output_path": "database_schema.png", + }, + { + "prompt": "CI/CD pipeline: code commit, build, test, deploy stages with tool icons", + "aspect_ratio": "16:9", + "output_path": "cicd_pipeline.png", + }, +] + +results = generate_batch( + prompts=supporting_diagrams, + output_dir=docs_dir, + show_progress=True, +) + +for prompt, result in zip(supporting_diagrams, results): + if result: + print(f"Generated: {result}") + else: + print(f"Failed: {prompt['output_path']}") +``` + +### Claude Code Integration + +When used within Claude Code, this agent can: + +1. **Analyze Codebase** - Read code structure and generate accurate diagrams +2. **Update Documentation** - Regenerate diagrams when architecture changes +3. 
**Validate Consistency** - Ensure diagrams match implementation +4. **Version Control** - Track diagram changes via PROMPTS.md registry + +Example Claude Code workflow: + +```text +User: Generate architecture diagrams for this project + +Agent: +1. Analyze src/ directory structure +2. Identify key components and dependencies +3. Generate context, container, and component diagrams +4. Save to docs/diagrams/ with metadata +5. Update README.md with diagram references +``` + +## Quality Checklist + +Before finalizing diagrams, verify: + +- [ ] All components are labeled clearly +- [ ] Relationships have descriptive labels +- [ ] Color scheme is consistent +- [ ] Text is readable at expected display size +- [ ] Diagram type matches content (flowchart vs. architecture) +- [ ] Level of detail appropriate for audience +- [ ] No overlapping elements +- [ ] Consistent iconography throughout + +## Troubleshooting + +### Text Not Readable + +Use Pro model with 2K+ resolution: + +```bash +gemini-image "..." --model pro --size 2K +``` + +### Too Much Detail + +Split into multiple diagrams using story sequences or batch processing. + +### Inconsistent Style + +Use reference images for style continuity: + +```bash +gemini-image "New diagram in same style" -r existing_style.png +``` + +### Complex Relationships + +Use draft mode to iterate on layout: + +```bash +gemini-image "..." --draft-mode -o draft.png +# Review and refine +gemini-image "Move component X to the left, add more spacing" -r draft.png --draft-mode -o draft_v2.png +``` diff --git a/packages/gemini-image/scripts/validate_installation.py b/packages/gemini-image/scripts/validate_installation.py new file mode 100755 index 0000000..309763a --- /dev/null +++ b/packages/gemini-image/scripts/validate_installation.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python3 +"""Validate gemini-image installation and configuration. 
+ +Usage: + python scripts/validate_installation.py + # or + uv run python scripts/validate_installation.py +""" + +from __future__ import annotations + +import sys + + +def check_import() -> bool: + """Check if the package can be imported.""" + print("Checking package import...", end=" ") + try: + import gemini_image # noqa: F401 + + print("OK") + return True + except ImportError as e: + print(f"FAILED: {e}") + return False + + +def check_exports() -> bool: + """Check if all expected functions are exported.""" + print("Checking exports...", end=" ") + try: + from gemini_image import ( + finalize_draft, + generate_batch, + generate_image, + generate_story_sequence, + ) + + exports = [generate_image, generate_story_sequence, generate_batch, finalize_draft] + if all(callable(f) for f in exports): + print("OK") + return True + print("FAILED: Some exports are not callable") + return False + except ImportError as e: + print(f"FAILED: {e}") + return False + + +def check_exceptions() -> bool: + """Check if exception classes are available.""" + print("Checking exceptions...", end=" ") + try: + from gemini_image.exceptions import ( + APIError, + ConfigurationError, + ContentBlockedError, + FileOperationError, + FormatDetectionError, + GeminiImageError, + RateLimitError, + ValidationError, + ) + + exceptions = [ + GeminiImageError, + ConfigurationError, + ValidationError, + APIError, + RateLimitError, + ContentBlockedError, + FileOperationError, + FormatDetectionError, + ] + if all(issubclass(e, Exception) for e in exceptions): + print("OK") + return True + print("FAILED: Some exceptions are not proper Exception subclasses") + return False + except ImportError as e: + print(f"FAILED: {e}") + return False + + +def check_api_key() -> bool: + """Check if API key is configured.""" + print("Checking API key configuration...", end=" ") + try: + from gemini_image.client import get_api_key + + key = get_api_key() + if key and len(key) > 10: + # Mask the key for security + masked = 
f"{key[:4]}...{key[-4:]}" + print(f"OK (key: {masked})") + return True + print("FAILED: API key too short or empty") + return False + except Exception as e: + print(f"FAILED: {e}") + return False + + +def check_cli() -> bool: + """Check if CLI is accessible.""" + print("Checking CLI entry point...", end=" ") + try: + from gemini_image.cli import main + + if callable(main): + print("OK") + return True + print("FAILED: CLI main is not callable") + return False + except ImportError as e: + print(f"FAILED: {e}") + return False + + +def check_models() -> bool: + """Check if model configurations are valid.""" + print("Checking model configurations...", end=" ") + try: + from gemini_image.models import MODELS + + if "flash" in MODELS and "pro" in MODELS: + print(f"OK (models: {', '.join(MODELS.keys())})") + return True + print("FAILED: Expected 'flash' and 'pro' models") + return False + except ImportError as e: + print(f"FAILED: {e}") + return False + + +def check_io_functions() -> bool: + """Check if I/O functions are available.""" + print("Checking I/O functions...", end=" ") + try: + from gemini_image.io import ( + detect_image_format, + save_image, + validate_image_file, + ) + + if all(callable(f) for f in [detect_image_format, save_image, validate_image_file]): + print("OK") + return True + print("FAILED: Some I/O functions are not callable") + return False + except ImportError as e: + print(f"FAILED: {e}") + return False + + +def main() -> int: + """Run all validation checks.""" + print("=" * 50) + print("Gemini Image Installation Validation") + print("=" * 50) + print() + + checks = [ + ("Package Import", check_import), + ("Function Exports", check_exports), + ("Exception Classes", check_exceptions), + ("CLI Entry Point", check_cli), + ("Model Configurations", check_models), + ("I/O Functions", check_io_functions), + ("API Key", check_api_key), # Last since it's optional for basic install + ] + + results: list[tuple[str, bool]] = [] + for name, check_fn in checks: 
+ result = check_fn() + results.append((name, result)) + + print() + print("=" * 50) + print("Summary") + print("=" * 50) + + passed = sum(1 for _, ok in results if ok) + total = len(results) + + for name, ok in results: + status = "PASS" if ok else "FAIL" + print(f" {name}: {status}") + + print() + print(f"Result: {passed}/{total} checks passed") + + # API key is optional for installation validation + core_passed = all(ok for name, ok in results if name != "API Key") + + if core_passed: + print() + if not results[-1][1]: # API key failed + print("Note: API key not configured. Set GEMINI_API_KEY to enable generation.") + print("Installation is valid!") + return 0 + else: + print() + print("Installation has issues. Please check the errors above.") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) From c889da6c0a076f29d6287e273dcf2dcfa0e5bd46 Mon Sep 17 00:00:00 2001 From: Byron Williams Date: Fri, 9 Jan 2026 14:36:15 -0800 Subject: [PATCH 07/11] feat(gemini-image): add missing aspect ratios and reference image validation Per Gemini API documentation review: - Add 5 missing aspect ratios: 2:3, 3:2, 4:5, 5:4, 21:9 - Add reference image limit validation (3 for flash, 14 for pro) - Update README and USAGE_GUIDE with complete aspect ratio list - Add MAX_REFERENCE_IMAGES constant to models.py - Add tests for new aspect ratios and reference image limits Co-Authored-By: Claude Opus 4.5 --- FIXES_NEEDED.md | 226 ++++++++++++ packages/gemini-image/README.md | 9 +- packages/gemini-image/docs/USAGE_GUIDE.md | 13 + .../src/gemini_image/generator.py | 36 ++ .../gemini-image/src/gemini_image/models.py | 24 +- packages/gemini-image/tests/test_generator.py | 66 ++++ packages/gemini-image/tests/test_models.py | 14 +- tmp_cleanup/image-generationreview.md | 349 ++++++++++++++++++ 8 files changed, 730 insertions(+), 7 deletions(-) create mode 100644 FIXES_NEEDED.md create mode 100644 tmp_cleanup/image-generationreview.md diff --git a/FIXES_NEEDED.md b/FIXES_NEEDED.md new 
file mode 100644 index 0000000..5859fb9 --- /dev/null +++ b/FIXES_NEEDED.md @@ -0,0 +1,226 @@ +# python-libs cloudflare-auth Package Fixes Needed + +**Status**: Critical - Package cannot be used as a dependency +**Discovered**: 2025-12-06 during homelab-infra migration +**Reporter**: Claude Code (via ByronWilliams) + +--- + +## Summary + +The `byronwilliamscpa-cloudflare-auth` package has broken imports that prevent it from being used as a dependency in other projects. This violates the core purpose of python-libs as a shared library repository. + +## Issues Found + +### 1. ✅ FIXED LOCALLY: Incorrect Import Statements (21+ occurrences) + +**Problem**: Module imports use `src.cloudflare_auth` instead of `cloudflare_auth` + +**Status**: Fixed locally in this clone (not committed) + +**Files Affected**: +- `packages/cloudflare-auth/src/cloudflare_auth/__init__.py` +- `packages/cloudflare-auth/src/cloudflare_auth/middleware.py` +- `packages/cloudflare-auth/src/cloudflare_auth/middleware_enhanced.py` +- `packages/cloudflare-auth/src/cloudflare_auth/validators.py` +- `packages/cloudflare-auth/src/cloudflare_auth/redis_sessions.py` +- And potentially others + +**Example**: +```python +# Incorrect (current in repo) +from src.cloudflare_auth.models import CloudflareUser +from src.cloudflare_auth.middleware import CloudflareAuthMiddleware + +# Correct (fixed locally) +from cloudflare_auth.models import CloudflareUser +from cloudflare_auth.middleware import CloudflareAuthMiddleware +``` + +**Fix Applied**: +```bash +cd packages/cloudflare-auth +find src -name "*.py" -exec sed -i 's/from src\.cloudflare_auth/from cloudflare_auth/g' {} \; +find src -name "*.py" -exec sed -i 's/import src\.cloudflare_auth/import cloudflare_auth/g' {} \; +``` + +--- + +### 2. 
❌ NOT FIXED: Missing CloudflareSettings Module + +**Problem**: Multiple files import from non-existent `src.config.settings` module + +**Files Affected**: +- `packages/cloudflare-auth/src/cloudflare_auth/validators.py:40` +- `packages/cloudflare-auth/src/cloudflare_auth/middleware.py` +- `packages/cloudflare-auth/src/cloudflare_auth/middleware_enhanced.py` + +**Import Statement**: +```python +from src.config.settings import CloudflareSettings, get_cloudflare_settings +``` + +**Problem**: This module doesn't exist in the cloudflare-auth package + +**Options for Resolution**: + +#### Option A: Create CloudflareSettings within package (RECOMMENDED) + +```python +# packages/cloudflare-auth/src/cloudflare_auth/settings.py +from pydantic_settings import BaseSettings + +class CloudflareSettings(BaseSettings): + """Cloudflare Access configuration settings.""" + + cloudflare_team_domain: str + cloudflare_audience_tag: str + cloudflare_enabled: bool = True + service_token_enabled: bool = False + # ... other settings + + class Config: + env_file = ".env" + +def get_cloudflare_settings() -> CloudflareSettings: + """Create and return a CloudflareSettings instance.""" + return CloudflareSettings() +``` + +Then update imports: +```python +# In validators.py, middleware.py, middleware_enhanced.py +from cloudflare_auth.settings import CloudflareSettings, get_cloudflare_settings +``` + +#### Option B: Make settings injectable (alternative) + +Remove direct settings import and require settings to be passed as parameters: + +```python +# In validators.py +class CloudflareJWTValidator: + def __init__(self, team_domain: str, audience_tag: str): + self.team_domain = team_domain + self.audience_tag = audience_tag +``` + +**Recommendation**: Option A is preferred for backward compatibility and ease of use. + +--- + +## Root Cause + +The package appears to have been copied from a different project structure where it was nested under `src/` with a separate `src/config/` module.
When moved to the python-libs monorepo structure, imports were not updated to reflect the new package structure. + +--- + +## Impact + +**Current State**: +- ❌ Package cannot be imported: `ModuleNotFoundError: No module named 'src.config'` +- ❌ Cannot be used as a dependency in other projects +- ❌ Forces code duplication instead of reuse +- ❌ Violates the stated purpose of python-libs repository + +**Blocked Projects**: +- homelab-infra (PR #54) - waiting for these fixes before tests can run + +--- + +## Action Items + +### Immediate (Required for PR #54) + +1. **Commit local import fixes**: + ```bash + cd packages/cloudflare-auth + git add src/cloudflare_auth/*.py + git commit -m "fix: correct all module imports from src.cloudflare_auth to cloudflare_auth" + ``` + +2. **Create CloudflareSettings module**: + - Create `src/cloudflare_auth/settings.py` + - Define `CloudflareSettings` and `get_cloudflare_settings` + - Update imports in affected files + - Test imports work correctly + +3. **Add import tests**: + ```python + # tests/test_imports.py + def test_public_imports(): + """Verify all public imports work correctly.""" + from cloudflare_auth import ( + CloudflareAuthMiddleware, + CloudflareUser, + setup_cloudflare_auth_enhanced, + ) + assert CloudflareAuthMiddleware is not None + ``` + +4. **Add CI check**: + ```yaml + # .github/workflows/ci.yml + - name: Test package imports + run: | + uv run python -c "from cloudflare_auth import CloudflareUser" + ``` + +### Follow-up (Before PyPI Publication) + +5. **Publish to PyPI**: + - Once fixes are complete and tests pass + - Follow semantic versioning (v0.1.1 for patch) + - Update homelab-infra to use PyPI package instead of path dependency + +6. 
**Update template**: + - Add import tests to cookiecutter-python-template + - Ensure packages in workspace members have validated imports + +--- + +## Files Changed Locally (Not Committed) + +All files in `packages/cloudflare-auth/src/cloudflare_auth/`: +- `__init__.py` - Fixed imports and docstring +- `middleware.py` - Fixed imports +- `middleware_enhanced.py` - Fixed imports +- `validators.py` - Fixed imports +- `security_helpers.py` - Fixed imports +- `sessions.py` - Fixed imports +- `utils.py` - Fixed imports +- `whitelist.py` - Fixed imports +- `models.py` - Fixed imports +- `csrf.py` - Fixed imports +- `rate_limiter.py` - Fixed imports +- `redis_sessions.py` - Fixed imports + +--- + +## Testing Verification + +After fixes are applied, verify with: + +```bash +# 1. Clean install test +cd /tmp +uv venv test-env +source test-env/bin/activate +uv pip install /path/to/python-libs/packages/cloudflare-auth + +# 2. Import test +python -c "from cloudflare_auth import CloudflareUser, setup_cloudflare_auth_enhanced" + +# 3. 
Run package tests +cd /path/to/python-libs/packages/cloudflare-auth +uv run pytest tests/ -v +``` + +--- + +## Contact + +For questions or to coordinate fixes: +- **Project**: homelab-infra (blocked PR #54) +- **Reporter**: Claude Code +- **Related**: [homelab-infra PR #54](https://github.com/ByronWilliamsCPA/homelab-infra/pull/54) diff --git a/packages/gemini-image/README.md b/packages/gemini-image/README.md index 156d759..097cd48 100644 --- a/packages/gemini-image/README.md +++ b/packages/gemini-image/README.md @@ -141,10 +141,15 @@ gemini-image --list-models ## Aspect Ratios - `1:1` - Square +- `2:3` - Portrait (2x3) +- `3:2` - Landscape (3x2) - `3:4` - Portrait - `4:3` - Standard landscape +- `4:5` - Portrait (Instagram) +- `5:4` - Landscape (5x4) - `9:16` - Vertical/mobile - `16:9` - Widescreen (default) +- `21:9` - Ultra-wide/cinematic ## Draft-Then-Finalize Workflow @@ -184,9 +189,9 @@ Supported fields per prompt: - `prompt` (required): Text description - `output_path`: Specific output filename - `model_key`: "flash" or "pro" -- `aspect_ratio`: "1:1", "3:4", "4:3", "9:16", "16:9" +- `aspect_ratio`: Any supported ratio (see [Aspect Ratios](#aspect-ratios)) - `image_size`: "1K", "2K", "4K" -- `reference_images`: List of reference image paths +- `reference_images`: List of reference image paths (max 3 for flash, 14 for pro) ## PROMPTS.md Registry diff --git a/packages/gemini-image/docs/USAGE_GUIDE.md b/packages/gemini-image/docs/USAGE_GUIDE.md index 882dc8b..0de212d 100644 --- a/packages/gemini-image/docs/USAGE_GUIDE.md +++ b/packages/gemini-image/docs/USAGE_GUIDE.md @@ -113,10 +113,15 @@ result = generate_image( Supported aspect ratios: - `1:1` - Square (social media, profile images) +- `2:3` - Portrait (2x3, print photos) +- `3:2` - Landscape (3x2, DSLR photos) - `3:4` - Portrait (documents, posters) - `4:3` - Standard landscape (presentations) +- `4:5` - Portrait (Instagram posts) +- `5:4` - Landscape (5x4, large format) - `9:16` - Vertical/mobile (stories, 
mobile apps) - `16:9` - Widescreen (documents, videos) +- `21:9` - Ultra-wide/cinematic (movie posters, banners) ```python # Square image for social media @@ -124,6 +129,9 @@ result = generate_image("Profile avatar", aspect_ratio="1:1") # Vertical for mobile result = generate_image("Mobile wallpaper", aspect_ratio="9:16") + +# Ultra-wide cinematic +result = generate_image("Movie poster banner", aspect_ratio="21:9") ``` ### Reference-Based Editing @@ -146,6 +154,11 @@ CLI: gemini-image "Add more clouds" -r original.png -o edited.png ``` +**Reference image limits:** + +- Flash model: Up to 3 reference images +- Pro model: Up to 14 reference images + ### Multi-Part Story Sequences Generate a series of related images with visual continuity: diff --git a/packages/gemini-image/src/gemini_image/generator.py b/packages/gemini-image/src/gemini_image/generator.py index 53ac2b4..e57e62b 100644 --- a/packages/gemini-image/src/gemini_image/generator.py +++ b/packages/gemini-image/src/gemini_image/generator.py @@ -27,6 +27,7 @@ ASPECT_RATIOS, DEFAULT_MODEL, IMAGE_SIZES, + MAX_REFERENCE_IMAGES, MODELS, ) from gemini_image.registry import PromptRegistry @@ -87,6 +88,7 @@ def generate_image( _validate_model_key(model_key) _validate_aspect_ratio(aspect_ratio) _validate_image_size(image_size) + _validate_reference_images(reference_images, model_key) model_config = MODELS[model_key] @@ -647,6 +649,40 @@ def _validate_image_size(image_size: str | None) -> None: ) +def _validate_reference_images( + reference_images: list[Path] | None, + model_key: str, +) -> None: + """Validate reference images count against model limits. + + Per API documentation: + - Flash model: Up to 3 reference images + - Pro model: Up to 14 reference images + + Args: + reference_images: List of reference image paths. + model_key: Model key being used. + + Raises: + ValidationError: If too many reference images for the model. 
+ + """ + if reference_images is None: + return + + count = len(reference_images) + max_allowed = MAX_REFERENCE_IMAGES.get(model_key, 3) # type: ignore[arg-type] + + if count > max_allowed: + raise ValidationError( + f"Too many reference images for {model_key} model: {count} provided, " + f"maximum is {max_allowed}", + field="reference_images", + value=count, + valid_options=[f"1-{max_allowed}"], + ) + + def _build_contents( reference_images: list[Path] | None, prompt: str, diff --git a/packages/gemini-image/src/gemini_image/models.py b/packages/gemini-image/src/gemini_image/models.py index 57db449..13da5c1 100644 --- a/packages/gemini-image/src/gemini_image/models.py +++ b/packages/gemini-image/src/gemini_image/models.py @@ -6,9 +6,15 @@ # Type aliases for model configuration ModelKey = Literal["flash", "pro"] -AspectRatio = Literal["1:1", "3:4", "4:3", "9:16", "16:9"] +AspectRatio = Literal["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"] ImageSize = Literal["1K", "2K", "4K"] +# Reference image limits per model +MAX_REFERENCE_IMAGES: dict[ModelKey, int] = { + "flash": 3, + "pro": 14, +} + class ModelConfig(TypedDict): """Configuration for a Gemini image generation model.""" @@ -38,8 +44,20 @@ class ModelConfig(TypedDict): DEFAULT_MODEL: ModelKey = "pro" -# Valid aspect ratios for pro model -ASPECT_RATIOS: list[AspectRatio] = ["1:1", "3:4", "4:3", "9:16", "16:9"] +# Valid aspect ratios (all models) +# Per API docs: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 +ASPECT_RATIOS: list[AspectRatio] = [ + "1:1", # Square + "2:3", # Portrait (2x3) + "3:2", # Landscape (3x2) + "3:4", # Portrait (3x4) + "4:3", # Standard landscape + "4:5", # Portrait (Instagram) + "5:4", # Landscape (5x4) + "9:16", # Vertical/mobile + "16:9", # Widescreen + "21:9", # Ultra-wide/cinematic +] # Valid image sizes for pro model IMAGE_SIZES: list[ImageSize] = ["1K", "2K", "4K"] diff --git a/packages/gemini-image/tests/test_generator.py 
b/packages/gemini-image/tests/test_generator.py index 7ab6b2e..b3b6485 100644 --- a/packages/gemini-image/tests/test_generator.py +++ b/packages/gemini-image/tests/test_generator.py @@ -29,6 +29,72 @@ def test_generate_image_invalid_model_raises(self) -> None: with pytest.raises(ValidationError, match="Unknown model"): generate_image("test prompt", model_key="invalid") # type: ignore[arg-type] + def test_generate_image_invalid_aspect_ratio_raises(self) -> None: + """Test that invalid aspect ratio raises ValidationError.""" + with pytest.raises(ValidationError, match="Invalid aspect ratio"): + generate_image("test prompt", aspect_ratio="invalid") # type: ignore[arg-type] + + def test_generate_image_new_aspect_ratios_valid( + self, + tmp_path: Path, + mock_genai_response: MagicMock, + ) -> None: + """Test that new API aspect ratios are accepted.""" + mock_genai = MagicMock() + mock_types = MagicMock() + + mock_client = MagicMock() + mock_client.models.generate_content.return_value = mock_genai_response + mock_genai.Client.return_value = mock_client + + # Test one of the new aspect ratios + with ( + patch.object(client_module, "_genai", mock_genai), + patch.object(client_module, "_types", mock_types), + patch.dict(os.environ, {"GEMINI_API_KEY": "test-key"}), + ): + result = generate_image( + prompt="Test with 21:9 ultra-wide", + aspect_ratio="21:9", + output_dir=tmp_path, + document=False, + ) + + assert result is not None + assert result.exists() + + def test_generate_image_too_many_references_flash_raises( + self, + tmp_path: Path, + sample_image_path: Path, + ) -> None: + """Test that too many reference images for flash model raises ValidationError.""" + # Flash model allows max 3 reference images + too_many_refs = [sample_image_path] * 4 + + with pytest.raises(ValidationError, match="Too many reference images"): + generate_image( + "test prompt", + model_key="flash", + reference_images=too_many_refs, + ) + + def test_generate_image_too_many_references_pro_raises( 
+ self, + tmp_path: Path, + sample_image_path: Path, + ) -> None: + """Test that too many reference images for pro model raises ValidationError.""" + # Pro model allows max 14 reference images + too_many_refs = [sample_image_path] * 15 + + with pytest.raises(ValidationError, match="Too many reference images"): + generate_image( + "test prompt", + model_key="pro", + reference_images=too_many_refs, + ) + def test_generate_image_missing_api_key_raises(self) -> None: """Test that missing API key raises ConfigurationError.""" from gemini_image.exceptions import ConfigurationError diff --git a/packages/gemini-image/tests/test_models.py b/packages/gemini-image/tests/test_models.py index 6fac8a8..378f286 100644 --- a/packages/gemini-image/tests/test_models.py +++ b/packages/gemini-image/tests/test_models.py @@ -6,6 +6,7 @@ ASPECT_RATIOS, DEFAULT_MODEL, IMAGE_SIZES, + MAX_REFERENCE_IMAGES, MODELS, ) @@ -30,11 +31,20 @@ def test_default_model_is_pro(self) -> None: assert DEFAULT_MODEL == "pro" def test_aspect_ratios(self) -> None: - """Test that all expected aspect ratios are defined.""" - expected = ["1:1", "3:4", "4:3", "9:16", "16:9"] + """Test that all expected aspect ratios are defined per API docs.""" + # Per API docs: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 + expected = [ + "1:1", "2:3", "3:2", "3:4", "4:3", + "4:5", "5:4", "9:16", "16:9", "21:9", + ] assert expected == ASPECT_RATIOS def test_image_sizes(self) -> None: """Test that all expected image sizes are defined.""" expected = ["1K", "2K", "4K"] assert expected == IMAGE_SIZES + + def test_max_reference_images(self) -> None: + """Test that reference image limits are defined correctly.""" + assert MAX_REFERENCE_IMAGES["flash"] == 3 + assert MAX_REFERENCE_IMAGES["pro"] == 14 diff --git a/tmp_cleanup/image-generationreview.md b/tmp_cleanup/image-generationreview.md new file mode 100644 index 0000000..0be5050 --- /dev/null +++ b/tmp_cleanup/image-generationreview.md @@ -0,0 +1,349 @@ +# Image Generation 
Repository Analysis Report + +> **Analysis Date**: 2026-01-09 +> **Methodology**: Multi-Model Consensus Analysis +> **Models Consulted**: 5 frontier AI models with varying stances + +## Executive Summary + +This report presents a comprehensive analysis of the `image-generation` repository, a toolkit for generating images using Google's Gemini AI models (Gemini 2.5 Flash and Gemini 3 Pro "Nano Banana"). The analysis was conducted using a multi-model consensus approach with five frontier AI models to identify practical and functional gaps. + +**Overall Assessment**: The repository provides a **robust foundation** with thoughtful workflows and excellent documentation, but has **significant gaps** in testing, error resilience, and production-grade features that impact reliability for automated or batch workflows. + +--- + +## Models Consulted + +| Model | Stance | Confidence | Key Focus | +|-------|--------|------------|-----------| +| Google Gemini 2.5 Pro | For | 9/10 | Strengths advocacy | +| Google Gemini 3 Pro Preview | Against | 10/10 | Critical gap identification | +| OpenAI GPT-5.2 | Neutral | 8/10 | Balanced analysis | +| DeepSeek R1-0528 | Against | 9/10 | Security & code quality | +| xAI Grok-4 | Neutral | 8/10 | User experience focus | + +--- + +## Consensus Findings + +### Universal Agreement (5/5 Models) + +These issues were identified by **all five models** and represent the highest-priority gaps: + +#### 1. Monolithic Script Structure +**Location**: [generate_image.py](scripts/generate_image.py) (941 lines) + +The single-file structure mixes: + +- CLI argument parsing +- Gemini API interaction +- File I/O operations +- Markdown registry formatting +- Story sequence logic + +**Impact**: Makes unit testing nearly impossible, increases maintenance burden, and limits extensibility. 
+ +**Recommendation**: Refactor into modules: + +``` +scripts/ +├── generate_image.py # CLI entry point +├── client.py # GeminiClient class +├── io.py # Format detection, file saving +├── registry.py # PROMPTS.md handling +└── utils.py # Shared utilities +``` + +#### 2. Zero Test Coverage +**Current State**: No unit tests, integration tests, or CI/CD configuration. + +**Critical paths lacking tests**: + +- `detect_image_format()` - Magic byte detection (lines 104-119) +- Output path routing rules (draft/final/story) +- PROMPTS.md insertion logic and idempotency +- API response parsing (thoughts vs final parts) +- Multi-part story sequencing + +**Recommendation**: Add pytest suite with minimum coverage for: + +- Format detection edge cases +- Path routing logic +- Registry table updates + +#### 3. No Retry/Backoff for API Failures +**Location**: [generate_image.py:558-562](scripts/generate_image.py#L558-L562) + +```python +except Exception as e: + print(f"Error generating image: {e}") +``` + +**Missing resilience for**: + +- HTTP 429 rate limiting +- HTTP 5xx server errors +- Network timeouts/resets +- Transient connection failures + +**Recommendation**: Add `tenacity` or similar retry decorator: + +```python +@retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=4, max=60), + retry=retry_if_exception_type((RateLimitError, NetworkError)) +) +def generate_image(...): +``` + +#### 4. 
Claude Agent Integration is Conceptual Only +**Location**: [diagram-specialist.md](agents/diagram-specialist.md) + +The agent configuration file documents a validation workflow but: + +- No programmatic integration with `generate_image.py` +- No script to invoke validation automatically +- No structured metadata export (JSON sidecar) for agents +- `/diagram` commands referenced but not implemented in CLI + +**Recommendation**: Create `scripts/validate_image.py` that: + +- Invokes Claude API with diagram-specialist prompts +- Accepts generated image path as input +- Outputs validation report +- Optionally blocks finalization on validation failure + +--- + +### Strong Agreement (4/5 Models) + +#### 5. Story Mode Lacks Resume Capability +**Location**: [generate_image.py:565-659](scripts/generate_image.py#L565-L659) + +The `generate_story_sequence()` function iterates 1 to N without checking if files exist. If a 10-part story fails on part 9, re-running regenerates parts 1-8. + +**Impact**: Wasted API costs and time on already-generated content. + +**Recommendation**: + +```python +# Check if part already exists +output_path = output_prefix.parent / f"{output_prefix.stem}_part{part_num}.png" +if output_path.exists(): + print(f"Skipping part {part_num} (already exists)") + previous_image_path = output_path + continue +``` + +#### 6. `--finalize` Doesn't Parse PROMPTS.md for Original Prompt +**Location**: [generate_image.py:827-847](scripts/generate_image.py#L827-L847) + +The code claims to read the draft's PROMPTS.md entry but: + +- Only performs a naive substring search +- Falls back to generic upscaling prompt +- Doesn't actually extract and reuse original parameters + +**Recommendation**: Store metadata in JSON sidecar per image: + +``` +output/drafts/ +├── draft_20260109_123456.png +└── draft_20260109_123456.json # Contains prompt, params, model +``` + +#### 7. 
Output Path Behavior is Surprising +**Location**: [generate_image.py:501-513](scripts/generate_image.py#L501-L513) + +```python +if not str(output_path).startswith("output"): + output_path = script_dir / "output" / output_path.name +``` + +**Issues**: + +- `-o /tmp/myimage.png` gets rewritten to `output/myimage.png` +- Violates CLI conventions where user-specified paths are honored +- `startswith("output")` is brittle across platforms +- Path traversal possible via `output/../../somewhere` + +**Recommendation**: + +- Honor absolute paths as-is +- Add explicit `--output-dir` flag +- Use `pathlib.Path.resolve()` for safe path handling + +--- + +### Moderate Agreement (3/5 Models) + +#### 8. Python 3.11+ Requirement Not Documented +**Location**: Multiple occurrences of `datetime.UTC` + +Lines 205, 423-425, 479-480, 594-595 use `datetime.UTC` which was added in Python 3.11. + +**Impact**: Script fails silently or with cryptic errors on Python 3.10. + +**Recommendation**: + +```python +# Compatible with Python 3.9+ +from datetime import datetime, timezone +datetime.now(tz=timezone.utc) # Instead of datetime.UTC +``` + +Or document Python 3.11+ requirement in README. + +#### 9. Manual .env Parsing Should Use python-dotenv +**Location**: [generate_image.py:85-93](scripts/generate_image.py#L85-L93) + +Current implementation: + +- May fail on complex quoted values +- Doesn't handle comments +- No whitespace/newline robustness + +**Recommendation**: Add `python-dotenv` to dependencies and use standard loading. + +#### 10. No Batch Processing Support +**Current State**: Only single prompt or story mode supported. + +**User expectation**: Generate multiple images from CSV/JSON/text file of prompts. 
+ +**Recommendation**: Add `--batch prompts.json` flag: + +```json +[ + {"prompt": "Network diagram", "aspect": "16:9", "size": "2K"}, + {"prompt": "Server rack layout", "aspect": "9:16", "size": "2K"} +] +``` + +--- + +## Additional Findings by Category + +### Security Considerations + +| Issue | Severity | Location | Recommendation | +|-------|----------|----------|----------------| +| No `--no-document` flag for privacy | Medium | Line 917-929 | Add flag to disable PROMPTS.md logging | +| Verbose mode may log sensitive data | Medium | Line 446 | Scrub API keys from output | +| Path traversal possible | Low | Line 501-513 | Use `Path.resolve()` with validation | +| .env commit not warned | Low | README.md | Add explicit warning about .env security | +| No Gemini safety_settings configured | Medium | Line 376 | Handle content blocks gracefully | + +### Documentation Gaps + +| Issue | Location | Recommendation | +|-------|----------|----------------| +| Model ID mismatch in docstring | Lines 5-8 vs 58-69 | Sync docstring with MODELS dict | +| Story output location mismatch | IMAGE_GENERATION_GUIDE.md | Clarify output/ prepending behavior | +| PROMPTS.md template is empty | examples/PROMPTS.md | Add example entries | +| Max 14 ref images buried | IMAGE_GENERATION_GUIDE.md line 37 | Highlight in main features | +| Windows incompatibility of `file` command | README.md line 217 | Document alternatives | + +### Code Quality Issues + +| Issue | Location | Recommendation | +|-------|----------|----------------| +| Broad exception handling | Line 558 | Handle specific API errors | +| Thought images use MIME substring | Line 411-415 | Use `detect_image_format()` consistently | +| Inefficient base64 round-trip | Lines 133-161, 327-333 | Keep raw bytes, detect MIME directly | +| Only first candidate processed | Line 395-398 | Handle multi-candidate responses | +| Hardcoded path separators | Line 420 | Use `pathlib` consistently | + +### Platform Compatibility + +| Issue | 
Affected Platform | Recommendation | +|-------|-------------------|----------------| +| `datetime.UTC` | Python < 3.11 | Use `timezone.utc` | +| `startswith("output")` | Windows | Use `pathlib` methods | +| Hardcoded `/` separators | Windows | Use `Path` objects | +| `file` command | Windows | Document `python-magic` alternative | + +--- + +## Prioritized Recommendations + +### Critical (Address Before Sharing Widely) + +1. **Fix Python compatibility**: Replace `datetime.UTC` with `timezone.utc` +2. **Add retry logic**: Implement exponential backoff for API calls +3. **Add story resume**: Skip existing parts in story mode +4. **Fix finalize workflow**: Actually parse PROMPTS.md or use JSON sidecar + +### High Priority (Production Readiness) + +5. **Add unit tests**: Focus on `detect_image_format`, path routing, registry updates +6. **Refactor script**: Split into modules for testability +7. **Honor output paths**: Let users specify absolute paths +8. **Add `--no-document`**: Privacy option for prompt logging + +### Medium Priority (Enhanced Usability) + +9. **Add progress indicators**: Spinner or progress bar for long operations +10. **Add batch processing**: CSV/JSON prompt file support +11. **Automate Claude validation**: Create `validate_image.py` script +12. **Use python-dotenv**: Standard .env file handling + +### Low Priority (Polish) + +13. **Add CI/CD**: GitHub Actions for linting, tests +14. **Add pyproject.toml**: Modern Python packaging +15. **Add type checking**: Enable strict basedpyright +16. **Warn on model-specific flags**: Error when `--search` used with flash + +--- + +## Strengths Acknowledged + +All models recognized these as **significant strengths**: + +1. **Draft-then-finalize workflow**: Cost-effective iteration pattern +2. **Automatic format detection**: Magic bytes solve API MIME mismatches +3. **Comprehensive documentation**: README, guide, and agent config +4. **Minimal dependencies**: Single `google-genai` dependency +5. 
**CLI-first design**: Enables automation and scripting +6. **PROMPTS.md registry**: Automatic documentation of generations +7. **Multi-part story generation**: Unique continuity feature +8. **Reference image support**: Up to 14 images for editing/style transfer + +--- + +## Conclusion + +The image-generation repository is a **well-designed toolkit** with thoughtful features for its target use case of technical diagram generation. The draft-finalize workflow, automatic format detection, and comprehensive documentation demonstrate mature design thinking. + +However, the repository is **not production-ready** due to: + +- Zero test coverage +- Missing error resilience (no retries) +- Python version compatibility issues +- Surprising output path behavior + +Addressing the **Critical** and **High Priority** recommendations would transform this from a personal utility into a sharable, reliable tool suitable for broader adoption. + +--- + +## Appendix: Model Verdicts + +### Gemini 2.5 Pro (For) +> "This is a thoughtfully designed and highly practical toolkit that excels in its specific niche of technical diagram generation, with robust workflows and excellent documentation that address real-world API limitations." + +### Gemini 3 Pro Preview (Against) +> "The repository offers a functional and creative toolkit for simplified image generation, but its reliance on brittle text scraping for state management and lack of production-grade resilience severely limits its reliability for batch or automated workflows." + +### GPT-5.2 (Neutral) +> "Strong practical foundation for a 'power-user' Gemini image CLI, but it has several functional expectation gaps plus maintainability/test/security hardening needed before broader adoption." + +### DeepSeek R1 (Against) +> "The repository provides a robust foundation for Gemini-powered image generation but has significant gaps in validation, error resilience, testing, and security hardening that impact production readiness." 
+ +### Grok-4 (Neutral) +> "Robust toolkit with strong core features for Gemini-based image generation, but gaps in testing, modularity, and advanced user functionalities limit long-term maintainability and scalability." + +--- + +*Report generated via multi-model consensus analysis using PAL MCP Server* From d665a68687c0835e41e3467d92379ce7b468d392 Mon Sep 17 00:00:00 2001 From: Byron Williams Date: Fri, 9 Jan 2026 15:18:51 -0800 Subject: [PATCH 08/11] docs(gemini-image): add security docs and memory warning - Add Security section to README documenting API key handling - Add Memory Considerations section for 4K batch operations - Fix darglint docstring issue in io.py detect_image_format Co-Authored-By: Claude Opus 4.5 --- packages/gemini-image/README.md | 31 ++++++++++++++++++++ packages/gemini-image/docs/USAGE_GUIDE.md | 21 ++++++++++++- packages/gemini-image/src/gemini_image/io.py | 2 +- 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/packages/gemini-image/README.md b/packages/gemini-image/README.md index 097cd48..1f05f85 100644 --- a/packages/gemini-image/README.md +++ b/packages/gemini-image/README.md @@ -302,6 +302,37 @@ except GeminiImageError as e: print(f"Generation failed: {e}") ``` +## Security + +### API Key Handling + +The library loads API keys securely: + +- **Environment variable**: Set `GEMINI_API_KEY` in your environment +- **`.env` file**: The library auto-loads `.env` files via python-dotenv +- **Never logged**: API keys are never included in exception messages or logs +- **Never stored**: Keys are not persisted to disk by the library + +**Best practices:** + +```bash +# Add to .gitignore +echo ".env" >> .gitignore + +# Set permissions (Unix) +chmod 600 .env +``` + +**What NOT to do:** + +```python +# ❌ Don't hardcode API keys +generate_image("prompt", api_key="AIza...") # Not supported, use env var + +# ❌ Don't commit .env files +# ❌ Don't log or print API keys +``` + ## Development ```bash diff --git 
a/packages/gemini-image/docs/USAGE_GUIDE.md b/packages/gemini-image/docs/USAGE_GUIDE.md index 0de212d..89edef0 100644 --- a/packages/gemini-image/docs/USAGE_GUIDE.md +++ b/packages/gemini-image/docs/USAGE_GUIDE.md @@ -248,10 +248,29 @@ gemini-image --batch prompts.json -d ./output | `prompt` | str | Text description (required) | | `output_path` | str | Specific output filename | | `model_key` | str | "flash" or "pro" | -| `aspect_ratio` | str | "1:1", "3:4", "4:3", "9:16", "16:9" | +| `aspect_ratio` | str | Any supported ratio (see [Aspect Ratios](#aspect-ratios)) | | `image_size` | str | "1K", "2K", "4K" | | `reference_images` | list | Paths to reference images | +**Memory Considerations:** + +When generating 4K images in batch, be aware of memory usage: + +- **4K images**: ~50-100MB per uncompressed image in memory +- **Batch of 10 at 4K**: May require 500MB-1GB+ RAM during processing +- **Recommendation**: For large 4K batches, process in smaller chunks or use 2K resolution + +```python +# For memory-constrained environments, chunk large batches +def chunked_batch(prompts: list, chunk_size: int = 5) -> list: + """Process batch in chunks to limit memory usage.""" + results = [] + for i in range(0, len(prompts), chunk_size): + chunk = prompts[i:i + chunk_size] + results.extend(generate_batch(chunk, output_dir=Path("./output"))) + return results +``` + ## Workflow Patterns ### Draft-Then-Finalize Workflow diff --git a/packages/gemini-image/src/gemini_image/io.py b/packages/gemini-image/src/gemini_image/io.py index 639deaa..0ca3e75 100644 --- a/packages/gemini-image/src/gemini_image/io.py +++ b/packages/gemini-image/src/gemini_image/io.py @@ -61,7 +61,7 @@ def detect_image_format(data: bytes) -> str: data: Raw image bytes. Returns: - Format string: 'png', 'jpeg', 'gif', or 'webp'. + The detected format ('png', 'jpeg', 'gif', or 'webp'). Raises: FormatDetectionError: If the format cannot be detected. 
From b361162a261c8ae15dbcc571857c13a5abca7b88 Mon Sep 17 00:00:00 2001 From: Byron Williams Date: Fri, 9 Jan 2026 15:38:10 -0800 Subject: [PATCH 09/11] fix: address CI failures and CodeRabbit review comments CI Fixes: - Add SPDX license header to validate_installation.py (REUSE compliance) - Remove API key logging to fix CodeQL security warning - Change [Unreleased] to [0.2.0] with release date (changelog check) CodeRabbit Review Fixes: - FIXES_NEEDED.md: split long line, add blank lines around code blocks - CHANGELOG.md: remove redundant "CLI interface" -> "CLI" - CONTRIBUTING.md: add language to code block, fix formatting - USAGE_GUIDE.md: add missing imports to RateLimitError example - README.md: update PROMPTS.md example to match actual table format Co-Authored-By: Claude Opus 4.5 --- FIXES_NEEDED.md | 11 ++++++- docs/secure.md | 30 +++++++++--------- packages/gemini-image/CHANGELOG.md | 6 ++-- packages/gemini-image/CONTRIBUTING.md | 4 ++- packages/gemini-image/README.md | 31 +++++++++++++++---- packages/gemini-image/docs/USAGE_GUIDE.md | 3 ++ .../scripts/validate_installation.py | 7 +++-- 7 files changed, 63 insertions(+), 29 deletions(-) diff --git a/FIXES_NEEDED.md b/FIXES_NEEDED.md index 5859fb9..ca522f0 100644 --- a/FIXES_NEEDED.md +++ b/FIXES_NEEDED.md @@ -8,7 +8,9 @@ ## Summary -The `byronwilliamscpa-cloudflare-auth` package has broken imports that prevent it from being used as a dependency in other projects. This violates the core purpose of python-libs as a shared library repository. +The `byronwilliamscpa-cloudflare-auth` package has broken imports that prevent it from being used +as a dependency in other projects. This violates the core purpose of python-libs as a shared +library repository. 
## Issues Found @@ -27,6 +29,7 @@ The `byronwilliamscpa-cloudflare-auth` package has broken imports that prevent i - And potentially others **Example**: + ```python # Incorrect (current in repo) from src.cloudflare_auth.models import CloudflareUser @@ -38,6 +41,7 @@ from cloudflare_auth.middleware import CloudflareAuthMiddleware ``` **Fix Applied**: + ```bash cd packages/cloudflare-auth find src -name "*.py" -exec sed -i 's/from src\.cloudflare_auth/from cloudflare_auth/g' {} \; @@ -56,6 +60,7 @@ find src -name "*.py" -exec sed -i 's/import src\.cloudflare_auth/import cloudfl - `packages/cloudflare-auth/src/cloudflare_auth/middleware_enhanced.py` **Import Statement**: + ```python from src.config.settings import CloudflareSettings, get_cloudflare_settings ``` @@ -88,6 +93,7 @@ def get_cloudflare_settings() -> CloudflareSettings: ``` Then update imports: + ```python # In validators.py, middleware.py, middleware_enhanced.py from cloudflare_auth.settings import CloudflareSettings, get_cloudflare_settings @@ -133,6 +139,7 @@ The package appears to have been copied from a different project structure where ### Immediate (Required for PR #54) 1. **Commit local import fixes**: + ```bash cd packages/cloudflare-auth git add src/cloudflare_auth/*.py @@ -146,6 +153,7 @@ The package appears to have been copied from a different project structure where - Test imports work correctly 3. **Add import tests**: + ```python # tests/test_imports.py def test_public_imports(): @@ -159,6 +167,7 @@ The package appears to have been copied from a different project structure where ``` 4. 
**Add CI check**: + ```yaml # .github/workflows/ci.yml - name: Test package imports diff --git a/docs/secure.md b/docs/secure.md index bb3b60a..302694d 100644 --- a/docs/secure.md +++ b/docs/secure.md @@ -1,7 +1,7 @@ # python-libs Publishing Handoff Document -> **Repository:** [ByronWilliamsCPA/python-libs](https://github.com/ByronWilliamsCPA/python-libs) -> **Last Updated:** 2025-12-04 +> **Repository:** [ByronWilliamsCPA/python-libs](https://github.com/ByronWilliamsCPA/python-libs) +> **Last Updated:** 2025-12-04 > **Status:** Ready for Implementation ## Overview @@ -144,7 +144,7 @@ jobs: run: | TAG="${{ github.ref_name }}" echo "Processing tag: $TAG" - + if [[ "$TAG" == cloudflare-auth-v* ]]; then echo "package_dir=packages/cloudflare-auth" >> $GITHUB_OUTPUT echo "package_name=byronwilliamscpa-cloudflare-auth" >> $GITHUB_OUTPUT @@ -240,60 +240,60 @@ from pydantic_settings import BaseSettings, SettingsConfigDict class CloudflareSettings(BaseSettings): """Configuration for Cloudflare Access authentication.""" - + model_config = SettingsConfigDict( env_file=".env", env_file_encoding="utf-8", extra="ignore", case_sensitive=False, ) - + # Required cloudflare_team_domain: str = Field(default="", alias="CLOUDFLARE_TEAM_DOMAIN") cloudflare_audience_tag: str = Field(default="", alias="CLOUDFLARE_AUDIENCE_TAG") cloudflare_enabled: bool = Field(default=True, alias="CLOUDFLARE_ENABLED") - + # Headers jwt_header_name: str = Field(default="Cf-Access-Jwt-Assertion", alias="CF_JWT_HEADER") email_header_name: str = Field(default="Cf-Access-Authenticated-User-Email", alias="CF_EMAIL_HEADER") - + # Security require_email_verification: bool = Field(default=True, alias="CF_REQUIRE_EMAIL_VERIFICATION") log_auth_failures: bool = Field(default=True, alias="CF_LOG_AUTH_FAILURES") require_cloudflare_headers: bool = Field(default=True, alias="CF_REQUIRE_CLOUDFLARE_HEADERS") - + # Access control allowed_email_domains: list[str] = Field(default_factory=list, 
alias="CF_ALLOWED_EMAIL_DOMAINS") allowed_tunnel_ips: list[str] = Field(default_factory=list, alias="CF_ALLOWED_TUNNEL_IPS") - + # Cookies cookie_domain: Optional[str] = Field(default=None, alias="CF_COOKIE_DOMAIN") cookie_path: str = Field(default="/", alias="CF_COOKIE_PATH") cookie_secure: bool = Field(default=True, alias="CF_COOKIE_SECURE") cookie_samesite: str = Field(default="lax", alias="CF_COOKIE_SAMESITE") - + # JWT jwt_algorithm: str = Field(default="RS256", alias="CF_JWT_ALGORITHM") jwt_cache_max_keys: int = Field(default=16, alias="CF_JWT_CACHE_MAX_KEYS") - + @field_validator("allowed_email_domains", "allowed_tunnel_ips", mode="before") @classmethod def parse_comma_separated(cls, v): if isinstance(v, str): return [item.strip() for item in v.split(",") if item.strip()] if v.strip() else [] return v or [] - + @property def issuer(self) -> str: if not self.cloudflare_team_domain: return "" domain = self.cloudflare_team_domain.rstrip("/") return f"https://{domain}" if not domain.startswith("https://") else domain - + @property def certs_url(self) -> str: return f"{self.issuer}/cdn-cgi/access/certs" if self.issuer else "" - + def is_email_allowed(self, email: str) -> bool: if not self.allowed_email_domains: return True @@ -537,4 +537,4 @@ elif [[ "$TAG" == new-package-v* ]]; then - **Infrastructure Questions:** Byron Williams - **Infisical Issues:** Check https://secrets.byronwilliamscpa.com status -- **GCP Issues:** Check `assured-oss-457903` project in GCP Console \ No newline at end of file +- **GCP Issues:** Check `assured-oss-457903` project in GCP Console diff --git a/packages/gemini-image/CHANGELOG.md b/packages/gemini-image/CHANGELOG.md index 846e5a9..3551daa 100644 --- a/packages/gemini-image/CHANGELOG.md +++ b/packages/gemini-image/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. 
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.2.0] - 2026-01-09 ### Added @@ -41,12 +41,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `generate_image()` function for text-to-image generation - `generate_story_sequence()` function for multi-part stories - `finalize_draft()` function for draft-to-final workflow -- CLI interface (`gemini-image` command) +- CLI (`gemini-image` command) - Support for Gemini 2.5 Flash and Gemini 3 Pro models - Aspect ratio and resolution configuration - Reference image support for editing - Thinking mode with intermediate image visualization - Draft mode for cost-effective iteration -[Unreleased]: https://github.com/ByronWilliamsCPA/python-libs/compare/gemini-image-v0.1.0...HEAD +[0.2.0]: https://github.com/ByronWilliamsCPA/python-libs/compare/gemini-image-v0.1.0...gemini-image-v0.2.0 [0.1.0]: https://github.com/ByronWilliamsCPA/python-libs/releases/tag/gemini-image-v0.1.0 diff --git a/packages/gemini-image/CONTRIBUTING.md b/packages/gemini-image/CONTRIBUTING.md index 600646e..8aac6af 100644 --- a/packages/gemini-image/CONTRIBUTING.md +++ b/packages/gemini-image/CONTRIBUTING.md @@ -74,7 +74,7 @@ Use conventional branch names: Follow [Conventional Commits](https://www.conventionalcommits.org/): -``` +```text feat(generator): add retry logic for API calls fix(cli): handle missing API key gracefully docs(readme): update installation instructions @@ -119,12 +119,14 @@ def generate_image( 1. **Create a feature branch** from `main` 2. **Make your changes** with tests 3. **Run quality checks**: + ```bash uv run ruff format . uv run ruff check . uv run basedpyright src uv run pytest --cov ``` + 4. **Update documentation** if needed 5. 
**Submit a pull request** with: - Clear description of changes diff --git a/packages/gemini-image/README.md b/packages/gemini-image/README.md index 1f05f85..7c4d4a8 100644 --- a/packages/gemini-image/README.md +++ b/packages/gemini-image/README.md @@ -197,16 +197,35 @@ Supported fields per prompt: Every generation is automatically logged to `PROMPTS.md` in the output directory: -```markdown +````markdown ## Generation Log -### 2026-01-09 13:45:22 - generated_20260109_134522.jpg -- **Prompt**: A futuristic city at sunset -- **Model**: pro -- **Size**: 2K -- **Aspect**: 16:9 +| Image | Model | Date | Type | +|-------|-------|------|------| +| generated_20260109_134522.jpg | pro | 2026-01-09 | generated | + +--- + +### generated_20260109_134522.jpg + +| Property | Value | +|----------|-------| +| **File** | `output/generated_20260109_134522.jpg` | +| **Model** | pro | +| **Date** | 2026-01-09 13:45:22 | +| **Type** | generated | +| **Aspect Ratio** | 16:9 | +| **Size** | 2K | + +**Prompt:** + +``` +A futuristic city at sunset ``` +--- +```` + Disable with `--no-document` flag or `document=False` parameter. ## API Reference diff --git a/packages/gemini-image/docs/USAGE_GUIDE.md b/packages/gemini-image/docs/USAGE_GUIDE.md index 89edef0..9baac08 100644 --- a/packages/gemini-image/docs/USAGE_GUIDE.md +++ b/packages/gemini-image/docs/USAGE_GUIDE.md @@ -391,6 +391,9 @@ The library automatically retries with exponential backoff. 
If you still hit lim ```python import time +from gemini_image import generate_image +from gemini_image.exceptions import RateLimitError + for prompt in prompts: try: result = generate_image(prompt) diff --git a/packages/gemini-image/scripts/validate_installation.py b/packages/gemini-image/scripts/validate_installation.py index 309763a..2c16dbf 100755 --- a/packages/gemini-image/scripts/validate_installation.py +++ b/packages/gemini-image/scripts/validate_installation.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +# SPDX-FileCopyrightText: 2024 Byron Williams +# SPDX-License-Identifier: MIT """Validate gemini-image installation and configuration. Usage: @@ -90,9 +92,8 @@ def check_api_key() -> bool: key = get_api_key() if key and len(key) > 10: - # Mask the key for security - masked = f"{key[:4]}...{key[-4:]}" - print(f"OK (key: {masked})") + # Key found and valid length - don't log any part of the key + print("OK (API key configured)") return True print("FAILED: API key too short or empty") return False From 40b52a8534f73c1ecfc5e0c8ed09082d50e6feab Mon Sep 17 00:00:00 2001 From: Byron Williams Date: Fri, 9 Jan 2026 15:49:36 -0800 Subject: [PATCH 10/11] fix: address remaining CI and code quality issues REUSE Compliance: - Remove SPDX header from validate_installation.py (use REUSE.toml) - Add packages/**/scripts/**/*.py pattern to REUSE.toml SonarCloud: - Fix sonar.tests config: wildcards not allowed, use explicit paths Code Quality: - Replace assert with proper error handling in generator.py (Bandit B101) - Import GenerationError for the new error handling - Fix bare URL in SECURITY.md (MD034) - Update outdated Google AI Studio URL in USAGE_GUIDE.md Co-Authored-By: Claude Opus 4.5 --- REUSE.toml | 1 + packages/gemini-image/SECURITY.md | 2 +- packages/gemini-image/docs/USAGE_GUIDE.md | 2 +- packages/gemini-image/scripts/validate_installation.py | 2 -- packages/gemini-image/src/gemini_image/generator.py | 6 ++++-- sonar-project.properties | 4 ++-- 6 files changed, 9 
insertions(+), 8 deletions(-) diff --git a/REUSE.toml b/REUSE.toml index b6e6214..f77d5f4 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -14,6 +14,7 @@ path = [ "src/**", "packages/**/src/**/*.py", "packages/**/tests/**/*.py", + "packages/**/scripts/**/*.py", "tools/**", "tests/**", "validation/**", diff --git a/packages/gemini-image/SECURITY.md b/packages/gemini-image/SECURITY.md index 4539c82..c975ce6 100644 --- a/packages/gemini-image/SECURITY.md +++ b/packages/gemini-image/SECURITY.md @@ -13,7 +13,7 @@ If you discover a security vulnerability in this project, please report it responsibly: 1. **Do NOT** create a public GitHub issue for security vulnerabilities -2. Email the maintainer directly at: byronawilliams@gmail.com +2. Email the maintainer directly at: <byronawilliams@gmail.com> 3. Include: - Description of the vulnerability - Steps to reproduce diff --git a/packages/gemini-image/docs/USAGE_GUIDE.md b/packages/gemini-image/docs/USAGE_GUIDE.md index 9baac08..ae8bdd0 100644 --- a/packages/gemini-image/docs/USAGE_GUIDE.md +++ b/packages/gemini-image/docs/USAGE_GUIDE.md @@ -30,7 +30,7 @@ pip install byronwilliamscpa-gemini-image ### API Key Setup The library requires a Google Gemini API key. You can obtain one from the -[Google AI Studio](https://makersuite.google.com/app/apikey). +[Google AI Studio](https://aistudio.google.com/apikey). **Option 1: Environment variable** diff --git a/packages/gemini-image/scripts/validate_installation.py b/packages/gemini-image/scripts/validate_installation.py index 2c16dbf..47bd096 100755 --- a/packages/gemini-image/scripts/validate_installation.py +++ b/packages/gemini-image/scripts/validate_installation.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# SPDX-FileCopyrightText: 2024 Byron Williams -# SPDX-License-Identifier: MIT """Validate gemini-image installation and configuration.
Usage: diff --git a/packages/gemini-image/src/gemini_image/generator.py b/packages/gemini-image/src/gemini_image/generator.py index e57e62b..4716ed4 100644 --- a/packages/gemini-image/src/gemini_image/generator.py +++ b/packages/gemini-image/src/gemini_image/generator.py @@ -16,7 +16,7 @@ import structlog from gemini_image.client import GeminiClient, _get_genai -from gemini_image.exceptions import ValidationError +from gemini_image.exceptions import GenerationError, ValidationError from gemini_image.io import ( get_extension_for_format, load_metadata, @@ -143,7 +143,9 @@ def generate_image( ) # Save the image (with format correction) - assert parsed.image_data is not None # We checked has_image above + if parsed.image_data is None: # pragma: no cover + msg = "Internal error: image_data is None after has_image check" + raise GenerationError(msg) saved_path = save_image(parsed.image_data, final_path, correct_extension=True) # Save thought images if requested diff --git a/sonar-project.properties b/sonar-project.properties index cbe910f..359224c 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -15,8 +15,8 @@ sonar.projectVersion=0.1.0 # Source directories (comma-separated) sonar.sources=src/,packages/ -# Test directories (comma-separated) -sonar.tests=tests/,packages/*/tests/ +# Test directories (comma-separated, no wildcards allowed) +sonar.tests=tests/,packages/cloudflare-auth/tests/,packages/gcs-utilities/tests/,packages/gemini-image/tests/ # Python version sonar.python.version=3.12 From 5c2eba6c3ed81e824a57f896ffcee1dd7ff86e82 Mon Sep 17 00:00:00 2001 From: Byron Williams Date: Fri, 9 Jan 2026 16:01:10 -0800 Subject: [PATCH 11/11] fix(qlty): use [[triage]] blocks for rule-specific ignores Replace unsupported [[plugin.ignore]] and [[smells.ignore]] syntax with the correct [[triage]] blocks to ignore Bandit B101 (assert_used) and B108 (hardcoded_tmp_directory) in test files. 
This properly excludes pytest assert statements and test fixture /tmp usage from security scanning while keeping other security checks active. Co-Authored-By: Claude Opus 4.5 --- .qlty/qlty.toml | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/.qlty/qlty.toml b/.qlty/qlty.toml index 9f2d960..ec9eaff 100644 --- a/.qlty/qlty.toml +++ b/.qlty/qlty.toml @@ -42,6 +42,31 @@ test_patterns = [ "**/*.spec.*", ] +# ============================================================================ +# Issue Triage - Rule-level Ignores +# ============================================================================ +# Ignore Bandit B101 (assert_used) in test files - pytest uses assert statements +[[triage]] +match.plugins = ["bandit"] +match.rules = ["bandit:B101"] +match.file_patterns = [ + "**/tests/**", + "**/test_*.py", + "**/*_test.py", +] +set.ignored = true + +# Ignore Bandit B108 (hardcoded_tmp_directory) in test files - test fixtures use /tmp +[[triage]] +match.plugins = ["bandit"] +match.rules = ["bandit:B108"] +match.file_patterns = [ + "**/tests/**", + "**/test_*.py", + "**/*_test.py", +] +set.ignored = true + # Default source for plugin definitions [[source]] name = "default" @@ -72,17 +97,8 @@ default = true [smells] # Mode: comment (add PR comments) vs block (fail CI) mode = "comment" - -# Ignore complexity warnings for files with documented intentional complexity -[[smells.ignore]] -rules = [ - "function-complexity", - "function-parameters", - "nested-control-flow", -] -file_patterns = [ - "**/gemini_image/generator.py", -] +# Note: Complexity warnings for gemini_image/generator.py are intentional +# and documented in the file header. Use exclude_patterns at top level if needed. 
# Boolean logic complexity [smells.boolean_logic] @@ -145,15 +161,6 @@ drivers = ["lint"] name = "bandit" mode = "comment" -# Ignore B101 (assert_used) in test files - pytest uses assert statements -[[plugin.ignore]] -rule = "bandit:B101" -file_patterns = [ - "**/tests/**", - "**/test_*.py", - "**/*_test.py", -] - # Shell script linting (comment mode - script styling preferences) [[plugin]] name = "shellcheck"