diff --git a/invokeai/app/api/routers/model_manager.py b/invokeai/app/api/routers/model_manager.py index f67393f2ea2..81d4a927e83 100644 --- a/invokeai/app/api/routers/model_manager.py +++ b/invokeai/app/api/routers/model_manager.py @@ -1044,10 +1044,14 @@ class HFTokenHelper: @classmethod def get_status(cls) -> HFTokenStatus: try: - if huggingface_hub.get_token_permission(huggingface_hub.get_token()): + token = huggingface_hub.get_token() + if token is None: + # No token set + return HFTokenStatus.INVALID + if huggingface_hub.get_token_permission(token): # Valid token! return HFTokenStatus.VALID - # No token set + # Token exists but has no permissions (shouldn't normally happen) return HFTokenStatus.INVALID except Exception: return HFTokenStatus.UNKNOWN diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 56ebbe7fd9d..580aad3286a 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -2,7 +2,7 @@ from typing import Iterator, Literal, Optional, Tuple, Union import torch -from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer, T5TokenizerFast +from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation from invokeai.app.invocations.fields import ( @@ -86,7 +86,7 @@ def _t5_encode(self, context: InvocationContext) -> torch.Tensor: ExitStack() as exit_stack, ): assert isinstance(t5_text_encoder, T5EncoderModel) - assert isinstance(t5_tokenizer, (T5Tokenizer, T5TokenizerFast)) + assert isinstance(t5_tokenizer, T5Tokenizer) # Determine if the model is quantized. # If the model is quantized, then we need to apply the LoRA weights as sidecar layers. This results in @@ -186,7 +186,7 @@ def _t5_lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[ModelP def _log_t5_tokenization( self, context: InvocationContext, - tokenizer: Union[T5Tokenizer, T5TokenizerFast], + tokenizer: T5Tokenizer, ) -> None: """Logs the tokenization of a prompt for a T5-based model like FLUX.""" diff --git a/invokeai/app/invocations/sd3_text_encoder.py b/invokeai/app/invocations/sd3_text_encoder.py index 24647c9cfc7..e2ade1ddf93 100644 --- a/invokeai/app/invocations/sd3_text_encoder.py +++ b/invokeai/app/invocations/sd3_text_encoder.py @@ -8,7 +8,6 @@ CLIPTokenizer, T5EncoderModel, T5Tokenizer, - T5TokenizerFast, ) from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation @@ -102,7 +101,7 @@ def _t5_encode(self, context: InvocationContext, max_seq_len: int) -> torch.Tens ): context.util.signal_progress("Running T5 encoder") assert isinstance(t5_text_encoder, T5EncoderModel) - assert isinstance(t5_tokenizer, (T5Tokenizer, T5TokenizerFast)) + assert isinstance(t5_tokenizer, T5Tokenizer) text_inputs = t5_tokenizer( prompt, diff --git a/invokeai/app/services/model_install/model_install_default.py b/invokeai/app/services/model_install/model_install_default.py index 77dc3dfa70a..240757a0368 100644 --- a/invokeai/app/services/model_install/model_install_default.py +++ b/invokeai/app/services/model_install/model_install_default.py @@ -16,7 +16,7 @@ import torch import yaml -from huggingface_hub import HfFolder +from huggingface_hub import get_token as hf_get_token from pydantic.networks import AnyHttpUrl from pydantic_core import Url from requests import Session @@ -750,7 +750,7 @@ def _import_from_hf( ) -> ModelInstallJob: # Add user's cached access token to HuggingFace requests if source.access_token is None: - source.access_token = HfFolder.get_token() + source.access_token = hf_get_token() remote_files, metadata = self._remote_files_from_source(source) return self._import_remote_model( source=source, diff --git a/invokeai/backend/image_util/safety_checker.py b/invokeai/backend/image_util/safety_checker.py index ab09a296197..1f2acc58c56 100644 --- a/invokeai/backend/image_util/safety_checker.py +++ b/invokeai/backend/image_util/safety_checker.py @@ -9,7 +9,7 @@ import numpy as np from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from PIL import Image, ImageFilter -from transformers import AutoFeatureExtractor +from transformers import AutoImageProcessor import invokeai.backend.util.logging as logger from invokeai.app.services.config.config_default import get_config @@ -36,14 +36,14 @@ def _load_safety_checker(cls): try: model_path = get_config().models_path / CHECKER_PATH if model_path.exists(): - cls.feature_extractor = AutoFeatureExtractor.from_pretrained(model_path) + cls.feature_extractor = AutoImageProcessor.from_pretrained(model_path) cls.safety_checker = StableDiffusionSafetyChecker.from_pretrained(model_path) else: model_path.mkdir(parents=True, exist_ok=True) - cls.feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id) - cls.feature_extractor.save_pretrained(model_path, safe_serialization=True) + cls.feature_extractor = AutoImageProcessor.from_pretrained(repo_id) + cls.feature_extractor.save_pretrained(model_path) cls.safety_checker = StableDiffusionSafetyChecker.from_pretrained(repo_id) - cls.safety_checker.save_pretrained(model_path, safe_serialization=True) + cls.safety_checker.save_pretrained(model_path) except Exception as e: logger.warning(f"Could not load NSFW checker: {str(e)}") diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 2de51a8acae..be9f6e457e2 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -13,7 +13,7 @@ CLIPTextModel, CLIPTokenizer, T5EncoderModel, - T5TokenizerFast, + T5Tokenizer, ) from invokeai.app.services.config.config_default import get_config @@ -409,7 +409,7 @@ def _load_model( ) match submodel_type: case SubModelType.Tokenizer2 | SubModelType.Tokenizer3: - return T5TokenizerFast.from_pretrained( + return T5Tokenizer.from_pretrained( Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True ) case SubModelType.TextEncoder2 | SubModelType.TextEncoder3: @@ -437,8 +437,11 @@ def _load_state_dict_into_t5(cls, model: T5EncoderModel, state_dict: dict[str, t missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False, assign=True) assert len(unexpected_keys) == 0 assert set(missing_keys) == {"encoder.embed_tokens.weight"} - # Assert that the layers we expect to be shared are actually shared. - assert model.encoder.embed_tokens.weight is model.shared.weight + # Re-tie shared weights. In transformers 5.x, weight tying is implemented at the + # parameter level (via _tie_weights / tie_weights) rather than as a Python object + # alias. load_state_dict(assign=True) replaces parameters in-place, which severs + # the parameter-level tie. Calling tie_weights() re-establishes it. + model.tie_weights() @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder) @@ -455,7 +458,7 @@ def _load_model( match submodel_type: case SubModelType.Tokenizer2 | SubModelType.Tokenizer3: - return T5TokenizerFast.from_pretrained( + return T5Tokenizer.from_pretrained( Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True ) case SubModelType.TextEncoder2 | SubModelType.TextEncoder3: diff --git a/invokeai/backend/model_manager/load/model_util.py b/invokeai/backend/model_manager/load/model_util.py index c3477fa6603..0c6c8f7f4ab 100644 --- a/invokeai/backend/model_manager/load/model_util.py +++ b/invokeai/backend/model_manager/load/model_util.py @@ -10,7 +10,7 @@ import torch from diffusers.pipelines.pipeline_utils import DiffusionPipeline from diffusers.schedulers.scheduling_utils import SchedulerMixin -from transformers import CLIPTokenizer, PreTrainedTokenizerBase, T5Tokenizer, T5TokenizerFast +from transformers import CLIPTokenizer, PreTrainedTokenizerBase, T5Tokenizer from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline @@ -64,10 +64,7 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int: return 0 elif isinstance( model, - ( - T5TokenizerFast, - T5Tokenizer, - ), + T5Tokenizer, ): # HACK(ryand): len(model) just returns the vocabulary size, so this is blatantly wrong. It should be small # relative to the text encoder that it's used with, so shouldn't matter too much, but we should fix this at some diff --git a/invokeai/backend/model_manager/metadata/fetch/huggingface.py b/invokeai/backend/model_manager/metadata/fetch/huggingface.py index 1b2b6c36742..0734a4e11af 100644 --- a/invokeai/backend/model_manager/metadata/fetch/huggingface.py +++ b/invokeai/backend/model_manager/metadata/fetch/huggingface.py @@ -16,10 +16,11 @@ import json import re from pathlib import Path +from types import SimpleNamespace from typing import Optional import requests -from huggingface_hub import HfApi, configure_http_backend, hf_hub_url +from huggingface_hub import HfApi, hf_hub_url from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError from pydantic.networks import AnyHttpUrl from requests.sessions import Session @@ -47,7 +48,7 @@ def __init__(self, session: Optional[Session] = None): this module without an internet connection. """ self._requests = session or requests.Session() - configure_http_backend(backend_factory=lambda: self._requests) + self._has_custom_session = session is not None @classmethod def from_json(cls, json: str) -> HuggingFaceMetadata: @@ -55,6 +56,30 @@ def from_json(cls, json: str) -> HuggingFaceMetadata: metadata = HuggingFaceMetadata.model_validate_json(json) return metadata + def _model_info_via_session(self, repo_id: str, variant: Optional[ModelRepoVariant] = None) -> SimpleNamespace: + """Fetch model info using the injected requests session (for testing/custom backends).""" + params = {"blobs": "true"} + url = f"https://huggingface.co/api/models/{repo_id}" + if variant is not None: + url += f"/revision/{variant}" + resp = self._requests.get(url, params=params) + if resp.status_code == 404: + error_code = resp.headers.get("X-Error-Code", "") + if error_code == "RevisionNotFound" or (variant is not None): + raise RevisionNotFoundError(f"Revision '{variant}' not found for repo '{repo_id}'.") + raise RepositoryNotFoundError(f"Repository '{repo_id}' not found.") + resp.raise_for_status() + data = resp.json() + # Convert siblings dicts to SimpleNamespace objects matching HfApi.model_info() shape + siblings = [] + for s in data.get("siblings", []): + siblings.append(SimpleNamespace( + rfilename=s.get("rfilename"), + size=s.get("size") or (s.get("lfs", {}) or {}).get("size"), + lfs=s.get("lfs"), + )) + return SimpleNamespace(id=data["id"], siblings=siblings) + def from_id(self, id: str, variant: Optional[ModelRepoVariant] = None) -> AnyModelRepoMetadata: """Return a HuggingFaceMetadata object given the model's repo_id.""" # Little loop which tries fetching a revision corresponding to the selected variant. @@ -67,7 +92,12 @@ def from_id(self, id: str, variant: Optional[ModelRepoVariant] = None) -> AnyMod repo_id = id.split("::")[0] or id while not model_info: try: - model_info = HfApi().model_info(repo_id=repo_id, files_metadata=True, revision=variant) + # Use the injected session when provided (supports testing with mock adapters). + # Otherwise use HfApi which uses httpx internally. + if self._has_custom_session: + model_info = self._model_info_via_session(repo_id, variant) + else: + model_info = HfApi().model_info(repo_id=repo_id, files_metadata=True, revision=variant) except RepositoryNotFoundError as excp: raise UnknownMetadataException(f"'{repo_id}' not found. See trace for details.") from excp except RevisionNotFoundError: diff --git a/invokeai/backend/model_manager/metadata/metadata_base.py b/invokeai/backend/model_manager/metadata/metadata_base.py index e16ad4cbc47..b048144e547 100644 --- a/invokeai/backend/model_manager/metadata/metadata_base.py +++ b/invokeai/backend/model_manager/metadata/metadata_base.py @@ -17,7 +17,7 @@ from pathlib import Path from typing import List, Literal, Optional, Union -from huggingface_hub import configure_http_backend, hf_hub_url +from huggingface_hub import hf_hub_url from pydantic import BaseModel, Field, TypeAdapter from pydantic.networks import AnyHttpUrl from requests.sessions import Session @@ -111,7 +111,6 @@ def download_urls( full-precision model is returned. """ session = session or Session() - configure_http_backend(backend_factory=lambda: session) # used in testing paths = filter_files([x.path for x in self.files], variant, subfolder, subfolders) # all files in the model diff --git a/invokeai/backend/quantization/scripts/quantize_t5_xxl_bnb_llm_int8.py b/invokeai/backend/quantization/scripts/quantize_t5_xxl_bnb_llm_int8.py index 2e610404cdc..1bcb1227fe8 100644 --- a/invokeai/backend/quantization/scripts/quantize_t5_xxl_bnb_llm_int8.py +++ b/invokeai/backend/quantization/scripts/quantize_t5_xxl_bnb_llm_int8.py @@ -15,8 +15,11 @@ def load_state_dict_into_t5(model: T5EncoderModel, state_dict: dict): missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False, assign=True) assert len(unexpected_keys) == 0 assert set(missing_keys) == {"encoder.embed_tokens.weight"} - # Assert that the layers we expect to be shared are actually shared. - assert model.encoder.embed_tokens.weight is model.shared.weight + # Re-tie shared weights. In transformers 5.x, weight tying is implemented at the + # parameter level (via _tie_weights / tie_weights) rather than as a Python object + # alias. load_state_dict(assign=True) replaces parameters in-place, which severs + # the parameter-level tie. Calling tie_weights() re-establishes it. + model.tie_weights() def main(): diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index de5253f0733..054e04dcb28 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -17,7 +17,7 @@ from diffusers.schedulers.scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin from diffusers.utils.import_utils import is_xformers_available from pydantic import Field -from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer +from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer from invokeai.app.services.config.config_default import get_config from invokeai.backend.stable_diffusion.diffusion.conditioning_data import IPAdapterData, TextConditioningData @@ -139,7 +139,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. Please, refer to the [model card](https://huggingface.co/CompVis/stable-diffusion-v1-4) for details. - feature_extractor ([`CLIPFeatureExtractor`]): + feature_extractor ([`CLIPImageProcessor`]): Model that extracts features from generated images to be used as inputs for the `safety_checker`. """ @@ -151,7 +151,7 @@ def __init__( unet: UNet2DConditionModel, scheduler: KarrasDiffusionSchedulers, safety_checker: Optional[StableDiffusionSafetyChecker], - feature_extractor: Optional[CLIPFeatureExtractor], + feature_extractor: Optional[CLIPImageProcessor], requires_safety_checker: bool = False, ): super().__init__( diff --git a/pyproject.toml b/pyproject.toml index adfe5982baf..2fac347f320 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ dependencies = [ "torch~=2.7.0", # torch and related dependencies are loosely pinned, will respect requirement of `diffusers[torch]` "torchsde", # diffusers needs this for SDE solvers, but it is not an explicit dep of diffusers "torchvision", - "transformers>=4.56.0", + "transformers>=5.1.0", # Core application dependencies, pinned for reproducible builds. "fastapi-events", @@ -123,7 +123,8 @@ dependencies = [ [tool.uv] # Prevent opencv-python from ever being chosen during dependency resolution. # This prevents conflicts with opencv-contrib-python, which Invoke requires. -override-dependencies = ["opencv-python; sys_platform=='never'"] +# Force transformers>=5.1.0 past compel==2.1.1's ~=4.25 (<5.0) constraint. +override-dependencies = ["opencv-python; sys_platform=='never'", "transformers>=5.1.0"] conflicts = [[{ extra = "cpu" }, { extra = "cuda" }, { extra = "rocm" }]] index-strategy = "unsafe-best-match"