diff --git a/clams/app/__init__.py b/clams/app/__init__.py index 1d3f0a2..0519200 100644 --- a/clams/app/__init__.py +++ b/clams/app/__init__.py @@ -9,11 +9,11 @@ from datetime import datetime from urllib import parse as urlparser -__all__ = ['ClamsApp'] +__all__ = ['ClamsApp', 'ClamsPromptableApp', 'ClamsHFPromptableApp'] -from typing import Union, Any, Optional, Dict, List, Tuple +from typing import Union, Any, Optional, Dict, List, Tuple, cast -from mmif import Mmif, Document, DocumentTypes, View +from mmif import Mmif, Document, DocumentTypes, View, AnnotationTypes from mmif.utils.video_document_helper import ( SamplingMode, SAMPLING_MODE_DESCRIPTIONS, SAMPLING_MODE_DEFAULT, _sampling_mode, @@ -75,7 +75,7 @@ class ClamsApp(ABC): # how vdh.extract_frames_by_mode() selects frames from TimeFrames. # The value is intercepted in annotate() and pushed into a # contextvars.ContextVar so that any vdh call inside _annotate() - # picks it up automatically — app developers never need to handle + # picks it up automatically; app developers never need to handle # this parameter themselves. { 'name': 'tfSamplingMode', 'type': 'string', @@ -116,7 +116,7 @@ def appmetadata(self, **kwargs: List[str]) -> str: """ # cast only, no refinement casted = self.metadata_param_caster.cast(kwargs) - pretty = casted.pop('pretty') if 'pretty' in casted else False + pretty = casted.get('pretty', False) return self.metadata.jsonify(pretty) def _load_appmetadata(self) -> AppMetadata: @@ -131,7 +131,7 @@ def _load_appmetadata(self) -> AppMetadata: In any case, :class:`~clams.appmetadata.AppMetadata` class must be useful. For metadata specification, - see `https://sdk.clams.ai/appmetadata.jsonschema <../appmetadata.jsonschema>`_. + see `https://clams.ai/clams-python/appmetadata.jsonschema <../appmetadata.jsonschema>`_. 
""" cwd = pathlib.Path(sys.modules[self.__module__].__file__).parent @@ -185,7 +185,7 @@ def annotate(self, mmif: Union[str, dict, Mmif], **runtime_params: List[str]) -> refined = self._refine_params(**runtime_params) self.logger.debug(f"Refined parameters: {refined}") pretty = refined.get('pretty', False) - sampling_mode_str = refined.pop('tfSamplingMode', None) + sampling_mode_str = refined.get('tfSamplingMode', None) if sampling_mode_str is not None: _sampling_mode.set(SamplingMode(sampling_mode_str)) t = datetime.now() @@ -309,6 +309,8 @@ def sign_view(self, view: View, runtime_conf: dict) -> None: :param runtime_conf: runtime configuration of the app as k-v pairs """ view.metadata.app = str(self.metadata.identifier) + if self.metadata.app_tags: + view.metadata.set_additional_property('appTags', list(self.metadata.app_tags)) params_map = {p.name: p for p in self.metadata.parameters} if self._RAW_PARAMS_KEY in runtime_conf: for k, v in runtime_conf.items(): @@ -639,6 +641,771 @@ def open_document_location(document: Union[str, Document], opener: Any = open, * raise FileNotFoundError(p.path) +# TODO (krim @ 05/28/26): maybe we should consider implementing +# autodoc-based auto documentation export (e.g., ``automethod`` for +# methods and a small Sphinx extension to render +# ``promptable_parameters`` into the parameter table), instead of the +# current hand-authored ``documentation/app-baseclasses.rst``. +class ClamsPromptableApp(ClamsApp): + """ + Base class for CLAMS apps that wrap a promptable model (an LLM or + other multimodal model, local or remote). Standardizes the runtime + parameter surface (prompt, generation hyperparameters, parallelism + control) and provides helpers for building chat conversations and + persisting model responses into MMIF. + + The standardized parameters are listed in + :py:attr:`promptable_parameters` and added to an app's metadata via + :py:meth:`inject_promptable_parameters`. 
Promptable-app developers + MUST call that helper at the end of their ``appmetadata()`` function + in ``metadata.py``. The reservation rule (these parameter names are + SDK-managed and apps cannot redeclare them) is enforced implicitly + via :py:meth:`AppMetadata.add_parameter`'s existing duplicate-name + check. + + Inference is performed by :py:meth:`generate`, which subclasses MUST + implement. The base class provides: + + * :py:meth:`inject_promptable_parameters` : adds the SDK-managed + parameter set to ``AppMetadata`` + * :py:meth:`build_conversation` : assembles a chat-template-compatible + message list from a prompt plus optional images/audios + * :py:meth:`response_to_grounded_textdocument` : persists a + generated response into a view as ``TextDocument`` + + ``Alignment`` (+ optional ``origins`` / ``origination``) + """ + + #: SDK-managed runtime parameters injected into every promptable app. + #: These names are reserved; apps cannot redeclare them with + #: customized specs. + promptable_parameters = [ + { + 'name': 'prompt', 'type': 'string', 'multivalued': True, + 'description': + 'User prompt(s) sent to the model. A single value runs as a ' + 'one-shot generation. A multi-value list is interpreted as a ' + 'multi-turn static prompt; see ``promptMode`` for how turns ' + 'are assembled.', + }, + { + 'name': 'systemPrompt', 'type': 'string', 'default': '', + 'description': + 'Optional system-role text prepended to the conversation. ' + 'Empty by default.', + }, + { + 'name': 'promptMode', 'type': 'string', + 'choices': ['user-only', 'turn-taking'], + 'default': 'turn-taking', + 'description': + 'How to interpret a multi-value ``prompt`` list. ' + 'Has no effect when ``prompt`` has a single value. 
' + 'For semantics of each choice and worked examples, see ' + 'https://clams.ai/clams-python/app-baseclasses.html#promptable-multiturn', + }, + { + 'name': 'maxNewTokens', 'type': 'integer', 'default': 512, + 'description': + 'Maximum number of new tokens generated per inference call. ' + 'Forwarded to the backend\'s ``generate``-equivalent. Larger ' + 'values grow the KV cache linearly and increase GPU memory ' + 'usage; reduce if VRAM is constrained.', + }, + { + 'name': 'temperature', 'type': 'number', 'default': 0.0, + 'description': + 'Sampling temperature. The default ``0.0`` selects ' + 'deterministic / greedy decoding for maximum reproducibility; ' + 'override for sampled generation.', + }, + { + 'name': 'topP', 'type': 'number', 'default': 1.0, + 'description': + 'Nucleus-sampling cumulative probability cutoff. Only ' + 'meaningful when ``temperature`` is greater than 0.', + }, + { + 'name': 'topK', 'type': 'integer', 'default': 50, + 'description': + 'Top-K sampling cutoff. Only meaningful when ``temperature`` ' + 'is greater than 0.', + }, + { + 'name': 'parallelPrompts', 'type': 'integer', 'default': 1, + 'description': + 'Number of independent prompts the app runs in parallel ' + '(stacks into a single forward pass). The *size* of each ' + 'prompt (how many images, how long the system/user text ' + 'is, etc.) is NOT regulated by this parameter; that is ' + 'each app\'s responsibility. Prompt count and per-prompt ' + 'content size combine multiplicatively for GPU memory, ' + 'so the two can blow up together. Catastrophic example: ' + '``tfSamplingMode=all`` on a TimeFrame without ' + '``targets`` expands that TF into one image per ' + 'native-FPS frame (300 images for a 10-second TF at ' + '30fps); ``parallelPrompts=4`` then runs 4 such prompts ' + 'in one forward pass (~1200 images), guaranteed OOM. 
' + 'Keep at ``1`` on memory-tight setups; raise only when ' + 'per-prompt content is small and bounded.', + }, + ] + + @staticmethod + def inject_promptable_parameters(metadata: AppMetadata) -> None: + """ + Add the SDK-managed promptable parameters to ``metadata``. Call + this at the end of your app's ``appmetadata()`` function in + ``metadata.py`` if your app subclasses + :py:class:`ClamsPromptableApp`. + + The reservation rule is enforced implicitly: if the app had + already called ``metadata.add_parameter('prompt', ...)`` (or + any other promptable name) before this helper, the helper's own + ``add_parameter`` call will trip the existing duplicate-name + ``ValueError`` in :py:meth:`AppMetadata.add_parameter`. + + :param metadata: the :class:`AppMetadata` instance being built + """ + for param in ClamsPromptableApp.promptable_parameters: + metadata.add_parameter(**param) + + def __init__(self): + # ``ClamsApp.__init__`` loads the app's ``metadata.py``, which + # is expected to have already called + # ``inject_promptable_parameters()`` from inside + # ``appmetadata()``. The parent ``__init__`` then iterates + # ``self.metadata.parameters`` to populate + # ``annotate_param_spec`` and build the caster, so the + # promptable parameters are already covered by the time we land + # here. We only validate that the helper was actually called. + super().__init__() + declared = {p.name for p in self.metadata.parameters} + expected = {p['name'] for p in ClamsPromptableApp.promptable_parameters} + missing = expected - declared + if missing: + raise ValueError( + f"Promptable parameters {sorted(missing)} are missing " + f"from the app metadata. Promptable apps must call " + f"``ClamsPromptableApp.inject_promptable_parameters(" + f"metadata)`` inside their ``appmetadata()`` function " + f"in ``metadata.py``." 
+ ) + + @abstractmethod + def generate( + self, + prompt: List[str], + system_prompt: str = '', + images: Optional[List[List[Any]]] = None, + audios: Optional[List[List[Any]]] = None, + prompt_mode: str = 'turn-taking', + **generation_params, + ) -> List[str]: + """ + Run N independent prompts in one inference call and return N + outputs. Subclasses MUST implement this. + + Each inner list of ``images`` / ``audios`` is the bundled + multimodal content for ONE prompt -- the model sees those + items as one composite input and produces one output. The + outer list spans N prompts processed in parallel (when the + backend supports it; sequentially otherwise). + + * Single-prompt call: ``images=[[img1, img2]]`` -> one output + (composite over the two bundled images). + * Per-input broadcast: ``images=[[img1], [img2], [img3]]`` -> + three outputs (one per image). Caller assembles the + singleton-wrap shape. + * Multimodal pair: ``images=[[img1]], audios=[[au1]]`` -> one + output. When both ``images`` and ``audios`` are given they + must have the same outer length; index ``i`` of each pairs + into prompt ``i``. + + :param prompt: a ``List[str]`` of prompt turns. A + single-element list is one-shot. A multi-element list is + multi-turn and is assembled according to ``prompt_mode``. + :param system_prompt: optional system-role text prepended to + the conversation. Applies to every prompt in the batch. + :param images: optional ``List[List[Any]]`` -- N groups, one + per prompt; each inner list is the bundled images for that + prompt. + :param audios: optional ``List[List[Any]]`` -- N groups, one + per prompt; each inner list is the bundled audio clips + for that prompt. + :param prompt_mode: ``"turn-taking"`` (default) or + ``"user-only"``; see :py:attr:`promptable_parameters`. + :param generation_params: any additional backend-specific + generation kwargs (``maxNewTokens``, ``temperature``, + ``topP``, ``topK``, etc.). 
+ :return: a ``List[str]`` with one entry per prompt in the + batch. For ``prompt_mode='user-only'`` multi-turn, each + prompt's entry is the assistant's final reply across its + N user turns. + :rtype: List[str] + """ + raise NotImplementedError + + def build_conversation( + self, + prompt: Union[str, List[str], List[dict]], + system_prompt: str = '', + images: Optional[List[Any]] = None, + audios: Optional[List[Any]] = None, + prompt_mode: str = 'turn-taking', + ) -> Union[List[dict], List[List[dict]]]: + """ + Build a chat-template-compatible message list. + + :param prompt: a plain string, a ``List[str]`` of prompt turns, + or a pre-built ``List[dict]`` of role/content message + objects (returned as-is; pass-through for advanced + callers that constructed the conversation themselves). + :param system_prompt: if non-empty, prepended as a + system-role message. + :param images: optional list of image inputs to include in the + (final) user turn's content. Each appears as a + ``{'type': 'image', 'image': <image>}`` entry. + :param audios: optional list of audio inputs to include in the + (final) user turn's content. Each appears as a + ``{'type': 'audio', 'audio': <audio>}`` entry. + :param prompt_mode: ``"turn-taking"`` (default) or + ``"user-only"``. Only meaningful when ``prompt`` is a + multi-element list; ignored otherwise. See + :py:attr:`promptable_parameters` for semantics. + + :returns: + * For single-shot prompts (string or single-element list) + and for multi-element ``turn-taking`` mode: a single + ``List[dict]`` of role/content messages, ready to feed + to a chat-template applier (e.g., + ``processor.apply_chat_template``). + * For multi-element ``user-only`` mode: a + ``List[List[dict]]`` of N progressively-extending + conversation prefixes, one per user turn. Each prefix + ends in a user turn; assistant turns between users are + stored with ``content=None`` as placeholders for the + caller to fill in with successive generation results. 
+ + Subclasses may override to access model-specific state + (``self.processor``, ``self.tokenizer``, etc.) during + formatting; the base implementation is back-end-agnostic. + """ + # Pass-through for pre-built message lists. + if isinstance(prompt, list) and prompt and all( + isinstance(p, dict) for p in prompt): + return cast(List[dict], prompt) + + # Normalize to List[str]. + if isinstance(prompt, str): + prompts = [prompt] + else: + prompts = list(prompt) + + if len(prompts) == 1: + return self._build_single_turn( + prompts[0], system_prompt, images, audios) + + if prompt_mode == 'turn-taking': + return self._build_turn_taking( + prompts, system_prompt, images, audios) + if prompt_mode == 'user-only': + return self._build_user_only( + prompts, system_prompt, images, audios) + raise ValueError( + f"Unknown prompt_mode: {prompt_mode!r}. " + f"Expected 'turn-taking' or 'user-only'.") + + @staticmethod + def _make_user_content(text, images=None, audios=None): + """Build the content list for a user-role message.""" + content = [] + if images: + for img in images: + content.append({'type': 'image', 'image': img}) + if audios: + for au in audios: + content.append({'type': 'audio', 'audio': au}) + content.append({'type': 'text', 'text': text}) + return content + + def _build_single_turn(self, text, system_prompt, images, audios): + messages = [] + if system_prompt: + messages.append({'role': 'system', 'content': system_prompt}) + messages.append({ + 'role': 'user', + 'content': self._make_user_content(text, images, audios), + }) + return messages + + def _build_turn_taking(self, prompts, system_prompt, images, audios): + """ + Alternating user/assistant turns; one inference call. + Even indices in ``prompts`` are user turns, odd indices are + pre-written assistant exemplars. Images/audios (if any) are + attached to the final user turn (the actual query). 
+ """ + messages = [] + if system_prompt: + messages.append({'role': 'system', 'content': system_prompt}) + # index of the final user turn (the last even index) + last_user_idx = (len(prompts) - 1) - ((len(prompts) - 1) % 2) + for i, text in enumerate(prompts): + role = 'user' if i % 2 == 0 else 'assistant' + if role == 'user': + attach_media = (i == last_user_idx) + content = self._make_user_content( + text, + images if attach_media else None, + audios if attach_media else None, + ) + messages.append({'role': 'user', 'content': content}) + else: + messages.append({'role': 'assistant', 'content': text}) + return messages + + def _build_user_only(self, prompts, system_prompt, images, audios): + """ + N progressively-extending conversation prefixes, one per user + turn. Assistant slots between users have ``content=None`` as + placeholders for the caller's successive generation results. + """ + convs: List[List[dict]] = [] + base: List[dict] = [] + if system_prompt: + base.append({'role': 'system', 'content': system_prompt}) + for i, text in enumerate(prompts): + # First user turn carries the images/audios (the initial query); + # subsequent user turns are text-only. + user_content = self._make_user_content( + text, + images if i == 0 else None, + audios if i == 0 else None, + ) + base.append({'role': 'user', 'content': user_content}) + # Snapshot the conversation as it stands at the start of + # the i-th generation call. Shallow-copy each message so + # later in-place edits (e.g., filling in the assistant + # placeholder) don't retroactively mutate earlier + # snapshots. 
+ convs.append([dict(m) for m in base]) + if i < len(prompts) - 1: + base.append({'role': 'assistant', 'content': None}) + return convs + + def response_to_grounded_textdocument( + self, + view: View, + source: str, + response: str, + origins: Optional[List[str]] = None, + origination: Optional[str] = None, + reasoning_trace: Optional[str] = None, + ) -> Tuple[Any, Any]: + """ + Persist a single LLM text response into a view. Writes one + ``TextDocument`` (containing the response) plus possible + grounding via an ``Alignment`` annotation and ``origins`` / + ``origination`` properties on the TD. + + The two grounding link kinds are semantically distinct: + + * ``source`` is the *coarse* cross-modal grounding -- the + single annotation id that the response is anchored to. + Written into the new ``Alignment`` (``source -> td``). + Typical value: the parent ``TimeFrame`` for a + captioning/OCR app. + * ``origins`` are the *finer* derivation grounding -- a list + of annotation ids the response was specifically derived + from (e.g. the ``TimePoint``\\s whose frames were fed to + the model). Written into ``TextDocument.origins``. See + https://clams.ai/clams-vocabulary/Document for vocabulary + semantics. + + :param view: the :class:`View` to write into. The caller is + responsible for having called + :meth:`View.new_contain` for ``TextDocument`` and + ``Alignment`` first if needed. + :param source: ``id`` of the annotation to record as the + cross-modal anchor of the response (see above). + :param response: the text generated by the model. + :param origins: optional list of ``id``\\s of annotations the + response was *derived* from. Must be paired with + ``origination``. + :param origination: nature of the derivation, written to + ``TextDocument.origination``. Accepted values per the + vocabulary include ``'derived'``, ``'transcription'``, + ``'topologically-identical'``. Must be paired with + ``origins``. 
+ :param reasoning_trace: optional model-side reasoning trace + (a chain-of-thought / scratchpad string, NOT a Python + traceback). NOT YET SUPPORTED -- passing a non-``None`` + value raises :py:class:`NotImplementedError`. Storage + convention is still being decided at + clamsproject/clams-python#263. + :return: ``(TextDocument, Alignment)`` tuple of the new + annotations. + :raises ValueError: if exactly one of ``origins`` / + ``origination`` is set; they must be supplied together + or both omitted. + """ + if bool(origins) != bool(origination): + raise ValueError( + "`origins` and `origination` must be supplied together " + "or both omitted; got " + f"origins={origins!r}, origination={origination!r}." + ) + td = view.new_textdocument(text=response) + if origins: + td.add_property('origins', origins) + td.add_property('origination', origination) + align = view.new_annotation( + AnnotationTypes.Alignment, + source=source, + target=td.id, + ) + if reasoning_trace is not None: + raise NotImplementedError( + "Reasoning-trace storage convention is not yet defined; " + "tracked at clamsproject/clams-python#263." + ) + return td, align + + +class ClamsHFPromptableApp(ClamsPromptableApp): + """ + Base class for promptable CLAMS apps backed by a local + HuggingFace ``transformers`` model. Layers HF-specific inference + plumbing on top of :class:`ClamsPromptableApp`: model loading + via :func:`clams.backends.hf.load_hf_model`, and a concrete + :py:meth:`generate` implementation that runs N independent + prompts in one HF forward pass via the standard + chat-template -> ``model.generate`` -> ``batch_decode`` pipeline. + + Concrete subclasses declare the model class via :py:attr:`MODEL_CLS` + plus a handful of optional dtype/padding hints, and the family of + pinned model revisions via ``analyzer_versions`` in + ``metadata.py``. 
The SDK auto-derives a ``model`` runtime + parameter (choices = keys of ``analyzer_versions``), and the dev's + ``_annotate`` calls :py:meth:`load_model` to (lazily) load the + requested family member. Singleton families (one entry in + ``analyzer_versions``) eagerly pre-load in ``__init__`` so + single-model apps preserve warm-start semantics. Example:: + + class MyVLMCaptioner(ClamsHFPromptableApp): + MODEL_CLS = AutoModelForImageTextToText + DTYPE = torch.bfloat16 + PADDING_SIDE = 'left' + + # In metadata.py: + # analyzer_versions={ + # "HuggingFaceTB/SmolVLM2-2.2B-Instruct": "482adb5", + # } + # plus a call to + # ClamsHFPromptableApp.inject_promptable_parameters(metadata). + + def _annotate(self, mmif, **parameters): + self.load_model(parameters['model']) + # ... self.generate(prompt, images=image_groups, ...) + # ... self.response_to_grounded_textdocument(...) + ... + + Requires the ``[hf]`` extra (``pip install clams-python[hf]``). + """ + + #: ``transformers`` model class (e.g. + #: :class:`~transformers.AutoModelForImageTextToText`, + #: :class:`~transformers.AutoModelForCausalLM`). Subclasses MUST + #: set this. + MODEL_CLS: Optional[Any] = None + #: ``transformers`` processor / tokenizer / feature-extractor + #: class. Defaults to :class:`~transformers.AutoProcessor` (set + #: by :func:`clams.backends.hf.load_hf_model` when ``None``). + PROCESSOR_CLS: Optional[Any] = None + #: Torch dtype for the model (e.g. ``torch.bfloat16``). When + #: ``None``, the model class's own default is used (typically + #: float32). Also used to cast ``pixel_values`` in + #: :py:meth:`generate`. + DTYPE: Optional[Any] = None + #: Tokenizer padding side. Set to ``'left'`` for decoder-only + #: batched generation; leave ``None`` otherwise. + PADDING_SIDE: Optional[str] = None + #: Extra kwargs forwarded to ``MODEL_CLS.from_pretrained()``. + MODEL_KWARGS: Optional[dict] = None + #: Extra kwargs forwarded to ``PROCESSOR_CLS.from_pretrained()``. 
+ PROCESSOR_KWARGS: Optional[dict] = None + + @staticmethod + def inject_promptable_parameters(metadata: AppMetadata) -> None: + """ + Add the SDK-managed promptable parameters AND a ``model`` + parameter derived from ``metadata.analyzer_versions`` to the + app metadata. Overrides + :py:meth:`ClamsPromptableApp.inject_promptable_parameters` for + HF apps; call this at the end of your app's ``appmetadata()`` + function in ``metadata.py`` if your app subclasses + :py:class:`ClamsHFPromptableApp`. + + :param metadata: the :class:`AppMetadata` instance being + built. ``metadata.analyzer_versions`` MUST already be set + to a non-empty ``Dict[str, str]`` (model id -> commit + hash); this helper reads it to derive the ``model`` + parameter's choices. + :raises ValueError: if ``metadata.analyzer_versions`` is + missing or empty. + """ + ClamsPromptableApp.inject_promptable_parameters(metadata) + analyzer_versions = metadata.analyzer_versions or {} + if not analyzer_versions: + raise ValueError( + "ClamsHFPromptableApp.inject_promptable_parameters " + "requires ``metadata.analyzer_versions`` to be a " + "non-empty dict (HF model id -> commit hash). Set " + "it on the ``AppMetadata`` constructor call before " + "invoking this helper.") + choices = list(analyzer_versions.keys()) + default = choices[0] if len(choices) == 1 else None + metadata.add_parameter( + name='model', + type='string', + choices=choices, + default=default, + multivalued=False, + description=( + "HuggingFace model identifier to use for this " + "request. Must be one of the model ids declared in " + "this app's ``analyzer_versions``; the SDK pins the " + "corresponding commit hash at load time. When the " + "app ships a single model (the typical case), this " + "parameter defaults to that one model and can be " + "omitted. Pass the full HF model id (e.g. " + "``org/repo-name``); URL-encoding the ``/`` is " + "optional." 
+ ), + ) + + def __init__(self): + super().__init__() + cls_name = type(self).__name__ + if self.MODEL_CLS is None: + raise ValueError( + f"{cls_name} must set the ``MODEL_CLS`` class attribute " + f"(a ``transformers`` model class).") + analyzer_versions = self.metadata.analyzer_versions + if not analyzer_versions: + raise ValueError( + f"{cls_name} must declare ``analyzer_versions`` in " + f"``metadata.py`` as a non-empty Dict[str, str] " + f"mapping HuggingFace model ids to pinned commit " + f"hashes (7-char abbreviation is sufficient). This is " + f"required for reproducibility: an unpinned download " + f"silently floats on whatever ``main`` points at and " + f"cannot be reproduced. Singleton families (one " + f"entry) are fine; multi-model families list every " + f"member.") + if 'model' not in {p.name for p in self.metadata.parameters}: + raise ValueError( + f"{cls_name} must call " + f"``ClamsHFPromptableApp.inject_promptable_parameters" + f"(metadata)`` (the HF override that also adds the " + f"``model`` parameter) inside ``appmetadata()`` in " + f"``metadata.py``; calling " + f"``ClamsPromptableApp.inject_promptable_parameters`` " + f"directly skips the ``model`` parameter and trips " + f"this check.") + #: Per-(model_id, revision) cache of loaded + #: ``(processor, model, device)`` triples. Populated by + #: :py:meth:`load_model`; survives for the lifetime of this + #: app instance. + self._model_cache: Dict[Tuple[str, str], Tuple[Any, Any, str]] = {} + #: References to the currently-active loaded model. Set by + #: :py:meth:`load_model`; ``generate()`` and friends read + #: from here. ``None`` until the first ``load_model`` call + #: (or until ``__init__`` eager-loads a singleton family). + self.processor: Any = None + self.model: Any = None + self.device: Optional[str] = None + # Singleton families pre-load in ``__init__`` so single-model + # apps preserve warm-start UX (no first-request latency cost). 
+ # Multi-member families defer to lazy loading on the first + # ``load_model`` call. + if len(analyzer_versions) == 1: + only_model_id = next(iter(analyzer_versions.keys())) + self.load_model(only_model_id) + + def _refine_params(self, **runtime_params): + """ + Expand ``model`` from the raw HF id (``org/name``) to + ``org/name@<revision>`` so the resolved revision lands in + ``view.metadata.appConfiguration['model']``. + """ + refined = super()._refine_params(**runtime_params) + model_id = refined.get('model') + if isinstance(model_id, str) and '@' not in model_id: + revision = (self.metadata.analyzer_versions or {}).get(model_id) + if revision is not None: + refined['model'] = f"{model_id}@{revision}" + return refined + + def load_model( + self, model_id_or_with_rev: str, + ) -> Tuple[Any, Any, str]: + """ + Load (or return cached) ``(processor, model, device)`` for + the given model id. Accepts both refined (``org/name@rev``) + and raw (``org/name``) forms; for raw form, the revision is + looked up from ``self.metadata.analyzer_versions``. Caches + results per ``(model_id, revision)`` and updates + :py:attr:`self.processor`, :py:attr:`self.model`, + :py:attr:`self.device` to the loaded triple so subsequent + :py:meth:`generate` calls operate on it. + + :param model_id_or_with_rev: HF model id, optionally with + ``@<revision>`` suffix. + :return: ``(processor, model, device)`` tuple for the loaded + model. Same references are also stored on ``self``. + :raises KeyError: if a raw model id is passed and is not in + ``analyzer_versions``. 
+ """ + if '@' in model_id_or_with_rev: + model_id, _, revision = model_id_or_with_rev.rpartition('@') + else: + model_id = model_id_or_with_rev + revision = self.metadata.analyzer_versions[model_id] + cache_key = (model_id, revision) + cached = self._model_cache.get(cache_key) + if cached is not None: + self.processor, self.model, self.device = cached + return cached + # Lazy import: avoids pulling torch/transformers into the base + # clams-python install. Apps using this class must have the + # ``[hf]`` extra installed. + from clams.backends.hf import load_hf_model + self.logger.info(f"Loading HF model from {model_id} @ {revision}") + triple = load_hf_model( + model_id, + self.MODEL_CLS, + processor_cls=self.PROCESSOR_CLS, + dtype=self.DTYPE, + padding_side=self.PADDING_SIDE, + revision=revision, + model_kwargs=self.MODEL_KWARGS, + processor_kwargs=self.PROCESSOR_KWARGS, + ) + self.logger.info(f"HF model loaded on {triple[2]}") + self._model_cache[cache_key] = triple + self.processor, self.model, self.device = triple + return triple + + def generate( + self, + prompt: List[str], + system_prompt: str = '', + images: Optional[List[List[Any]]] = None, + audios: Optional[List[List[Any]]] = None, + prompt_mode: str = 'turn-taking', + **generation_params, + ) -> List[str]: + """ + Default implementation of the + :py:meth:`ClamsPromptableApp.generate` contract for + HuggingFace ``transformers`` models. Runs N prompts in one + forward pass; returns N decoded strings. + + Each inner list of ``images`` / ``audios`` is the bundled + content for one prompt. When both ``images`` and ``audios`` + are given they must have the same outer length (multimodal + pairs are stitched by index). When both are ``None``, runs as + a single text-only prompt. + + The default body is the canonical HF chat-model pipeline: + :py:meth:`build_conversation` -> ``apply_chat_template`` -> + ``model.generate`` -> ``batch_decode``. 
Subclasses can + customize finer-grained pieces via + :py:meth:`build_conversation` (model-specific message shape) + and :py:meth:`build_gen_kwargs` (model-specific generation + kwargs) without touching this method. + """ + if images is not None and audios is not None: + if len(images) != len(audios): + raise ValueError( + f"images and audios must have the same outer length " + f"when both are given; got " + f"{len(images)} vs {len(audios)}.") + if images is not None: + n = len(images) + elif audios is not None: + n = len(audios) + else: + n = 1 # text-only single prompt + if n == 0: + return [] + gen_kwargs = self.build_gen_kwargs(**generation_params) + try: + conversations = [ + self.build_conversation( + prompt, system_prompt=system_prompt, + images=images[i] if images is not None else None, + audios=audios[i] if audios is not None else None, + prompt_mode=prompt_mode) + for i in range(n) + ] + inputs = self.processor.apply_chat_template( + conversations, + add_generation_prompt=True, + tokenize=True, + return_dict=True, + padding=True, + return_tensors="pt", + ) + inputs = inputs.to(self.device) + if (self.DTYPE is not None + and 'pixel_values' in inputs + and inputs['pixel_values'] is not None): + inputs['pixel_values'] = inputs['pixel_values'].to( + dtype=self.DTYPE) + generated_ids = self.model.generate(**inputs, **gen_kwargs) + input_len = inputs.input_ids.shape[1] + new_tokens = generated_ids[:, input_len:] + return self.processor.batch_decode( + new_tokens, skip_special_tokens=True) + except Exception as e: + self.logger.error( + f"Error processing batch: {e}", exc_info=True) + return [''] * n + + @staticmethod + def build_gen_kwargs( + max_new_tokens: int = 512, + temperature: float = 0.0, + top_p: float = 1.0, + top_k: int = 50, + **_unused, + ) -> dict: + """ + Translate the SDK's promptable-parameter values into + HuggingFace ``model.generate()`` kwargs. 
Greedy decoding + (``do_sample=False``) when ``temperature == 0.0``; sampled + decoding with the given ``top_p`` / ``top_k`` otherwise. + + Subclasses MAY override to add model-specific generation + kwargs (``num_beams``, ``repetition_penalty``, custom + stopping criteria, ``do_sample`` overrides, etc.). The base + implementation accepts any extra keyword args and silently + ignores them, so subclasses can pass through the full + ``**parameters`` dict from ``_annotate`` without filtering. + """ + gen_kwargs = {'max_new_tokens': max_new_tokens} + if temperature > 0: + gen_kwargs.update({ + 'do_sample': True, + 'temperature': temperature, + 'top_p': top_p, + 'top_k': top_k, + }) + return gen_kwargs + + class ParameterCaster(object): def __init__(self, param_spec: Dict[str, Tuple[str, bool]]): diff --git a/clams/appmetadata/__init__.py b/clams/appmetadata/__init__.py index fd2fe75..b6fe185 100644 --- a/clams/appmetadata/__init__.py +++ b/clams/appmetadata/__init__.py @@ -357,6 +357,14 @@ class AppMetadata(pydantic.BaseModel): None, description="(optional) A string-to-string map that can be used to store any additional metadata of the app." ) + app_tags: List[str] = pydantic.Field( + [], + description="(optional) A list of short string labels that classify what kind of work this app does " + "(e.g. task name, output profile family). Used by downstream consumers as a first-pass filter " + "for selecting views; not a substitute for inspecting actual output types and properties. " + "The values declared here are propagated by the SDK into the ``appTags`` field of every view " + "the app signs." + ) est_gpu_mem_min: int = pydantic.Field( 0, description="(optional) Minimum GPU memory required to run the app, in megabytes (MB). 
def add_app_tag(self, *tags: str) -> None:
    """
    Helper method to add one or more strings to the ``app_tags`` list,
    skipping any value that is already present.

    All tags are validated before any of them is appended, so a call
    that raises leaves ``app_tags`` exactly as it was.

    :param tags: one or more tag strings to add
    :raises ValueError: if any tag is not a non-empty string
    """
    # First pass: validation only. Doing this up front keeps the call
    # all-or-nothing instead of appending a prefix and then failing.
    for tag in tags:
        if not isinstance(tag, str) or not tag:
            raise ValueError(f"app tag must be a non-empty string: {tag!r}")
    # Second pass: append in call order, de-duplicating against both the
    # existing list and tags added earlier in this same call.
    for tag in tags:
        if tag not in self.app_tags:
            self.app_tags.append(tag)
def load_hf_model(
    model_id: str,
    model_cls,
    processor_cls=None,
    dtype=None,
    device: Optional[str] = None,
    padding_side: Optional[str] = None,
    revision: Optional[str] = None,
    model_kwargs: Optional[dict] = None,
    processor_kwargs: Optional[dict] = None,
    move_to_device: bool = True,
) -> Tuple[Any, Any, str]:
    """
    Load a HuggingFace ``transformers`` model via ``from_pretrained``
    and return it ready for inference.

    :param model_id: HuggingFace model identifier (e.g., a Hub repo
        name or a local path) forwarded to ``from_pretrained``.
    :param model_cls: a ``transformers`` model class (e.g.,
        ``AutoModelForCausalLM``, ``AutoModelForImageTextToText``,
        ``ConvNextV2Model``, ``ViTModel``, ...). Whatever supports
        ``from_pretrained()``.
    :param processor_cls: a processor / tokenizer / feature-extractor
        class with ``from_pretrained()``. Pass
        ``transformers.AutoTokenizer``,
        ``transformers.AutoImageProcessor``, etc. for narrower cases.
        When ``None``, the outcome depends on ``processor_kwargs``:

        * ``processor_kwargs`` is also ``None`` (both left at their
          defaults): ``transformers.AutoProcessor`` is used.
        * ``processor_kwargs`` is anything else, including an empty
          ``{}``: processor loading is skipped entirely, the returned
          ``processor`` is ``None`` and ``processor_kwargs`` is not
          used.

        NOTE(review): ``processor_cls=None`` on its own therefore does
        NOT skip the processor, and passing only ``processor_kwargs``
        does not load ``AutoProcessor`` with them -- confirm this
        dispatch is the intended public contract.
    :param dtype: torch dtype for the model (e.g., ``torch.bfloat16``).
        When ``None`` (default), no ``torch_dtype`` kwarg is forwarded
        to ``from_pretrained`` -- the model class uses its own default
        (typically float32). Set explicitly for low-precision LLM
        inference. When given, it overwrites any ``torch_dtype`` entry
        in ``model_kwargs``.
    :param device: target device string (e.g., ``'cuda'``, ``'cpu'``,
        ``'cuda:0'``). When ``None`` (default), the helper auto-detects
        cuda availability and falls back to cpu.
    :param padding_side: if set (typically ``'left'`` for decoder-only
        models doing batched generation), the helper configures the
        underlying tokenizer's ``padding_side`` and -- when no pad
        token is set -- uses the EOS token as the pad token. Leave
        ``None`` for encoder / non-batched cases (the tokenizer's own
        default is preserved). Has no effect when processor loading is
        skipped.
    :param revision: optional Git revision (commit hash, branch name,
        or tag) on the Hub repository to pin the download to. When
        set, forwarded as ``revision=...`` to both
        ``model_cls.from_pretrained`` and
        ``processor_cls.from_pretrained`` (unless the corresponding
        kwargs dict already carries its own ``revision``), ensuring
        the model and processor are loaded from the same commit.
        Strongly recommended for production: pinning a commit hash
        makes the analyzer artifact reproducible and immune to
        upstream silent updates. Apps calling this helper directly
        should record the same hash on ``analyzer_version`` (or
        ``analyzer_versions``) in ``metadata.py`` so the output MMIF
        identifies the exact artifact. Apps inheriting from
        :class:`~clams.app.ClamsHFPromptableApp` do not call this
        helper -- the base class reads ``analyzer_versions`` from the
        app metadata and forwards the resolved revision automatically.
    :param model_kwargs: extra kwargs forwarded to
        ``model_cls.from_pretrained()`` (e.g.,
        ``{'use_safetensors': True, 'add_pooling_layer': False}``).
    :param processor_kwargs: extra kwargs forwarded to
        ``processor_cls.from_pretrained()`` (e.g.,
        ``{'use_safetensors': True, 'use_fast': True}``). See
        ``processor_cls`` for how a non-``None`` value interacts with
        ``processor_cls=None``.
    :param move_to_device: when ``True`` (default), the helper moves
        the loaded model to the resolved device and switches it to
        ``eval()`` mode -- the right behavior for a "ready for
        inference" app loader. When ``False``, both steps are
        skipped; the model is returned in whatever state
        ``from_pretrained`` left it (this helper neither places it on
        a device nor changes its train/eval mode). Use
        ``False`` for library-style HF wrappers that defer device
        placement and inference-mode switching to a downstream
        consumer (e.g. an extractor class that may be combined with
        a head and only then placed on a device by the wrapping
        classifier). The returned ``device`` is still the resolved
        target, so the consumer can use it later for its own
        ``.to(device)`` call.

    :returns: ``(processor, model, device)`` tuple. ``processor`` is
        the loaded processor/tokenizer/feature-extractor, or ``None``
        when processor loading was skipped (see ``processor_cls``).
        ``device`` is the resolved device string (the model was moved
        there iff ``move_to_device=True``).
    :rtype: Tuple[Any, Any, str]
    :raises ImportError: if ``torch`` or ``transformers`` is not
        installed. Install the ``[hf]`` extra to fix.
    """
    # Lazy imports: keep the heavy optional dependencies out of module
    # import time and turn their absence into an actionable message.
    try:
        import torch  # pytype: disable=import-error
    except ImportError as e:
        raise ImportError(
            "clams.backends.hf requires the `torch` package. "
            "Install with: pip install clams-python[hf]"
        ) from e
    try:
        import transformers  # pytype: disable=import-error
    except ImportError as e:
        raise ImportError(
            "clams.backends.hf requires the `transformers` package. "
            "Install with: pip install clams-python[hf]"
        ) from e

    # An explicit (truthy) device wins; otherwise prefer cuda when present.
    resolved_device = device or ('cuda' if torch.cuda.is_available() else 'cpu')

    # Processor.
    if processor_cls is None and processor_kwargs is None:
        # default to AutoProcessor (only when BOTH were left as None)
        processor_cls = transformers.AutoProcessor
    if processor_cls is not None:
        # copy so the caller's dict is never mutated
        processor_load_kwargs = dict(processor_kwargs or {})
        if revision is not None:
            # setdefault: a revision already present in processor_kwargs wins
            processor_load_kwargs.setdefault('revision', revision)
        processor = processor_cls.from_pretrained(
            model_id, **processor_load_kwargs)
        if padding_side is not None:
            # composite processors expose the tokenizer as ``.tokenizer``;
            # otherwise treat the loaded object itself as the tokenizer
            tokenizer = getattr(processor, 'tokenizer', processor)
            tokenizer.padding_side = padding_side
            if getattr(tokenizer, 'pad_token', None) is None:
                eos = getattr(tokenizer, 'eos_token', None)
                if eos is not None:
                    tokenizer.pad_token = eos
    else:
        # reached only when processor_cls is None AND processor_kwargs
        # is not None: processor loading is skipped
        processor = None

    # Model.
    model_load_kwargs = dict(model_kwargs or {})
    if dtype is not None:
        # explicit dtype overrides any torch_dtype given via model_kwargs
        model_load_kwargs['torch_dtype'] = dtype
    if revision is not None:
        model_load_kwargs.setdefault('revision', revision)
    model = model_cls.from_pretrained(model_id, **model_load_kwargs)
    if move_to_device:
        model = model.to(resolved_device)
        model.eval()

    return processor, model, resolved_device


def load_hf_pipeline(
    task: str,
    model_id: str,
    device: Optional[Union[str, int]] = None,
    revision: Optional[str] = None,
    model_kwargs: Optional[dict] = None,
    pipeline_kwargs: Optional[dict] = None,
) -> Tuple[Any, Union[str, int]]:
    """
    Load a HuggingFace :func:`transformers.pipeline` for ``task`` and
    return it ready for inference. Wraps the device / revision /
    kwargs-forwarding boilerplate that every pipeline-backed CLAMS
    app does identically. Use this for apps wrapping a task-level
    pipeline (ASR via ``"automatic-speech-recognition"``, NER via
    ``"token-classification"``, text classification, zero-shot, etc.);
    use :func:`load_hf_model` instead when the app needs raw access
    to the underlying model / processor (e.g., for custom chat-template
    formatting or batched ``generate`` calls).

    :param task: pipeline task string forwarded to
        :func:`transformers.pipeline` (e.g.,
        ``"automatic-speech-recognition"``, ``"token-classification"``).
    :param model_id: HuggingFace model identifier (Hub repo name or
        local path) forwarded to ``pipeline(model=...)``.
    :param device: target device. Accepts the string form
        (``'cuda'``, ``'cpu'``, ``'cuda:0'``) for parity with
        :func:`load_hf_model`, or the integer form accepted natively
        by ``pipeline`` (``-1`` for CPU, ``0+`` for GPU index). When
        ``None`` (default), auto-detects cuda availability and falls
        back to cpu (string form).
    :param revision: optional Git revision (commit hash, branch, or
        tag) on the Hub to pin the download to. Strongly recommended
        for production; see :func:`load_hf_model` for rationale.
    :param model_kwargs: extra kwargs forwarded to the underlying
        ``model.from_pretrained()`` via the
        ``pipeline(model_kwargs={...})`` channel.
    :param pipeline_kwargs: extra kwargs forwarded directly to
        :func:`transformers.pipeline` (e.g. ``generate_kwargs``,
        ``tokenizer``, ``feature_extractor``, ``batch_size``,
        ``framework``). ``task``, ``model`` and ``device`` entries are
        always dropped in favor of the helper's own arguments.
        ``model_kwargs`` / ``revision`` entries are replaced only when
        the corresponding helper argument is given (a non-empty
        ``model_kwargs``, a non-``None`` ``revision``); otherwise they
        are forwarded as supplied.
    :returns: ``(pipeline, device)`` tuple. ``device`` is the resolved
        device the pipeline is on, in the form it was passed (or the
        auto-resolved string form when ``device=None``).
    :rtype: Tuple[Any, Union[str, int]]
    :raises ImportError: if ``torch`` or ``transformers`` is not
        installed. Install the ``[hf]`` extra to fix.
    """
    # Lazy imports, same rationale and messages as load_hf_model.
    try:
        import torch  # pytype: disable=import-error
    except ImportError as e:
        raise ImportError(
            "clams.backends.hf requires the `torch` package. "
            "Install with: pip install clams-python[hf]"
        ) from e
    try:
        from transformers import pipeline  # pytype: disable=import-error
    except ImportError as e:
        raise ImportError(
            "clams.backends.hf requires the `transformers` package. "
            "Install with: pip install clams-python[hf]"
        ) from e

    # ``is not None`` (not truthiness) so the integer device 0 is honored.
    resolved_device = device if device is not None else (
        'cuda' if torch.cuda.is_available() else 'cpu')

    # copy so the caller's dict is never mutated
    pipeline_call_kwargs = dict(pipeline_kwargs or {})
    # Helper-owned keys: explicit args win on collision.
    for owned in ('task', 'model', 'device'):
        pipeline_call_kwargs.pop(owned, None)
    if model_kwargs:
        pipeline_call_kwargs['model_kwargs'] = dict(model_kwargs)
    if revision is not None:
        pipeline_call_kwargs['revision'] = revision

    pipe = pipeline(
        task,
        model=model_id,
        device=resolved_device,
        **pipeline_call_kwargs,
    )
    return pipe, resolved_device
+ if dst_dir.exists() and update_level == 0: + raise FileExistsError(f" {dst_dir} already exists. Did you mean `--update`? ") + self.bake_app(src_dir, dst_dir, {}) def bake_app(self, src_dir, dst_dir, templating_vars): for g in src_dir.glob("**/*.template"): r = g.relative_to(src_dir).parent f = g.with_suffix('').name - (dst_dir / r).mkdir(exist_ok=True) + (dst_dir / r).mkdir(parents=True, exist_ok=True) with open(g, 'r') as in_f, open(dst_dir/r/f, 'w') as out_f: tmpl_to_compile = Template(in_f.read()) diff --git a/clams/develop/templates/app/app.py.template b/clams/develop/templates/app/app.py.template index a7a4cc5..d2eb9b0 100644 --- a/clams/develop/templates/app/app.py.template +++ b/clams/develop/templates/app/app.py.template @@ -24,19 +24,64 @@ from mmif import Mmif, View, Annotation, Document, AnnotationTypes, DocumentType from lapps.discriminators import Uri +# ============================================================================= +# Pick a base class for your app: +# +# ClamsApp ............ default; the rest of this scaffold inherits from it. +# Implement ``_annotate()``. That's it. +# Choose for any non-LLM/VLM app: classical OCR / +# ASR engines, classifiers, rule-based tools, etc. +# +# ClamsPromptableApp .. for prompt-driven LLM/VLM/ALM/LMM apps wrapping a +# non-HF backend (remote APIs like OpenAI/Anthropic, +# vLLM, custom inference servers). +# Implement: ``_annotate()`` + ``generate()``. +# Import: +# from clams import ClamsPromptableApp +# Also in ``metadata.py``: uncomment the +# ``inject_promptable_parameters`` block. +# +# ClamsHFPromptableApp for prompt-driven apps wrapping a local HuggingFace +# ``transformers`` model (the typical VLM/LLM case). 
+# Implement: ``_annotate()`` (call +# ``self.load_model(parameters['model'])`` first) + +# declare class attributes: +# MODEL_CLS = <a transformers model class> +# DTYPE = torch.bfloat16 # optional +# PADDING_SIDE = 'left' # optional +# Import: +# from clams.app import ClamsHFPromptableApp +# Also in ``metadata.py``: set +# ``analyzer_versions={<model_id>: <revision>, ...}`` +# on the ``AppMetadata`` call, and uncomment the +# ``ClamsHFPromptableApp.inject_promptable_parameters`` +# block (the HF override of the plain helper). +# Requires the ``[hf]`` extra: +# pip install clams-python[hf] +# Singleton ``analyzer_versions`` families pre-load +# in ``__init__`` (warm start); multi-member +# families load on the first ``load_model`` call +# and cache thereafter. ``generate()``, +# ``build_conversation``, and ``build_gen_kwargs`` +# have working defaults; override only for +# model-specific quirks. +# +# See https://clams.ai/clams-python/app-baseclasses.html for the full +# developer guide. +# ============================================================================= class $APP_CLASS_NAME(ClamsApp): def __init__(self): super().__init__() def _appmetadata(self): - # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._load_appmetadata + # see https://clams.ai/clams-python/autodoc/clams.app.html#clams.app.ClamsApp._load_appmetadata # Also check out ``metadata.py`` in this directory. # When using the ``metadata.py`` leave this do-nothing "pass" method here.
pass def _annotate(self, mmif: Mmif, **parameters) -> Mmif: - # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._annotate + # see https://clams.ai/clams-python/autodoc/clams.app.html#clams.app.ClamsApp._annotate raise NotImplementedError def get_app(): diff --git a/clams/develop/templates/app/metadata.py.template b/clams/develop/templates/app/metadata.py.template index 93aec79..2de03a5 100644 --- a/clams/develop/templates/app/metadata.py.template +++ b/clams/develop/templates/app/metadata.py.template @@ -16,8 +16,8 @@ def appmetadata() -> AppMetadata: """ Function to set app-metadata values and return it as an ``AppMetadata`` obj. Read these documentations before changing the code below - - https://sdk.clams.ai/appmetadata.html metadata specification. - - https://sdk.clams.ai/autodoc/clams.appmetadata.html python API + - https://clams.ai/clams-python/appmetadata.html metadata specification. + - https://clams.ai/clams-python/autodoc/clams.appmetadata.html python API :return: AppMetadata object holding all necessary information. """ @@ -51,7 +51,48 @@ def appmetadata() -> AppMetadata: metadata.add_parameter(name='a_param', description='example parameter description', type='boolean', default='false') # metadta.add_parameter(more...) - + + # If your app subclasses ``ClamsPromptableApp`` (a prompt-driven LLM/VLM/audio-LM + # app on a non-HF backend), uncomment the following two lines to add the + # SDK-managed promptable parameters (prompt, systemPrompt, temperature, + # maxNewTokens, etc.) to your app's metadata. See + # https://clams.ai/clams-python/app-baseclasses.html#promptable for the + # developer guide. Reminder: these parameter names are reserved by the SDK; + # do not redeclare any of them above. 
+ # from clams.app import ClamsPromptableApp + # ClamsPromptableApp.inject_promptable_parameters(metadata) + # + # If your app subclasses ``ClamsHFPromptableApp`` (HF transformers backend), + # use the HF override of the same helper -- it injects the promptable + # parameters AND a ``model`` parameter derived from ``analyzer_versions``. + # Also set ``analyzer_versions={<model_id>: <revision>, ...}`` on the + # ``AppMetadata(...)`` call above (replaces the singular + # ``analyzer_version`` for HF apps). See + # https://clams.ai/clams-python/app-baseclasses.html#hf-promptable for details. + # from clams.app import ClamsHFPromptableApp + # ClamsHFPromptableApp.inject_promptable_parameters(metadata) + # + # To customize the default value of any promptable parameter (e.g. provide an + # app-specific default ``prompt``, raise ``maxNewTokens``, pin ``parallelPrompts``, + # etc.), mutate the ``default`` field on the already-injected parameter + # object; the SDK does NOT allow re-declaring promptable param names. See + # https://clams.ai/clams-python/app-baseclasses.html#promptable-customizing-defaults + # for details. Example: + # for p in metadata.parameters: + # if p.name == 'prompt': + # p.default = ['Describe what is in this image.'] + # elif p.name == 'maxNewTokens': + # p.default = 2048 + # + # HF-only: the ``model`` parameter the HF helper injects gets its ``default`` + # auto-set to the only key when ``analyzer_versions`` has a single entry + # (singleton family); for multi-member families the default is ``None`` and + # the caller MUST pass ``model=...`` on every request.
To provide a + # recommended pick instead, mutate ``default`` the same way: + # for p in metadata.parameters: + # if p.name == 'model': + # p.default = '<org>/<model_id>' + # CHANGE this line and make sure return the compiled `metadata` instance return None diff --git a/clams/develop/templates/utl-tf/__init__.py.template b/clams/develop/templates/utl-tf/__init__.py.template new file mode 100644 index 0000000..e69de29 diff --git a/clams/develop/templates/utl-tf/timeframe.py.template b/clams/develop/templates/utl-tf/timeframe.py.template new file mode 100644 index 0000000..d2c8d7b --- /dev/null +++ b/clams/develop/templates/utl-tf/timeframe.py.template @@ -0,0 +1,154 @@ +""" +TimeFrame iteration / frame-sampling helpers, local to this app. + +Generated by ``clams develop -r utl-tf``. The code in this file is part +of your app, not the SDK; edit it freely, refactor as needed, or delete +the whole file if your app does not iterate TimeFrame annotations. + +The functions here factor out the canonical pattern that any CLAMS app +processing video by TimeFrames tends to write: + + 1. iterate TimeFrame annotations across input views, optionally + filtered by label + 2. sample frames per TF using the universal ``tfSamplingMode`` + parameter (representative TimePoints, the middle representative, + or every target / native-FPS frame) + 3. when ``vdh`` returns a fallback timestamp (milliseconds, no + existing TP behind it), mint a fresh ``TimePoint`` annotation in + the app's new view so downstream code has a stable anchor id + 4. assemble per-TF task tuples that downstream batching / + inference / annotation code can consume uniformly + +The helpers are backend-agnostic: tasks can feed a HuggingFace VLM, a +remote LLM API, a classical CV pipeline, or any other per-frame +processor. They have no dependency on ``clams.app.ClamsPromptableApp`` +or any other promptable / inference machinery.
def iter_timeframes(
    mmif: Mmif, tflabels_of_interest: List[str],
) -> Iterator[Annotation]:
    """
    Walk every view that contains TimeFrame annotations and yield the
    TimeFrames one by one, optionally restricted by label.

    :param mmif: the input MMIF object.
    :param tflabels_of_interest: label allow-list. When it is empty,
        every TimeFrame is yielded and the ``label`` property is never
        read; otherwise only TimeFrames whose ``label`` property is in
        the list are yielded.
    """
    for tf_view in mmif.get_all_views_contain(AnnotationTypes.TimeFrame):
        for timeframe in tf_view.get_annotations(AnnotationTypes.TimeFrame):
            # short-circuit keeps the label lookup out of the no-filter case
            if (not tflabels_of_interest
                    or timeframe.get_property('label') in tflabels_of_interest):
                yield timeframe


def to_timepoints(
    parent_view: View,
    video_doc: Document,
    sources: List[Union[str, int]],
) -> List[str]:
    """
    Turn per-frame ``sources`` (as returned by
    :func:`vdh.extract_images_by_mode_with_sources`) into a parallel
    list of TimePoint ids.

    * a ``str`` source is taken to be the id of an existing TimePoint
      annotation and is passed through unchanged;
    * any other source is treated as a millisecond timestamp: a new
      ``TimePoint`` annotation is created in ``parent_view`` at that
      timestamp and its id is used instead.

    ``parent_view.new_contain()`` is called for the ``TimePoint`` type
    only right before the first annotation is minted, so a call that
    mints nothing does not register the type.

    :param parent_view: the view this app is writing into; receives
        any freshly-minted TimePoints.
    :param video_doc: the source VideoDocument; its id is recorded as
        ``document`` on each minted TimePoint.
    :param sources: per-frame source identifiers from ``vdh``.
    :return: a list of TimePoint ids, parallel to ``sources``.
    """
    tp_ids: List[str] = []
    contain_declared = False
    for source in sources:
        if isinstance(source, str):
            tp_ids.append(source)
            continue
        # fallback timestamp: mint a TimePoint, declaring the type once
        if not contain_declared:
            parent_view.new_contain(AnnotationTypes.TimePoint)
            contain_declared = True
        minted = parent_view.new_annotation(
            AnnotationTypes.TimePoint,
            document=video_doc.id,
            timePoint=int(source),
            timeUnit='milliseconds',
        )
        tp_ids.append(minted.id)
    return tp_ids


def collect_timeframes_of_interest(
    mmif: Mmif,
    parent_view: View,
    video_doc: Document,
    tflabels_of_interest: List[str],
) -> List[Tuple[List[Any], List[str], str, Optional[str]]]:
    """
    Compose :func:`iter_timeframes`,
    :func:`vdh.extract_images_by_mode_with_sources` and
    :func:`to_timepoints` into one pass that builds a
    ``(images, tp_ids, tf_id, tf_label)`` task for every matching
    TimeFrame that yielded at least one sampled frame.

    ``images`` and ``tp_ids`` are parallel lists with one entry per
    sampled frame; each ``tp_ids`` entry is the id of an existing or a
    freshly-minted TimePoint (see :func:`to_timepoints`). ``tf_label``
    is whatever the TimeFrame's ``label`` property lookup returns.

    :param mmif: the input MMIF.
    :param parent_view: the view this app is writing into.
    :param video_doc: the source VideoDocument that frames are
        extracted from.
    :param tflabels_of_interest: optional label filter; empty list =
        no filter.
    :return: per-TF task tuples, ready to feed a batched inference
        loop or any other per-frame processor.
    """
    collected: List[Tuple[List[Any], List[str], str, Optional[str]]] = []
    for timeframe in iter_timeframes(mmif, tflabels_of_interest):
        frames, frame_sources = vdh.extract_images_by_mode_with_sources(
            mmif, timeframe, as_PIL=True)
        if frames:
            # only TimeFrames that produced frames mint TimePoints / tasks
            anchor_ids = to_timepoints(parent_view, video_doc, frame_sources)
            label = timeframe.get_property('label')
            collected.append((list(frames), anchor_ids, timeframe.id, label))
    return collected
_app-baseline: + +What every CLAMS app inherits +----------------------------- + +Every CLAMS app subclasses :class:`~clams.app.ClamsApp` (directly or via +a specialized base class such as :class:`~clams.app.ClamsPromptableApp`) +and inherits its baseline behaviors: parameter casting and refinement, +view signing, JSON envelope unwrapping, CUDA memory profiling and +cleanup, error views, and a set of *universal* runtime parameters that +the SDK auto-injects into every app's metadata. + +Universal parameters +^^^^^^^^^^^^^^^^^^^^ + +Added automatically by :meth:`~clams.app.ClamsApp.__init__` at runtime +and by the standard ``metadata.py`` template's ``__main__`` block at +``python metadata.py`` time. App developers do not declare them. + +.. list-table:: + :header-rows: 1 + :widths: 18 12 18 8 44 + + * - Name + - Type + - Default + - Multi-valued + - Notes + * - ``pretty`` + - boolean + - ``false`` + - no + - When ``true``, the response MMIF JSON is re-formatted with + 2-space indentation. + * - ``runningTime`` + - boolean + - ``true`` + - no + - When ``true``, the running time of the request is recorded in + the view metadata. + * - ``hwFetch`` + - boolean + - ``false`` + - no + - When ``true``, host hardware info (architecture, GPU and vRAM) + is recorded in the view metadata. + * - ``tfSamplingMode`` + - string + - ``'representatives'`` + - no + - For apps that process ``TimeFrame`` annotations: how to sample + frames within each TimeFrame. Choices: ``'representatives'``, + ``'single'``, ``'all'``. No effect on apps that do not process + TimeFrames. + +.. _sdk-managed-reserved: + +SDK-managed parameter names are reserved +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Parameter names added by the SDK (the universal parameters listed +above, plus any parameters added by a specialized base class) are +reserved. 
An app's ``appmetadata()`` MUST NOT declare any of these +names via :meth:`AppMetadata.add_parameter` directly; doing so trips +the existing duplicate-name ``ValueError`` when the SDK tries to add +its own spec. + +This reservation guarantees a uniform, predictable parameter interface +across all CLAMS apps. App developers can still customize a reserved +parameter's *default value* (but not its ``type``, ``multivalued``, or +``choices``) by mutating the ``default`` field on the already-injected +parameter object; see :ref:`promptable-customizing-defaults` for a +worked example. + +.. _promptable: + +Promptable CLAMS Apps +--------------------- + +A **promptable app** is a CLAMS app that wraps a promptable model: a large +language model (LLM), vision-language model (VLM), audio-language model +(ALM), large multimodal model (LMM), or remote generative API. The SDK +provides :class:`~clams.app.ClamsPromptableApp` as a specialized base class +for these apps. It standardizes the runtime parameter surface (prompts, +generation hyperparameters, batch size) and provides helpers for building +chat conversations and persisting model responses into MMIF. + +This section is the developer guide for writing or migrating a CLAMS app +that inherits from :class:`~clams.app.ClamsPromptableApp`. For the general +CLAMS app development pattern, see the :ref:`introduction`, +:ref:`tutorial`, and :ref:`runtime-params` pages. + +When to use ``ClamsPromptableApp`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Choose :class:`~clams.app.ClamsPromptableApp` over :class:`~clams.app.ClamsApp` +when your app's core operation is "given a prompt and some input +(image/audio/text/structured data), return generated text." 
Concretely: + +- Image captioning, VLM-based OCR, scene description +- Audio captioning, transcription via ALMs +- Summarization, classification, structured-data extraction via LLMs +- Tasks driven by an LMM that takes mixed-modality inputs +- Any app that wraps a remote LLM, VLM, ALM, or LMM API and forwards a prompt + +If your app does not call a generative model (e.g. a classical OCR engine, +a speech-to-text engine that doesn't take prompts, a classifier wrapping a +discriminative model), keep using :class:`~clams.app.ClamsApp` directly. + +.. note:: + + ``ClamsPromptableApp`` assumes an **instruction- or chat-tuned** + model with a system/user/assistant role structure. Bare completion + / next-token-prediction base models do not fit this base class + cleanly; use :class:`~clams.app.ClamsApp` directly for those. + +Standardized runtime parameters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Every :class:`~clams.app.ClamsPromptableApp` exposes the following +SDK-managed runtime parameters in addition to the universal parameters +from :class:`~clams.app.ClamsApp`. These names are reserved; see +:ref:`sdk-managed-reserved`. + +.. list-table:: + :header-rows: 1 + :widths: 18 12 18 8 44 + + * - Name + - Type + - Default + - Multi-valued + - Notes + * - ``prompt`` + - string + - *(required, no default)* + - yes + - User prompt(s) sent to the model. A single value runs as a one-shot + generation. A multi-value list is interpreted as a multi-turn static + prompt; see :ref:`promptable-multiturn`. + * - ``systemPrompt`` + - string + - ``''`` + - no + - Optional system-role text prepended to the conversation. + * - ``promptMode`` + - string + - ``'turn-taking'`` + - no + - How to interpret a multi-value ``prompt`` list. Choices: + ``'turn-taking'`` or ``'user-only'``. See :ref:`promptable-multiturn`. + * - ``maxNewTokens`` + - integer + - ``512`` + - no + - Maximum number of new tokens generated per inference call. 
Larger values + grow the KV cache linearly and add to GPU memory usage; reduce if VRAM + is constrained. + * - ``temperature`` + - number + - ``0.0`` + - no + - Sampling temperature. ``0.0`` selects deterministic / greedy decoding + for maximum reproducibility; override for sampled generation. + * - ``topP`` + - number + - ``1.0`` + - no + - Nucleus-sampling cumulative probability cutoff. Only meaningful when + ``temperature`` > 0. + * - ``topK`` + - integer + - ``50`` + - no + - Top-K sampling cutoff. Only meaningful when ``temperature`` > 0. + * - ``parallelPrompts`` + - integer + - ``1`` + - no + - Number of independent prompts the app stacks into a single + forward pass. Per-prompt content size is the app's + responsibility; prompt count and per-prompt size combine + multiplicatively for GPU memory. Keep at ``1`` on memory-tight + setups; see the parameter's own description in + :py:attr:`~clams.app.ClamsPromptableApp.promptable_parameters` + for an OOM-risk example. + +.. _promptable-customizing-defaults: + +Customizing default values +"""""""""""""""""""""""""" + +The SDK ships sensible defaults for most promptable parameters but +deliberately leaves ``prompt`` **without** a default; prompts are +inherently app-specific and no single value is right for all apps. +Beyond ``prompt``, other defaults may also be inappropriate for a given +app: a model that needs longer outputs wants a higher ``maxNewTokens``, +a small-VRAM deployment wants ``parallelPrompts`` pinned at ``1``, etc. + +Because the reservation rule prevents calling +``metadata.add_parameter('prompt', ...)`` (or any other promptable name) +directly, the recommended pattern for customizing defaults is to mutate +the ``default`` field on the already-injected parameter object right +after calling :meth:`~clams.app.ClamsPromptableApp.inject_promptable_parameters`. +You'll see a worked example of this in the ``metadata.py`` generated +by the ``clams develop`` scaffold. 
+ +This works for any promptable parameter. The parameter spec itself +(``type``, ``multivalued``, ``choices``) stays locked by the SDK; only +the ``default`` field is meant to be mutated this way, which preserves +the cross-app uniformity that the reservation rule is designed to +guarantee. + +If an app *wants* to require callers to pass a value explicitly (for +``prompt`` or any other parameter), it can simply leave the default +unchanged. ``prompt`` already has no default, so the SDK will raise a +"required parameter" error if the caller omits it; for other params, +deleting the SDK default and leaving it ``None`` would have the same +effect, though that's rarely useful. + +.. _promptable-declaration: + +Declaring a promptable app +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A promptable app requires two paired edits relative to the scaffold +generated by ``clams develop``: + +1. In ``app.py``, change the app class's base from + :class:`~clams.app.ClamsApp` to + :class:`~clams.app.ClamsPromptableApp` and implement + :meth:`~clams.app.ClamsPromptableApp.generate`. The scaffold file + already contains a guiding comment at the class declaration line. +2. In ``metadata.py``, call + :meth:`ClamsPromptableApp.inject_promptable_parameters + <clams.app.ClamsPromptableApp.inject_promptable_parameters>` at + the end of ``appmetadata()``. The scaffold file already contains + a commented-out helper-call block; uncomment it. + +The ``__main__`` block in ``metadata.py`` is unchanged from +non-promptable apps. The helper call inside ``appmetadata()`` makes +the promptable parameters visible to both ``python metadata.py`` +(build-time discovery) and to +:meth:`~clams.app.ClamsApp._load_appmetadata` (runtime). The base +class change ensures the app inherits the parameter-presence +validation, the ``generate()`` contract, and the helper methods at +runtime.
+ +The ``generate()`` contract +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Subclasses of :class:`~clams.app.ClamsPromptableApp` that wrap a backend +without a default SDK implementation (e.g., remote-API or custom local +backends) MUST implement :meth:`~clams.app.ClamsPromptableApp.generate`. +Subclasses of :class:`~clams.app.ClamsHFPromptableApp` inherit a concrete +``generate()`` and do not need to override it. See the method's docstring +for the full signature, batch semantics, and return value. + +Keep inference logic inside ``generate()`` distinct from MMIF I/O; the +latter belongs in ``_annotate()`` (which calls ``self.generate()``). +This separation lets HF-backed apps inherit the default ``generate()`` +without restating backend mechanics, and lets non-HF apps swap in a new +``generate()`` without rewriting their MMIF I/O. + +.. _promptable-multiturn: + +Multi-turn handling (``promptMode``) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``prompt`` is always a ``List[str]`` after parameter casting. When the +list has a single element, ``promptMode`` is irrelevant (single-shot +generation). When the list has multiple elements, ``promptMode`` selects +between two multi-element prompting strategies: + +**Turn-taking** (default). The list is interpreted as an alternating +user/assistant conversation: even indices (0, 2, 4, ...) are user turns, +odd indices are assistant turns. The full conversation is sent to the +model in a single ``generate`` call. This mode supports any pattern +that fits an alternating role structure, including few-shot in-context +learning (where the (user, assistant) pairs are task exemplars and the +final user turn is the new query), multi-turn dialogue continuation, +and role-play scaffolding. Example (few-shot ICL): ``["Classify +sentiment: 'I love this.'", "positive", "Classify sentiment: 'I hate +this.'", "negative", "Classify sentiment: 'It's okay.'"]``: two +exemplar pairs followed by a final query; one inference returns the +final reply. 
+ +**User-only**. Every element is a user turn; the model generates an +assistant reply between each, in N successive ``generate`` calls. Only +the final assistant response is returned per input item. This mode +implements iterative / scripted multi-step prompting, a manual, +externally-driven scaffold for stepwise reasoning. (It is distinct +from in-model zero-shot chain-of-thought, where stepwise reasoning is +elicited inside a single inference call by a prompt like "let's think +step by step"; here, the user-side scaffolding makes the steps +explicit and feeds each intermediate model output back as context for +the next user turn.) Example (scripted multi-step reasoning): +``["Step 1: identify objects.", "Step 2: describe relationships.", +"Step 3: conclude."]``: three sequential user prompts, three +inferences, final reply returned. + +``turn-taking`` is the default because it costs a single inference call +and is the more common multi-element pattern. + +Helpers +^^^^^^^ + +:meth:`~clams.app.ClamsPromptableApp.inject_promptable_parameters` + A static method called from your app's ``appmetadata()`` (in + ``metadata.py``) to add the SDK-managed promptable parameters. + +:meth:`~clams.app.ClamsPromptableApp.build_conversation` + Instance method that constructs a chat-template-compatible message + list (or a ``List[List[dict]]`` of progressively-extending prefixes + for ``user-only`` mode). Handles string and list prompt forms, the + two ``promptMode`` semantics, the optional ``systemPrompt``, and + inlines ``images`` / ``audios`` into the (final) user turn. Accepts + a pre-built ``List[dict]`` and returns it unchanged. Subclasses + may override to access model-specific state (e.g. + ``self.processor``) when formatting messages. + +:meth:`~clams.app.ClamsPromptableApp.response_to_grounded_textdocument` + Writes a ``TextDocument`` plus an ``Alignment`` (``source -> TD``) + into a view. 
``source`` is the coarse cross-modal anchor; the + optional ``origins`` (paired with ``origination``) is the finer + derivation list, written to the TD's ``origins`` / ``origination`` + properties. See https://clams.ai/clams-vocabulary/Document for + vocabulary semantics. + +.. _hf-promptable: + +HuggingFace Promptable Apps +--------------------------- + +For the very common case of "promptable CLAMS app + local HuggingFace +``transformers`` model," the SDK provides +:class:`~clams.app.ClamsHFPromptableApp`, a specialized subclass of +:class:`~clams.app.ClamsPromptableApp` that absorbs all HF-specific +inference boilerplate. Concrete apps inheriting from it declare the +model via a few class attributes and typically only need to implement +``_annotate()`` for their MMIF I/O. + +When to use +^^^^^^^^^^^ + +Choose :class:`~clams.app.ClamsHFPromptableApp` over plain +:class:`~clams.app.ClamsPromptableApp` when your app: + +- wraps a local HuggingFace ``transformers`` model loadable via + ``from_pretrained()``, AND +- runs the standard chat-template -> ``model.generate`` -> + ``batch_decode`` inference pipeline (every modern instruct-tuned + VLM/LLM in HF), AND +- doesn't need bespoke pixel-value preprocessing or vision-token + stitching at inference time. + +If your app uses a remote API instead (OpenAI, Anthropic, etc.), or a +non-HF local backend, inherit from +:class:`~clams.app.ClamsPromptableApp` directly and implement +:meth:`~clams.app.ClamsPromptableApp.generate` yourself. + +.. _hf-promptable-declaring: + +Declaring an HF promptable app +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +On top of the baseline declaration shared by every promptable app +(see :ref:`promptable-declaration`), a +:class:`~clams.app.ClamsHFPromptableApp` subclass: + +1. Uses :class:`~clams.app.ClamsHFPromptableApp` (not + :class:`~clams.app.ClamsPromptableApp`) as the base class in + ``app.py``. +2. 
Declares the required class attribute ``MODEL_CLS`` and any + optional dtype / padding / kwargs hints (see + :ref:`hf-promptable-class-attrs` for the full list). +3. Sets ``analyzer_versions={<model_id>: <revision>, ...}`` on the + ``AppMetadata`` constructor call in ``metadata.py`` (replaces the + singular ``analyzer_version`` for HF apps). +4. Calls + :meth:`ClamsHFPromptableApp.inject_promptable_parameters + <clams.app.ClamsHFPromptableApp.inject_promptable_parameters>` + (the HF override of the plain helper) at the end of + ``appmetadata()``. The scaffold ``metadata.py`` contains a + commented-out HF block; uncomment it. +5. Inherits the base class's + :meth:`~clams.app.ClamsHFPromptableApp.generate` implementation; + no override needed. + +For a minimal worked example, see the class docstring on +:class:`~clams.app.ClamsHFPromptableApp`. + +.. _hf-promptable-class-attrs: + +Class-attribute hooks +^^^^^^^^^^^^^^^^^^^^^ + +Concrete subclasses declare the model class plus optional dtype / +padding hints via class attributes, and declare the family of +supported model variants (with pinned commits) via +``analyzer_versions`` in ``metadata.py``: + +.. list-table:: + :header-rows: 1 + :widths: 22 60 18 + + * - Attribute + - Meaning + - Required + * - ``MODEL_CLS`` + - ``transformers`` model class (e.g. + :class:`~transformers.AutoModelForImageTextToText`, + :class:`~transformers.AutoModelForCausalLM`). + - yes + * - ``PROCESSOR_CLS`` + - Processor / tokenizer / feature-extractor class. Defaults to + :class:`~transformers.AutoProcessor`. + - no + * - ``DTYPE`` + - Torch dtype for the model and for ``pixel_values`` casting in + :py:meth:`~clams.app.ClamsHFPromptableApp.generate`. E.g. + ``torch.bfloat16`` for low-precision LLM inference. + - no + * - ``PADDING_SIDE`` + - Tokenizer padding side. ``'left'`` for decoder-only batched + generation; leave unset otherwise. + - no + * - ``MODEL_KWARGS`` / ``PROCESSOR_KWARGS`` + - Extra kwargs forwarded to the respective + ``from_pretrained()`` calls (e.g. + ``trust_remote_code=True``).
+ - no + +The HF model identifiers themselves are NOT a class attribute. They +live in ``metadata.py`` as ``analyzer_versions``, a +``Dict[str, str]`` mapping each supported model id to its pinned +commit hash. The SDK auto-derives a ``model`` runtime parameter +from this dict, with ``choices`` set to the dict keys. + +Family / singleton handling +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When ``analyzer_versions`` contains a single entry (the typical +single-model app), the SDK eagerly pre-loads that one model in +``__init__`` and sets ``model.default`` to the only key so callers +can omit the parameter. Single-model apps thus preserve warm-start +semantics: the model is loaded at app startup, not on first request. + +When ``analyzer_versions`` contains multiple entries (a family app), +loading is deferred until the first +:py:meth:`~clams.app.ClamsHFPromptableApp.load_model` call inside +``_annotate``, and ``model`` ships with no default; callers +must pick a family member explicitly (or the dev mutates +``model.default`` post-injection to provide a recommended pick). +Loaded models are cached per ``(model_id, revision)`` for the +lifetime of the app instance; switching models loads on first miss, +cache-hits on repeat. + +Reproducibility: ``model`` refinement and view metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The user-facing ``model`` parameter accepts raw HF model ids +(``org/repo-name``). The SDK's +:py:meth:`~clams.app.ClamsHFPromptableApp._refine_params` expands the +raw value to ``org/repo-name@<revision>`` form (using the dict +lookup) during parameter refinement. The standard ``sign_view`` flow +then stamps: + +- the **raw** user choice into ``view.metadata.parameters['model']`` + (transparency: what the user typed), +- the **resolved** ``org/repo-name@<revision>`` into + ``view.metadata.appConfiguration['model']`` (reproducibility: the + exact commit applied).
+ +A consumer of the output MMIF can read the resolved revision directly +from the view metadata, with no cross-reference to the app metadata +required. + +What the base class provides +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A subclass typically only writes ``_annotate()``. The base class +supplies: + +* model loading and caching via + :py:meth:`~clams.app.ClamsHFPromptableApp.load_model`, which wraps + :func:`clams.backends.hf.load_hf_model` (non-promptable HF apps + can call that loader directly without going through this base + class); +* the parameter injector + :py:meth:`ClamsHFPromptableApp.inject_promptable_parameters + <clams.app.ClamsHFPromptableApp.inject_promptable_parameters>`; +* a concrete batched HF + :py:meth:`~clams.app.ClamsHFPromptableApp.generate`; +* a default + :py:meth:`~clams.app.ClamsHFPromptableApp.build_gen_kwargs` that + maps the SDK promptable parameters to HF ``model.generate()`` + kwargs. + +See each method's docstring for full details. + +Apps using the HF backend (with or without the promptable wrapper) +must install the ``[hf]`` extra: ``pip install clams-python[hf]``. + diff --git a/documentation/index.rst b/documentation/index.rst index 9f6db33..5882e11 100644 --- a/documentation/index.rst +++ b/documentation/index.rst @@ -16,6 +16,7 @@ CLAMS Python SDK introduction input-output runtime-params + app-baseclasses gpu-apps appmetadata appdirectory diff --git a/documentation/introduction.rst b/documentation/introduction.rst index ce907e8..96435c5 100644 --- a/documentation/introduction.rst +++ b/documentation/introduction.rst @@ -72,13 +72,16 @@ As a developer you can expose different behaviors of the ``annotate()`` method b These runtime configurations are not part of the MMIF input, but for reproducible analysis, you should record these configurations in the output MMIF. .. note:: - There are *universal* parameters defined at the SDK-level that all CLAMS apps commonly use. See :const:`clams.app.ClamsApp.universal_parameters`.
+ Some runtime parameters are managed by the SDK itself rather than declared per-app. The *universal* parameters in :const:`clams.app.ClamsApp.universal_parameters` are one such set; they are auto-added to every CLAMS app. Specialized base classes (see below) add their own SDK-managed parameter sets on top. .. warning:: All the runtime configurations should be pre-announced in the app metadata. Also see <:doc:`tutorial`> for a step-by-step tutorial on how to write the ``_annotate()`` method with a simple example NLP tool. +.. note:: + Inheriting from :class:`~clams.app.ClamsApp` directly works for any CLAMS app. For common app categories (e.g. apps wrapping LLM or other multimodal models), the SDK provides specialized base classes that extend :class:`~clams.app.ClamsApp` with additional SDK-managed parameter sets and helpers. See :ref:`app-baseclasses`. + appmetadata() """"""""""""" diff --git a/documentation/modules.rst b/documentation/modules.rst index 7897b4a..d25e3cd 100644 --- a/documentation/modules.rst +++ b/documentation/modules.rst @@ -7,5 +7,6 @@ API documentation autodoc/clams.app autodoc/clams.appmetadata + autodoc/clams.backends autodoc/clams.restify autodoc/clams.mmif_utils diff --git a/documentation/runtime-params.rst b/documentation/runtime-params.rst index 4d3bf93..146102e 100644 --- a/documentation/runtime-params.rst +++ b/documentation/runtime-params.rst @@ -190,6 +190,20 @@ For more complex value structures (e.g., comma-separated lists within values), the app developer is responsible for further parsing and should document the expected format in the parameter's ``description`` field. +.. _runtime-params-promptable-note: + +Promptable apps: an extra SDK-managed parameter set +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For apps that wrap an **instruction- or chat-tuned** promptable model +(an LLM or other multimodal model, local or remote), inherit from +:class:`~clams.app.ClamsPromptableApp` instead of +:class:`~clams.app.ClamsApp`. 
The promptable base class adds a +standardized SDK-managed parameter set on top of the universal +parameters; the names are reserved and are added via a single helper +call inside ``appmetadata()``. See :ref:`promptable` for the full +developer guide and parameter list. + .. _runtime-params-envelope-note: Note on JSON envelope input diff --git a/documentation/tutorial.md b/documentation/tutorial.md index b7f7469..e5d2698 100644 --- a/documentation/tutorial.md +++ b/documentation/tutorial.md @@ -146,7 +146,7 @@ This means that if the user doesn't specify the value for these parameters at th If you want to make a parameter "optional" by providing a default value, you can do so by adding a `default` argument to the `add_parameter()` method. > **Note** -> Also refer to [CLAMS App Metadata](https://sdk.clams.ai/appmetadata.html) for more details regarding what fields need to be specified. +> Also refer to [CLAMS App Metadata](https://clams.ai/clams-python/appmetadata.html) for more details regarding what fields need to be specified. #### `_annotate()` The `_annotate()` method should accept a MMIF file/string/object as its first parameter and always returns a `MMIF` object with an additional `view` containing annotation results. This is where the bulk of your logic will go. For a text processing app, it is mostly concerned with finding text documents, calling the code that runs over the text, creating new views and inserting the results. @@ -228,20 +228,20 @@ First, with `text_value` we get the text from the text document, either from its ## Working with TimeFrame Annotations -Many CLAMS apps process video by operating on TimeFrame annotations produced by an upstream app (e.g., scene detection, shot segmentation). A TimeFrame can carry structural members (currently called `targets` — a list of TimePoint IDs covering every frame in the segment), a salient subset of those members (currently called `representatives`), or simply `start`/`end` boundaries. 
+Many CLAMS apps process video by operating on TimeFrame annotations produced by an upstream app (e.g., scene detection, shot segmentation). A TimeFrame can carry structural members (currently called `targets`; a list of TimePoint IDs covering every frame in the segment), a salient subset of those members (currently called `representatives`), or simply `start`/`end` boundaries. > **Note** > The property names `targets` and `representatives` are under review and may be renamed in a future MMIF spec version. See [mmif#238](https://github.com/clamsproject/mmif/issues/238) for the ongoing discussion. The SDK API will be updated accordingly. ### Frame sampling with `tfSamplingMode` -When your app receives TimeFrame annotations, the caller can control which frames your app processes by setting the `tfSamplingMode` runtime parameter. This is a **universal parameter** — automatically available on every CLAMS app without any per-app configuration. +When your app receives TimeFrame annotations, the caller can control which frames your app processes by setting the `tfSamplingMode` runtime parameter. This is a **universal parameter**: automatically available on every CLAMS app without any per-app configuration. There are three modes: -- `representatives` (default) — use the frames listed in the TimeFrame's `representatives` property. If no representatives exist, the TimeFrame is skipped. -- `single` — pick one frame: the middle representative if available, otherwise the midpoint of the start/end interval. -- `all` — use every frame in `targets` if present, otherwise generate every frame in the start/end interval. +- `representatives` (default): use the frames listed in the TimeFrame's `representatives` property. If no representatives exist, the TimeFrame is skipped. +- `single`: pick one frame: the middle representative if available, otherwise the midpoint of the start/end interval. 
+- `all`: use every frame in `targets` if present, otherwise generate every frame in the start/end interval. App developers do **not** need to handle this parameter themselves. The SDK intercepts it in `annotate()` and sets a context variable before `_annotate()` runs. Inside `_annotate()`, calls to `vdh.extract_frames_by_mode()` automatically read the active mode and select frames accordingly. The underlying per-mode functions (`_sample_representatives()`, `_sample_single()`, `_sample_all()`) in `mmif.utils.video_document_helper` are also available for apps that need frame numbers without extracting images. diff --git a/pyproject.toml b/pyproject.toml index 1b3fc4e..048410d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ classifiers = [ "Programming Language :: Python :: 3 :: Only", ] dependencies = [ - "mmif-python==1.4.0", + "mmif-python==1.5.0", "Flask>=2", "Flask-RESTful>=0.3.9", "gunicorn>=20", @@ -39,6 +39,9 @@ source = "https://github.com/clamsproject/clams-python" dev = ["pytype", "pytest", "pytest-cov", "pillow", "setuptools"] docs = ["sphinx>=7.0,<8.0", "furo", "m2r2", "sphinx-jsonschema"] test = ["pytype", "pytest", "pytest-cov", "pillow"] +# Required for apps using the HuggingFace transformers backend +# (clams.backends.hf). Heavy deps; opt-in only. +hf = ["torch", "transformers", "pillow", "tqdm"] [tool.setuptools.packages.find] where = ["."] diff --git a/tests/test_backends_hf.py b/tests/test_backends_hf.py new file mode 100644 index 0000000..fae696e --- /dev/null +++ b/tests/test_backends_hf.py @@ -0,0 +1,450 @@ +""" +Tests for :mod:`clams.backends.hf`. + +Exercises the device / dtype / padding-side / kwargs-passthrough +behavior of both :func:`load_hf_model` and :func:`load_hf_pipeline` +against mocked ``transformers`` model, processor, and pipeline +constructors. + +If ``torch`` is not installed, the whole file is skipped (it is an +optional dep behind the ``[hf]`` extra). 
+""" +import unittest +from unittest import mock + +import pytest + +pytest.importorskip('torch') +pytest.importorskip('transformers') + +# Force ``transformers.pipeline`` to be eagerly resolved into the +# package's ``__dict__``. ``transformers`` uses a lazy-loading +# ``_LazyModule`` that fetches submodule attributes via +# ``__getattr__`` on first access; before that, the attribute does +# not live in ``__dict__``. The first ``mock.patch('transformers.pipeline', ...)`` +# call would then silently fail to redirect ``from transformers import pipeline`` +# inside the helper. Touching the attribute here resolves it and +# caches it in the package dict, so subsequent ``mock.patch`` calls +# rewrite the real entry as expected. +import transformers # noqa: E402 +_ = transformers.pipeline + +from clams.backends.hf import load_hf_model, load_hf_pipeline # noqa: E402 + + +# --------------------------------------------------------------------------- +# Mocks +# --------------------------------------------------------------------------- + +class _MockModel: + """Stand-in for a ``transformers`` model class.""" + + # cross-test state — each test should set this to None first + last_from_pretrained_args = None + last_from_pretrained_kwargs = None + + @classmethod + def from_pretrained(cls, model_id, **kwargs): + cls.last_from_pretrained_args = (model_id,) + cls.last_from_pretrained_kwargs = dict(kwargs) + return cls() + + def __init__(self): + self.device = None + self.eval_called = False + + def to(self, device): + self.device = device + return self + + def eval(self): + self.eval_called = True + return self + + +class _MockTokenizer: + def __init__(self): + self.padding_side = 'right' + self.pad_token = None + self.eos_token = '' + + +class _MockProcessor: + """Stand-in for ``AutoProcessor`` (or similar).""" + + last_from_pretrained_args = None + last_from_pretrained_kwargs = None + + @classmethod + def from_pretrained(cls, model_id, **kwargs): + 
cls.last_from_pretrained_args = (model_id,) + cls.last_from_pretrained_kwargs = dict(kwargs) + return cls() + + def __init__(self): + self.tokenizer = _MockTokenizer() + + +# --------------------------------------------------------------------------- +# Test cases +# --------------------------------------------------------------------------- + +class TestDefaultsOnly(unittest.TestCase): + """ + Case (a): caller passes only ``model_id`` + ``model_cls``. + No dtype, no padding_side, no extra kwargs. + """ + + def setUp(self): + _MockModel.last_from_pretrained_args = None + _MockModel.last_from_pretrained_kwargs = None + _MockProcessor.last_from_pretrained_args = None + _MockProcessor.last_from_pretrained_kwargs = None + + def test_returns_processor_model_device_tuple(self): + result = load_hf_model( + 'fake-model-id', _MockModel, processor_cls=_MockProcessor) + self.assertEqual(len(result), 3) + processor, model, device = result + self.assertIsInstance(processor, _MockProcessor) + self.assertIsInstance(model, _MockModel) + self.assertIsInstance(device, str) + # cpu or cuda depending on host — must be one of them + self.assertIn(device, ('cpu', 'cuda')) + + def test_no_torch_dtype_passed_when_dtype_is_none(self): + load_hf_model( + 'fake-model-id', _MockModel, processor_cls=_MockProcessor) + # When dtype is None, helper should NOT inject torch_dtype into + # model_cls.from_pretrained (let the model class use its own + # default). + kwargs = _MockModel.last_from_pretrained_kwargs + self.assertNotIn('torch_dtype', kwargs) + + def test_padding_side_untouched_when_not_requested(self): + processor, _, _ = load_hf_model( + 'fake-model-id', _MockModel, processor_cls=_MockProcessor) + # Default 'right' should persist; helper should NOT have + # rewritten it. + self.assertEqual(processor.tokenizer.padding_side, 'right') + # pad_token should NOT have been forced to EOS. 
+ self.assertIsNone(processor.tokenizer.pad_token) + + def test_model_put_in_eval_mode(self): + _, model, _ = load_hf_model( + 'fake-model-id', _MockModel, processor_cls=_MockProcessor) + self.assertTrue(model.eval_called) + + +class TestDecoderOnlyMode(unittest.TestCase): + """ + Case (b): caller passes ``padding_side='left'`` (decoder-only + batched generation) and an explicit ``dtype``. + """ + + def setUp(self): + _MockModel.last_from_pretrained_args = None + _MockModel.last_from_pretrained_kwargs = None + _MockProcessor.last_from_pretrained_args = None + _MockProcessor.last_from_pretrained_kwargs = None + + def test_padding_side_set_to_left_on_tokenizer(self): + processor, _, _ = load_hf_model( + 'fake-model-id', _MockModel, + processor_cls=_MockProcessor, + padding_side='left', + ) + self.assertEqual(processor.tokenizer.padding_side, 'left') + + def test_pad_token_set_from_eos_when_unset(self): + processor, _, _ = load_hf_model( + 'fake-model-id', _MockModel, + processor_cls=_MockProcessor, + padding_side='left', + ) + self.assertEqual( + processor.tokenizer.pad_token, + processor.tokenizer.eos_token, + ) + + def test_dtype_forwarded_as_torch_dtype(self): + import torch + load_hf_model( + 'fake-model-id', _MockModel, + processor_cls=_MockProcessor, + dtype=torch.bfloat16, + padding_side='left', + ) + self.assertEqual( + _MockModel.last_from_pretrained_kwargs.get('torch_dtype'), + torch.bfloat16, + ) + + +class TestKwargsPassThrough(unittest.TestCase): + """ + Case (c): ``model_kwargs`` and ``processor_kwargs`` reach the + respective ``from_pretrained`` calls. Validates the SWT-style + pattern (use_safetensors, use_fast, add_pooling_layer, etc.). 
+ """ + + def setUp(self): + _MockModel.last_from_pretrained_args = None + _MockModel.last_from_pretrained_kwargs = None + _MockProcessor.last_from_pretrained_args = None + _MockProcessor.last_from_pretrained_kwargs = None + + def test_model_kwargs_reach_from_pretrained(self): + load_hf_model( + 'fake-model-id', _MockModel, + processor_cls=_MockProcessor, + model_kwargs={'use_safetensors': True, + 'add_pooling_layer': False}, + ) + kw = _MockModel.last_from_pretrained_kwargs + self.assertTrue(kw.get('use_safetensors')) + self.assertFalse(kw.get('add_pooling_layer')) + + def test_processor_kwargs_reach_from_pretrained(self): + load_hf_model( + 'fake-model-id', _MockModel, + processor_cls=_MockProcessor, + processor_kwargs={'use_safetensors': True, 'use_fast': True}, + ) + kw = _MockProcessor.last_from_pretrained_kwargs + self.assertTrue(kw.get('use_safetensors')) + self.assertTrue(kw.get('use_fast')) + + def test_model_id_arrives_first_positional(self): + load_hf_model( + 'fake-model-id', _MockModel, processor_cls=_MockProcessor) + self.assertEqual( + _MockModel.last_from_pretrained_args, ('fake-model-id',)) + self.assertEqual( + _MockProcessor.last_from_pretrained_args, ('fake-model-id',)) + + def test_model_and_processor_kwargs_do_not_cross_contaminate(self): + """SWT mixes incompatible kwargs across model and processor; + ensure helper doesn't blindly merge them.""" + load_hf_model( + 'fake-model-id', _MockModel, + processor_cls=_MockProcessor, + model_kwargs={'add_pooling_layer': False}, + processor_kwargs={'use_fast': True}, + ) + # add_pooling_layer is model-only; should NOT reach processor + self.assertNotIn( + 'add_pooling_layer', + _MockProcessor.last_from_pretrained_kwargs) + # use_fast is processor-only; should NOT reach model + self.assertNotIn( + 'use_fast', + _MockModel.last_from_pretrained_kwargs) + + +class TestDeviceResolution(unittest.TestCase): + """The helper auto-detects cuda/cpu when device is None.""" + + def setUp(self): + 
_MockModel.last_from_pretrained_args = None + _MockModel.last_from_pretrained_kwargs = None + + def test_explicit_device_honored(self): + _, model, device = load_hf_model( + 'fake-model-id', _MockModel, + processor_cls=_MockProcessor, + device='cpu', + ) + self.assertEqual(device, 'cpu') + self.assertEqual(model.device, 'cpu') + + +class TestMoveToDeviceFlag(unittest.TestCase): + """ + ``move_to_device=False`` skips both the ``.to(device)`` move and + the ``.eval()`` switch, for library-style HF wrappers that defer + device placement and inference-mode switching to a downstream + consumer. + """ + + def setUp(self): + _MockModel.last_from_pretrained_args = None + _MockModel.last_from_pretrained_kwargs = None + + def test_move_skipped_when_flag_false(self): + _, model, _ = load_hf_model( + 'fake-model-id', _MockModel, + processor_cls=_MockProcessor, + move_to_device=False, + ) + # _MockModel.__init__ leaves device=None; .to() would set it. + self.assertIsNone(model.device) + + def test_eval_skipped_when_flag_false(self): + _, model, _ = load_hf_model( + 'fake-model-id', _MockModel, + processor_cls=_MockProcessor, + move_to_device=False, + ) + self.assertFalse(model.eval_called) + + def test_resolved_device_still_returned(self): + """Even when not moved, the resolved target is reported so the + downstream consumer can use it for its own ``.to(device)``.""" + _, _, device = load_hf_model( + 'fake-model-id', _MockModel, + processor_cls=_MockProcessor, + device='cpu', + move_to_device=False, + ) + self.assertEqual(device, 'cpu') + + def test_default_still_moves_and_evals(self): + """Regression guard: the default (omitted) value of the new + flag preserves prior behavior.""" + _, model, _ = load_hf_model( + 'fake-model-id', _MockModel, + processor_cls=_MockProcessor, + device='cpu', + ) + self.assertEqual(model.device, 'cpu') + self.assertTrue(model.eval_called) + + +# --------------------------------------------------------------------------- +# load_hf_pipeline tests 
+# --------------------------------------------------------------------------- + +class _FakePipeline: + """Captures the args/kwargs the helper forwards to + ``transformers.pipeline``. Behaves as the returned pipeline object + too -- just a tagged callable stand-in.""" + + last_args = None + last_kwargs = None + + def __init__(self, *args, **kwargs): + type(self).last_args = args + type(self).last_kwargs = dict(kwargs) + + +def _patch_pipeline(): + """Patch ``transformers.pipeline`` to record its call and return a + ``_FakePipeline`` instance.""" + _FakePipeline.last_args = None + _FakePipeline.last_kwargs = None + return mock.patch('transformers.pipeline', _FakePipeline) + + +class TestLoadHFPipelineDefaults(unittest.TestCase): + """The default path: just task + model_id.""" + + def test_returns_pipeline_and_device(self): + with _patch_pipeline(): + pipe, device = load_hf_pipeline( + 'automatic-speech-recognition', 'openai/whisper-tiny') + self.assertIsInstance(pipe, _FakePipeline) + self.assertIn(device, ('cpu', 'cuda')) + + def test_task_arrives_first_positional(self): + with _patch_pipeline(): + load_hf_pipeline( + 'token-classification', 'fake/ner-model') + self.assertEqual(_FakePipeline.last_args, ('token-classification',)) + + def test_model_id_forwarded_as_model_kwarg(self): + with _patch_pipeline(): + load_hf_pipeline( + 'automatic-speech-recognition', 'openai/whisper-tiny') + self.assertEqual( + _FakePipeline.last_kwargs.get('model'), 'openai/whisper-tiny') + + def test_no_revision_kwarg_when_not_specified(self): + with _patch_pipeline(): + load_hf_pipeline( + 'automatic-speech-recognition', 'openai/whisper-tiny') + self.assertNotIn('revision', _FakePipeline.last_kwargs) + + +class TestLoadHFPipelineDevice(unittest.TestCase): + """Device handling: auto-detect, explicit string, explicit int.""" + + def test_auto_detect_when_none(self): + with _patch_pipeline(): + _, device = load_hf_pipeline( + 'automatic-speech-recognition', 'openai/whisper-tiny') + 
self.assertIn(device, ('cpu', 'cuda')) + # Same value should have been passed to pipeline(). + self.assertEqual(_FakePipeline.last_kwargs.get('device'), device) + + def test_explicit_string_device_honored(self): + with _patch_pipeline(): + _, device = load_hf_pipeline( + 'automatic-speech-recognition', 'openai/whisper-tiny', + device='cpu') + self.assertEqual(device, 'cpu') + self.assertEqual(_FakePipeline.last_kwargs.get('device'), 'cpu') + + def test_explicit_int_device_honored(self): + """``pipeline()`` natively accepts ``-1`` for CPU, ``0+`` for + a specific GPU index. The helper passes it through unchanged.""" + with _patch_pipeline(): + _, device = load_hf_pipeline( + 'automatic-speech-recognition', 'openai/whisper-tiny', + device=-1) + self.assertEqual(device, -1) + self.assertEqual(_FakePipeline.last_kwargs.get('device'), -1) + + +class TestLoadHFPipelineKwargsPassThrough(unittest.TestCase): + """``model_kwargs`` lands inside ``pipeline(model_kwargs={...})``; + ``pipeline_kwargs`` is spread directly into the pipeline call.""" + + def test_pipeline_kwargs_spread_into_call(self): + with _patch_pipeline(): + load_hf_pipeline( + 'automatic-speech-recognition', 'openai/whisper-tiny', + pipeline_kwargs={ + 'generate_kwargs': {'num_beams': 5}, + 'batch_size': 8, + }) + kw = _FakePipeline.last_kwargs + self.assertEqual(kw.get('generate_kwargs'), {'num_beams': 5}) + self.assertEqual(kw.get('batch_size'), 8) + + def test_model_kwargs_nested_under_model_kwargs(self): + with _patch_pipeline(): + load_hf_pipeline( + 'automatic-speech-recognition', 'openai/whisper-tiny', + model_kwargs={'use_safetensors': True}) + kw = _FakePipeline.last_kwargs + self.assertEqual(kw.get('model_kwargs'), + {'use_safetensors': True}) + + def test_revision_forwarded(self): + with _patch_pipeline(): + load_hf_pipeline( + 'automatic-speech-recognition', 'openai/whisper-tiny', + revision='abc1234') + self.assertEqual(_FakePipeline.last_kwargs.get('revision'), 'abc1234') + + def 
test_explicit_helper_args_take_precedence(self): + """If the caller smuggles ``model`` / ``device`` / ``revision`` + through ``pipeline_kwargs``, the helper's own args win.""" + with _patch_pipeline(): + load_hf_pipeline( + 'automatic-speech-recognition', 'openai/whisper-tiny', + device='cpu', revision='abc1234', + pipeline_kwargs={ + 'model': 'should-be-overridden', + 'device': 'should-be-overridden', + 'revision': 'should-be-overridden', + }) + kw = _FakePipeline.last_kwargs + self.assertEqual(kw['model'], 'openai/whisper-tiny') + self.assertEqual(kw['device'], 'cpu') + self.assertEqual(kw['revision'], 'abc1234') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_clamsapp.py b/tests/test_clamsapp.py index 1abcbac..412aa5d 100644 --- a/tests/test_clamsapp.py +++ b/tests/test_clamsapp.py @@ -262,6 +262,33 @@ def test_sign_view(self): self.assertEqual(len(v4.metadata.appConfiguration), 6) self.assertEqual(len(v4.metadata.parameters['multivalued_param']), len(str(multiple_values))) + def test_app_tags_default_empty(self): + # apps that don't declare tags should not write an appTags field to view metadata + self.assertEqual(self.app.metadata.app_tags, []) + m = Mmif(self.in_mmif) + v = m.new_view() + self.app.sign_view(v, {}) + self.assertIsNone(v.metadata.get('appTags')) + + def test_app_tags_propagated_to_view(self): + # tags declared on app metadata should appear verbatim in signed views + self.app.metadata.add_app_tag('TemporalSegmentation', 'BarsDetection') + m = Mmif(self.in_mmif) + v = m.new_view() + self.app.sign_view(v, {}) + self.assertEqual( + v.metadata.get('appTags'), + ['TemporalSegmentation', 'BarsDetection'], + ) + + def test_add_app_tag_dedupes_and_validates(self): + self.app.metadata.add_app_tag('foo', 'bar', 'foo') + self.assertEqual(self.app.metadata.app_tags, ['foo', 'bar']) + with self.assertRaises(ValueError): + self.app.metadata.add_app_tag('') + with self.assertRaises(ValueError): + self.app.metadata.add_app_tag(123) # 
type: ignore[arg-type] + def test_annotate(self): # The example app is hard-coded to **always** emit version mismatch warning out_mmif = self.app.annotate(self.in_mmif) diff --git a/tests/test_promptable.py b/tests/test_promptable.py new file mode 100644 index 0000000..44a8fe7 --- /dev/null +++ b/tests/test_promptable.py @@ -0,0 +1,585 @@ +""" +Tests for :class:`clams.app.ClamsPromptableApp`. + +Covers the behavior documented in +``documentation/app-baseclasses.rst``: parameter discovery via +``inject_promptable_parameters()``, the reservation rule on +promptable-param names, ``build_conversation()`` shape across the +single-turn / turn-taking / user-only modes, and the +``response_to_grounded_textdocument()`` output contract. +""" +import unittest + +from mmif import AnnotationTypes, DocumentTypes, Mmif + +from clams import AppMetadata, ClamsPromptableApp + + +# --------------------------------------------------------------------------- +# Test infrastructure +# --------------------------------------------------------------------------- + +def make_metadata(call_helper=True, pre_declare=None, + analyzer_versions=None, hf_helper=False): + """ + Build a fresh AppMetadata for tests. + + :param call_helper: if True, calls + ``ClamsPromptableApp.inject_promptable_parameters(metadata)`` + at the end (simulating a correctly-written ``appmetadata()``). + Mutually exclusive with ``hf_helper``. + :param pre_declare: if set to a parameter spec dict, calls + ``metadata.add_parameter(**pre_declare)`` BEFORE the helper + runs — used to test reservation enforcement. + :param analyzer_versions: if set, passed through to + ``AppMetadata(analyzer_versions=...)``. Required when the + fixture is consumed by ``ClamsHFPromptableApp`` tests. + :param hf_helper: if True, calls + ``ClamsHFPromptableApp.inject_promptable_parameters(metadata)`` + (the HF override of the plain promptable helper). Use for HF + fixture builds. 
+ """ + kwargs = dict( + name="Example Promptable App", + description="Test fixture, creating input TD - output TD alignment", + app_license="MIT", + identifier="https://apps.clams.ai/example-promptable/v1", + url="https://fakegithub.com/some/repository", + ) + if analyzer_versions is not None: + kwargs['analyzer_versions'] = analyzer_versions + m = AppMetadata(**kwargs) + m.add_input(DocumentTypes.TextDocument) + m.add_output(DocumentTypes.TextDocument) + m.add_output(AnnotationTypes.Alignment) + if pre_declare is not None: + m.add_parameter(**pre_declare) + if hf_helper: + from clams.app import ClamsHFPromptableApp + ClamsHFPromptableApp.inject_promptable_parameters(m) + elif call_helper: + ClamsPromptableApp.inject_promptable_parameters(m) + return m + + +def make_test_app(metadata): + """ + Factory creating a fresh ClamsPromptableApp subclass that loads the + given metadata. Each call produces a fresh class so per-test state + doesn't leak. + """ + + def _load_appmetadata(self): + return metadata + + cls = type( + 'TestPromptableApp', + (ClamsPromptableApp,), + { + '_load_appmetadata': _load_appmetadata, + '_appmetadata': lambda self: None, + '_annotate': lambda self, mmif, **kw: mmif, + 'generate': lambda self, prompt, **kw: [""], + }, + ) + return cls() + + +# --------------------------------------------------------------------------- +# Parameter discovery (via the helper) +# --------------------------------------------------------------------------- + +class TestParameterDiscovery(unittest.TestCase): + + def test_all_promptable_params_present_after_init(self): + app = make_test_app(make_metadata(call_helper=True)) + present = {p.name for p in app.metadata.parameters} + expected_promptable = {p['name'] + for p in ClamsPromptableApp.promptable_parameters} + self.assertTrue(expected_promptable.issubset(present)) + + def test_prompt_has_no_sdk_default(self): + app = make_test_app(make_metadata(call_helper=True)) + prompt_param = next(p for p in 
app.metadata.parameters + if p.name == 'prompt') + self.assertIsNone(prompt_param.default) + self.assertTrue(prompt_param.multivalued) + + def test_system_prompt_default_empty_string(self): + app = make_test_app(make_metadata(call_helper=True)) + sysprompt = next(p for p in app.metadata.parameters + if p.name == 'systemPrompt') + self.assertEqual(sysprompt.default, '') + + def test_temperature_default_is_zero(self): + """When the caller omits ``temperature``, it should arrive in + ``_annotate()`` as the float ``0.0`` (deterministic decoding).""" + app = make_test_app(make_metadata(call_helper=True)) + refined = app._refine_params(prompt=['hi']) + self.assertEqual(refined['temperature'], 0.0) + self.assertIsInstance(refined['temperature'], float) + + def test_prompt_mode_choices(self): + app = make_test_app(make_metadata(call_helper=True)) + pm = next(p for p in app.metadata.parameters + if p.name == 'promptMode') + self.assertEqual(set(pm.choices), {'user-only', 'turn-taking'}) + self.assertEqual(pm.default, 'turn-taking') + + +# --------------------------------------------------------------------------- +# Required-prompt validation +# --------------------------------------------------------------------------- + +class TestRequiredPrompt(unittest.TestCase): + + def test_refine_params_raises_when_prompt_missing(self): + """ + ``prompt`` has no SDK default. ``_refine_params`` must raise + ``ValueError`` when the caller omits it. 
+ """ + app = make_test_app(make_metadata(call_helper=True)) + with self.assertRaises(ValueError) as ctx: + app._refine_params() + self.assertIn('prompt', str(ctx.exception)) + + +# --------------------------------------------------------------------------- +# Missing-helper validation in __init__ +# --------------------------------------------------------------------------- + +class TestMissingHelperValidation(unittest.TestCase): + + def test_init_raises_when_helper_not_called(self): + """ + If ``appmetadata()`` forgets to call + ``inject_promptable_parameters()``, ``__init__`` must raise + ``ValueError`` with an instructive message. + """ + with self.assertRaises(ValueError) as ctx: + make_test_app(make_metadata(call_helper=False)) + msg = str(ctx.exception) + self.assertIn('inject_promptable_parameters', msg) + + +# --------------------------------------------------------------------------- +# Reservation enforcement (via duplicate-name ValueError) +# --------------------------------------------------------------------------- + +class TestReservationEnforcement(unittest.TestCase): + + def test_redeclaring_prompt_trips_duplicate_name_error(self): + """ + An app that calls ``metadata.add_parameter('prompt', ...)`` + before the helper trips the existing duplicate-name + ``ValueError`` from ``AppMetadata.add_parameter`` (which the + helper's own ``add_parameter`` call raises). 
+ """ + with self.assertRaises(ValueError) as ctx: + make_metadata( + call_helper=True, + pre_declare={ + 'name': 'prompt', + 'description': 'app-defined collision', + 'type': 'string', + 'multivalued': True, + }, + ) + self.assertIn("'prompt'", str(ctx.exception)) + + def test_redeclaring_max_new_tokens_trips_error(self): + with self.assertRaises(ValueError) as ctx: + make_metadata( + call_helper=True, + pre_declare={ + 'name': 'maxNewTokens', + 'description': 'app-defined collision', + 'type': 'integer', + 'default': 1024, + }, + ) + self.assertIn("'maxNewTokens'", str(ctx.exception)) + + +# --------------------------------------------------------------------------- +# annotate_param_caster covers promptable params (no stale-spec drift) +# --------------------------------------------------------------------------- + +class TestAnnotateParamCaster(unittest.TestCase): + + def test_caster_includes_promptable_param_specs(self): + app = make_test_app(make_metadata(call_helper=True)) + for spec in ClamsPromptableApp.promptable_parameters: + self.assertIn(spec['name'], app.annotate_param_spec) + stored_type, stored_multivalued = \ + app.annotate_param_spec[spec['name']] + self.assertEqual(stored_type, spec['type']) + self.assertEqual( + stored_multivalued, spec.get('multivalued', False)) + + def test_multivalued_prompt_casts_to_list_of_strings(self): + app = make_test_app(make_metadata(call_helper=True)) + refined = app._refine_params(prompt=['hello', 'world']) + self.assertEqual(refined['prompt'], ['hello', 'world']) + + def test_max_new_tokens_casts_to_int(self): + app = make_test_app(make_metadata(call_helper=True)) + refined = app._refine_params(prompt=['hi'], maxNewTokens=['1024']) + self.assertEqual(refined['maxNewTokens'], 1024) + self.assertIsInstance(refined['maxNewTokens'], int) + + def test_temperature_casts_to_float(self): + app = make_test_app(make_metadata(call_helper=True)) + refined = app._refine_params(prompt=['hi'], temperature=['0.7']) + 
self.assertEqual(refined['temperature'], 0.7) + self.assertIsInstance(refined['temperature'], float) + + +# --------------------------------------------------------------------------- +# build_conversation +# --------------------------------------------------------------------------- + +class TestBuildConversation(unittest.TestCase): + """ + Covers the shape of ``ClamsPromptableApp.build_conversation()`` + across single-turn, turn-taking, and user-only modes, and the + pre-built-message pass-through case. + """ + + def setUp(self): + self.app = make_test_app(make_metadata(call_helper=True)) + + def test_string_prompt_single_user_turn(self): + conv = self.app.build_conversation(prompt="hello") + self.assertEqual(len(conv), 1) + self.assertEqual(conv[0]['role'], 'user') + + def test_single_element_list_single_user_turn(self): + conv = self.app.build_conversation(prompt=['hello']) + self.assertEqual(len(conv), 1) + self.assertEqual(conv[0]['role'], 'user') + + def test_turn_taking_alternating_turns(self): + conv = self.app.build_conversation( + prompt=['q1', 'a1', 'q2'], prompt_mode='turn-taking') + self.assertEqual(len(conv), 3) + self.assertEqual(conv[0]['role'], 'user') + self.assertEqual(conv[1]['role'], 'assistant') + self.assertEqual(conv[2]['role'], 'user') + + def test_user_only_returns_progressively_extending_lists(self): + convs = self.app.build_conversation( + prompt=['q1', 'q2', 'q3'], prompt_mode='user-only') + # N progressively-extending message lists, one per turn + self.assertEqual(len(convs), 3) + # last conversation has all 3 user turns (+ intermediate + # assistant turns once the model has filled them in; at + # build_conversation time the assistants are placeholders or + # empty — the test pins length, not exact content) + self.assertGreaterEqual(len(convs[-1]), 3) + + def test_pre_built_list_pass_through(self): + msgs = [ + {'role': 'system', 'content': 'You are helpful.'}, + {'role': 'user', 'content': 'hi'}, + ] + conv = 
self.app.build_conversation(prompt=msgs) + self.assertEqual(conv, msgs) + + def test_system_prompt_prepended(self): + conv = self.app.build_conversation( + prompt='hello', system_prompt='You are helpful.') + # first turn is a system message + self.assertEqual(conv[0]['role'], 'system') + + def test_images_carried_in_user_content(self): + sentinel = object() + conv = self.app.build_conversation( + prompt='describe this', images=[sentinel]) + # the sentinel image should appear somewhere in the first + # user-turn content + user_turn = next(m for m in conv if m['role'] == 'user') + # content is typically a list of dicts; flatten to a sequence + # of values and check for the sentinel + flat = [] + + def _walk(x): + if isinstance(x, dict): + for v in x.values(): + _walk(v) + elif isinstance(x, list): + for v in x: + _walk(v) + else: + flat.append(x) + + _walk(user_turn['content']) + self.assertIn(sentinel, flat) + + +# --------------------------------------------------------------------------- +# response_to_grounded_textdocument +# --------------------------------------------------------------------------- + +class TestStoreResponse(unittest.TestCase): + + def setUp(self): + self.app = make_test_app(make_metadata(call_helper=True)) + self.mmif = Mmif(validate=False) + self.view = self.mmif.new_view() + self.app.sign_view(self.view, {}) + self.view.new_contain(DocumentTypes.TextDocument) + self.view.new_contain(AnnotationTypes.Alignment) + + def test_happy_path_creates_textdocument_and_alignment(self): + td, align = self.app.response_to_grounded_textdocument( + self.view, source='src1', response='generated text') + self.assertEqual(td.text_value, 'generated text') + self.assertEqual(align.get_property('source'), 'src1') + self.assertEqual(align.get_property('target'), td.id) + + def test_reasoning_trace_none_does_not_raise(self): + # no exception + self.app.response_to_grounded_textdocument( + self.view, source='src1', response='text', + reasoning_trace=None) + + def 
test_reasoning_trace_not_none_raises_not_implemented(self): + with self.assertRaises(NotImplementedError): + self.app.response_to_grounded_textdocument( + self.view, source='src1', response='text', + reasoning_trace='intermediate reasoning') + + # TODO (krim @ 05/28/26): this test case belongs upstream in the + # vocabulary type definition (the `origins`/`origination` pairing + # is a property of the `Document` type, per clams-vocabulary#18, + # not a behavior of the SDK app layer). Move once clams-vocabulary + # supports conditional prop validation. For now, this is a sanity + # check that the SDK correctly forwards both kwargs through to the + # underlying TD. + def test_origins_and_origination_written_together(self): + td, align = self.app.response_to_grounded_textdocument( + self.view, source='tf1', response='caption text', + origins=['tp1'], origination='derived') + self.assertEqual(td.get_property('origins'), ['tp1']) + self.assertEqual(td.get_property('origination'), 'derived') + self.assertEqual(align.get_property('source'), 'tf1') + self.assertEqual(align.get_property('target'), td.id) + + def test_unpaired_origins_or_origination_raises(self): + unpaired = [ + {'origins': ['tp1']}, + {'origination': 'derived'}, + ] + for kwargs in unpaired: + with self.subTest(**kwargs), self.assertRaises(ValueError): + self.app.response_to_grounded_textdocument( + self.view, source='src1', response='text', **kwargs) + + +# --------------------------------------------------------------------------- +# Transport-neutral parameter casting +# --------------------------------------------------------------------------- + +class TestTransportNeutralCasting(unittest.TestCase): + """ + Just exercises the standard ``ClamsApp`` parameter-casting path. + Not envelope-specific; the point is that promptable apps see no + separate transport layer. 
+ """ + + def test_multi_element_prompt_arrives_as_list_of_strings(self): + app = make_test_app(make_metadata(call_helper=True)) + refined = app._refine_params(prompt=['a', 'b', 'c']) + self.assertEqual(refined['prompt'], ['a', 'b', 'c']) + for x in refined['prompt']: + self.assertIsInstance(x, str) + + def test_single_element_prompt_still_list(self): + app = make_test_app(make_metadata(call_helper=True)) + refined = app._refine_params(prompt=['only']) + self.assertEqual(refined['prompt'], ['only']) + + +# --------------------------------------------------------------------------- +# ClamsHFPromptableApp class-attribute validation +# --------------------------------------------------------------------------- + +class TestHFPromptableAppClassAttrs(unittest.TestCase): + """ + Exercises the class-attribute validation in + :class:`ClamsHFPromptableApp.__init__`. The actual model loading + is patched out so these tests don't require torch/transformers. + End-to-end inference tests live separately. 
+ """ + + SINGLETON_AV = {'org/fake-model': 'deadbee'} + MULTI_AV = { + 'org/large-model': 'aaaaaaa', + 'org/small-model': 'bbbbbbb', + } + + def _make_subclass( + self, *, model_cls=object, + analyzer_versions=None, **extra_attrs): + if analyzer_versions is None: + analyzer_versions = dict(self.SINGLETON_AV) + attrs = { + '_load_appmetadata': lambda self: make_metadata( + hf_helper=True, + analyzer_versions=dict(analyzer_versions), + ), + '_appmetadata': lambda self: None, + '_annotate': lambda self, mmif, **kw: mmif, + 'MODEL_CLS': model_cls, + } + attrs.update(extra_attrs) + from clams.app import ClamsHFPromptableApp + return type('TestHFApp', (ClamsHFPromptableApp,), attrs) + + def test_missing_model_cls_raises(self): + cls = self._make_subclass(model_cls=None) + with self.assertRaises(ValueError) as ctx: + cls() + self.assertIn('MODEL_CLS', str(ctx.exception)) + + def test_missing_analyzer_versions_raises(self): + # Use the plain promptable helper so promptable params are + # injected (parent __init__ passes) but analyzer_versions is + # absent and ``model`` was never injected. HF __init__ should + # refuse on the analyzer_versions check. + from clams.app import ClamsHFPromptableApp + cls = type('TestHFAppBad', (ClamsHFPromptableApp,), { + '_load_appmetadata': lambda self: make_metadata( + call_helper=True), # plain promptable, no analyzer_versions + '_appmetadata': lambda self: None, + '_annotate': lambda self, mmif, **kw: mmif, + 'MODEL_CLS': object, + }) + with self.assertRaises(ValueError) as ctx: + cls() + self.assertIn('analyzer_versions', str(ctx.exception)) + + def _patch_load(self): + """ + Context-manager-ish helper that swaps in a fake ``load_hf_model`` + recording every call. Returns ``(restore_fn, calls_list)``. 
+ """ + import clams.backends.hf as hf_module + original = hf_module.load_hf_model + calls = [] + + def fake_load(model_id, model_cls, **kwargs): + calls.append({'model_id': model_id, 'model_cls': model_cls, **kwargs}) + # processor / model / device tuple uniquely identifiable + return (f'PROC:{model_id}@{kwargs.get("revision")}', + f'MODEL:{model_id}@{kwargs.get("revision")}', + 'cpu') + + hf_module.load_hf_model = fake_load + return (lambda: setattr(hf_module, 'load_hf_model', original)), calls + + def test_singleton_eagerly_preloads_in_init(self): + restore, calls = self._patch_load() + try: + cls = self._make_subclass( + analyzer_versions=self.SINGLETON_AV, + DTYPE='FAKE_DTYPE', + PADDING_SIDE='left', + MODEL_KWARGS={'trust_remote_code': True}, + ) + app = cls() + # eager load on the single family member + self.assertEqual(len(calls), 1) + self.assertEqual(calls[0]['model_id'], 'org/fake-model') + self.assertEqual(calls[0]['revision'], 'deadbee') + self.assertEqual(calls[0]['dtype'], 'FAKE_DTYPE') + self.assertEqual(calls[0]['padding_side'], 'left') + self.assertEqual( + calls[0]['model_kwargs'], {'trust_remote_code': True}) + # self.processor / self.model / self.device populated + self.assertEqual(app.processor, 'PROC:org/fake-model@deadbee') + self.assertEqual(app.model, 'MODEL:org/fake-model@deadbee') + self.assertEqual(app.device, 'cpu') + finally: + restore() + + def test_multimember_defers_loading(self): + restore, calls = self._patch_load() + try: + cls = self._make_subclass(analyzer_versions=self.MULTI_AV) + app = cls() + # no eager load for multi-member families + self.assertEqual(calls, []) + self.assertIsNone(app.processor) + self.assertIsNone(app.model) + self.assertIsNone(app.device) + finally: + restore() + + def test_load_model_parses_at_revision_form_and_caches(self): + restore, calls = self._patch_load() + try: + cls = self._make_subclass(analyzer_versions=self.MULTI_AV) + app = cls() + # first call -- load via load_hf_model + 
app.load_model('org/large-model@aaaaaaa') + self.assertEqual(len(calls), 1) + self.assertEqual(calls[0]['model_id'], 'org/large-model') + self.assertEqual(calls[0]['revision'], 'aaaaaaa') + self.assertEqual(app.processor, 'PROC:org/large-model@aaaaaaa') + # second call same model -- cache hit, no new load + app.load_model('org/large-model@aaaaaaa') + self.assertEqual(len(calls), 1) + # switch to other family member -- new load + app.load_model('org/small-model@bbbbbbb') + self.assertEqual(len(calls), 2) + self.assertEqual(calls[1]['model_id'], 'org/small-model') + self.assertEqual(calls[1]['revision'], 'bbbbbbb') + self.assertEqual(app.processor, 'PROC:org/small-model@bbbbbbb') + # back to first -- still cached + app.load_model('org/large-model@aaaaaaa') + self.assertEqual(len(calls), 2) + self.assertEqual(app.processor, 'PROC:org/large-model@aaaaaaa') + finally: + restore() + + def test_load_model_accepts_raw_form_looks_up_revision(self): + restore, calls = self._patch_load() + try: + cls = self._make_subclass(analyzer_versions=self.MULTI_AV) + app = cls() + app.load_model('org/small-model') # no @rev suffix + self.assertEqual(calls[0]['model_id'], 'org/small-model') + self.assertEqual(calls[0]['revision'], 'bbbbbbb') + finally: + restore() + + def test_refine_params_expands_modelid_to_at_revision(self): + restore, _ = self._patch_load() + try: + cls = self._make_subclass(analyzer_versions=self.MULTI_AV) + app = cls() + refined = app._refine_params( + prompt=['hi'], + model=['org/large-model'], + ) + self.assertEqual(refined['model'], 'org/large-model@aaaaaaa') + finally: + restore() + + def test_singleton_default_lets_user_omit_modelid(self): + restore, _ = self._patch_load() + try: + cls = self._make_subclass(analyzer_versions=self.SINGLETON_AV) + app = cls() + # No model in input -- SDK fills in the singleton default, + # then our override expands it. 
+ refined = app._refine_params(prompt=['hi']) + self.assertEqual(refined['model'], 'org/fake-model@deadbee') + finally: + restore() + + +if __name__ == '__main__': + unittest.main()