diff --git a/clams/app/__init__.py b/clams/app/__init__.py
index 1d3f0a2..88f612b 100644
--- a/clams/app/__init__.py
+++ b/clams/app/__init__.py
@@ -9,11 +9,11 @@
from datetime import datetime
from urllib import parse as urlparser
-__all__ = ['ClamsApp']
+__all__ = ['ClamsApp', 'ClamsPromptableApp', 'ClamsHFPromptableApp']
-from typing import Union, Any, Optional, Dict, List, Tuple
+from typing import Union, Any, Optional, Dict, List, Tuple, cast
-from mmif import Mmif, Document, DocumentTypes, View
+from mmif import Mmif, Document, DocumentTypes, View, AnnotationTypes
from mmif.utils.video_document_helper import (
SamplingMode, SAMPLING_MODE_DESCRIPTIONS, SAMPLING_MODE_DEFAULT,
_sampling_mode,
@@ -75,7 +75,7 @@ class ClamsApp(ABC):
# how vdh.extract_frames_by_mode() selects frames from TimeFrames.
# The value is intercepted in annotate() and pushed into a
# contextvars.ContextVar so that any vdh call inside _annotate()
- # picks it up automatically — app developers never need to handle
+ # picks it up automatically; app developers never need to handle
# this parameter themselves.
{
'name': 'tfSamplingMode', 'type': 'string',
@@ -116,7 +116,7 @@ def appmetadata(self, **kwargs: List[str]) -> str:
"""
# cast only, no refinement
casted = self.metadata_param_caster.cast(kwargs)
- pretty = casted.pop('pretty') if 'pretty' in casted else False
+ pretty = casted.get('pretty', False)
return self.metadata.jsonify(pretty)
def _load_appmetadata(self) -> AppMetadata:
@@ -131,7 +131,7 @@ def _load_appmetadata(self) -> AppMetadata:
In any case, :class:`~clams.appmetadata.AppMetadata` class must be useful.
For metadata specification,
- see `https://sdk.clams.ai/appmetadata.jsonschema <../appmetadata.jsonschema>`_.
+ see `https://clams.ai/clams-python/appmetadata.jsonschema <../appmetadata.jsonschema>`_.
"""
cwd = pathlib.Path(sys.modules[self.__module__].__file__).parent
@@ -185,7 +185,7 @@ def annotate(self, mmif: Union[str, dict, Mmif], **runtime_params: List[str]) ->
refined = self._refine_params(**runtime_params)
self.logger.debug(f"Refined parameters: {refined}")
pretty = refined.get('pretty', False)
- sampling_mode_str = refined.pop('tfSamplingMode', None)
+ sampling_mode_str = refined.get('tfSamplingMode', None)
if sampling_mode_str is not None:
_sampling_mode.set(SamplingMode(sampling_mode_str))
t = datetime.now()
@@ -639,6 +639,771 @@ def open_document_location(document: Union[str, Document], opener: Any = open, *
raise FileNotFoundError(p.path)
+# TODO (krim @ 05/28/26): maybe we should consider implementing
+# autodoc-based auto documentation export (e.g., ``automethod`` for
+# methods and a small Sphinx extension to render
+# ``promptable_parameters`` into the parameter table), instead of the
+# current hand-authored ``documentation/app-baseclasses.rst``.
+class ClamsPromptableApp(ClamsApp):
+ """
+ Base class for CLAMS apps that wrap a promptable model (an LLM or
+ other multimodal model, local or remote). Standardizes the runtime
+ parameter surface (prompt, generation hyperparameters, parallelism
+ control) and provides helpers for building chat conversations and
+ persisting model responses into MMIF.
+
+ The standardized parameters are listed in
+ :py:attr:`promptable_parameters` and added to an app's metadata via
+ :py:meth:`inject_promptable_parameters`. Promptable-app developers
+ MUST call that helper at the end of their ``appmetadata()`` function
+ in ``metadata.py``. The reservation rule (these parameter names are
+ SDK-managed and apps cannot redeclare them) is enforced implicitly
+ via :py:meth:`AppMetadata.add_parameter`'s existing duplicate-name
+ check.
+
+ Inference is performed by :py:meth:`generate`, which subclasses MUST
+ implement. The base class provides:
+
+ * :py:meth:`inject_promptable_parameters` : adds the SDK-managed
+ parameter set to ``AppMetadata``
+ * :py:meth:`build_conversation` : assembles a chat-template-compatible
+ message list from a prompt plus optional images/audios
+ * :py:meth:`response_to_grounded_textdocument` : persists a
+ generated response into a view as ``TextDocument`` +
+ ``Alignment`` (+ optional ``origins`` / ``origination``)
+ """
+
+ #: SDK-managed runtime parameters injected into every promptable app.
+ #: These names are reserved; apps cannot redeclare them with
+ #: customized specs.
+ promptable_parameters = [
+ {
+ 'name': 'prompt', 'type': 'string', 'multivalued': True,
+ 'description':
+ 'User prompt(s) sent to the model. A single value runs as a '
+ 'one-shot generation. A multi-value list is interpreted as a '
+ 'multi-turn static prompt; see ``promptMode`` for how turns '
+ 'are assembled.',
+ },
+ {
+ 'name': 'systemPrompt', 'type': 'string', 'default': '',
+ 'description':
+ 'Optional system-role text prepended to the conversation. '
+ 'Empty by default.',
+ },
+ {
+ 'name': 'promptMode', 'type': 'string',
+ 'choices': ['user-only', 'turn-taking'],
+ 'default': 'turn-taking',
+ 'description':
+ 'How to interpret a multi-value ``prompt`` list. '
+ 'Has no effect when ``prompt`` has a single value. '
+ 'For semantics of each choice and worked examples, see '
+ 'https://clams.ai/clams-python/app-baseclasses.html#promptable-multiturn',
+ },
+ {
+ 'name': 'maxNewTokens', 'type': 'integer', 'default': 512,
+ 'description':
+ 'Maximum number of new tokens generated per inference call. '
+ 'Forwarded to the backend\'s ``generate``-equivalent. Larger '
+ 'values grow the KV cache linearly and increase GPU memory '
+ 'usage; reduce if VRAM is constrained.',
+ },
+ {
+ 'name': 'temperature', 'type': 'number', 'default': 0.0,
+ 'description':
+ 'Sampling temperature. The default ``0.0`` selects '
+ 'deterministic / greedy decoding for maximum reproducibility; '
+ 'override for sampled generation.',
+ },
+ {
+ 'name': 'topP', 'type': 'number', 'default': 1.0,
+ 'description':
+ 'Nucleus-sampling cumulative probability cutoff. Only '
+ 'meaningful when ``temperature`` is greater than 0.',
+ },
+ {
+ 'name': 'topK', 'type': 'integer', 'default': 50,
+ 'description':
+ 'Top-K sampling cutoff. Only meaningful when ``temperature`` '
+ 'is greater than 0.',
+ },
+ {
+ 'name': 'parallelPrompts', 'type': 'integer', 'default': 1,
+ 'description':
+ 'Number of independent prompts the app runs in parallel '
+ '(stacks into a single forward pass). The *size* of each '
+ 'prompt (how many images, how long the system/user text '
+ 'is, etc.) is NOT regulated by this parameter; that is '
+ 'each app\'s responsibility. Prompt count and per-prompt '
+ 'content size combine multiplicatively for GPU memory, '
+ 'so the two can blow up together. Catastrophic example: '
+ '``tfSamplingMode=all`` on a TimeFrame without '
+ '``targets`` expands that TF into one image per '
+ 'native-FPS frame (300 images for a 10-second TF at '
+ '30fps); ``parallelPrompts=4`` then runs 4 such prompts '
+ 'in one forward pass (~1200 images), guaranteed OOM. '
+ 'Keep at ``1`` on memory-tight setups; raise only when '
+ 'per-prompt content is small and bounded.',
+ },
+ ]
+
+    @staticmethod
+    def inject_promptable_parameters(metadata: AppMetadata) -> None:
+        """
+        Register every SDK-managed promptable parameter on ``metadata``.
+
+        Intended to be the last call in an app's ``appmetadata()``
+        function (in ``metadata.py``) when the app subclasses
+        :py:class:`ClamsPromptableApp`.
+
+        No explicit reservation check is done here: each entry of
+        :py:attr:`promptable_parameters` is simply forwarded to
+        ``metadata.add_parameter``. If the app already declared one of
+        these names (e.g. ``prompt``), that forwarded call is expected
+        to trip the duplicate-name ``ValueError`` of
+        :py:meth:`AppMetadata.add_parameter`.
+
+        :param metadata: the :class:`AppMetadata` instance being built
+        """
+        reserved_specs = ClamsPromptableApp.promptable_parameters
+        for spec in reserved_specs:
+            metadata.add_parameter(**spec)
+
+    def __init__(self):
+        # ``ClamsApp.__init__`` is the step that loads the app's
+        # ``metadata.py``; ``appmetadata()`` in there is expected to
+        # have called ``inject_promptable_parameters()`` already. The
+        # parent constructor then walks ``self.metadata.parameters``
+        # to fill ``annotate_param_spec`` and build the caster, so the
+        # promptable parameters need no extra wiring here. All that is
+        # left is to verify that the helper was in fact called.
+        super().__init__()
+        reserved = {
+            spec['name'] for spec in ClamsPromptableApp.promptable_parameters
+        }
+        present = {param.name for param in self.metadata.parameters}
+        absent = sorted(reserved - present)
+        if not absent:
+            return
+        raise ValueError(
+            f"Promptable parameters {absent} are missing "
+            f"from the app metadata. Promptable apps must call "
+            f"``ClamsPromptableApp.inject_promptable_parameters("
+            f"metadata)`` inside their ``appmetadata()`` function "
+            f"in ``metadata.py``."
+        )
+
+    @abstractmethod
+    def generate(
+        self,
+        prompt: List[str],
+        system_prompt: str = '',
+        images: Optional[List[List[Any]]] = None,
+        audios: Optional[List[List[Any]]] = None,
+        prompt_mode: str = 'turn-taking',
+        **generation_params,
+    ) -> List[str]:
+        """
+        Run N independent prompts in one inference call and return N
+        outputs. Subclasses MUST implement this; the base body only
+        raises :py:class:`NotImplementedError`.
+
+        Terminology: ``prompt`` is the list of text *turns*, while a
+        "prompt" in the batch sense is one unit of work -- the turns
+        plus one group of multimodal content. The outer length of
+        ``images`` / ``audios`` is what defines N.
+
+        Each inner list of ``images`` / ``audios`` is the bundled
+        multimodal content for ONE prompt -- the model sees those
+        items as one composite input and produces one output. The
+        outer list spans N prompts processed in parallel (when the
+        backend supports it; sequentially otherwise).
+
+        * Single-prompt call: ``images=[[img1, img2]]`` -> one output
+          (composite over the two bundled images).
+        * Per-input broadcast: ``images=[[img1], [img2], [img3]]`` ->
+          three outputs (one per image). The caller is responsible
+          for wrapping each input into its own singleton list.
+        * Multimodal pair: ``images=[[img1]], audios=[[au1]]`` -> one
+          output. When both ``images`` and ``audios`` are given they
+          must have the same outer length; index ``i`` of each pairs
+          into prompt ``i``.
+
+        :param prompt: a ``List[str]`` of prompt turns. A
+            single-element list is one-shot. A multi-element list is
+            multi-turn and is assembled according to ``prompt_mode``.
+        :param system_prompt: optional system-role text prepended to
+            the conversation. Applies to every prompt in the batch.
+        :param images: optional ``List[List[Any]]`` -- N groups, one
+            per prompt; each inner list is the bundled images for that
+            prompt.
+        :param audios: optional ``List[List[Any]]`` -- N groups, one
+            per prompt; each inner list is the bundled audio clips
+            for that prompt.
+        :param prompt_mode: ``"turn-taking"`` (default) or
+            ``"user-only"``; see :py:attr:`promptable_parameters`.
+        :param generation_params: any additional backend-specific
+            generation kwargs (``maxNewTokens``, ``temperature``,
+            ``topP``, ``topK``, etc.).
+        :return: a ``List[str]`` with one entry per prompt in the
+            batch. For ``prompt_mode='user-only'`` multi-turn, each
+            prompt's entry is the assistant's final reply across its
+            N user turns.
+        :rtype: List[str]
+        :raises NotImplementedError: always, in this base
+            implementation.
+        """
+        raise NotImplementedError
+
+    def build_conversation(
+        self,
+        prompt: Union[str, List[str], List[dict]],
+        system_prompt: str = '',
+        images: Optional[List[Any]] = None,
+        audios: Optional[List[Any]] = None,
+        prompt_mode: str = 'turn-taking',
+    ) -> Union[List[dict], List[List[dict]]]:
+        """
+        Build a chat-template-compatible message list.
+
+        :param prompt: a plain string, a non-empty ``List[str]`` of
+            prompt turns, or a pre-built ``List[dict]`` of
+            role/content message objects (returned as-is;
+            pass-through for advanced callers that constructed the
+            conversation themselves).
+        :param system_prompt: if non-empty, prepended as a
+            system-role message.
+        :param images: optional list of image inputs to include in a
+            user turn's content (the final user turn in
+            ``turn-taking`` mode, the first one in ``user-only``
+            mode). Each appears as a
+            ``{'type': 'image', 'image': <image>}`` entry.
+        :param audios: optional list of audio inputs, attached to the
+            same user turn as ``images``. Each appears as a
+            ``{'type': 'audio', 'audio': <audio>}`` entry.
+        :param prompt_mode: ``"turn-taking"`` (default) or
+            ``"user-only"``. Only meaningful when ``prompt`` is a
+            multi-element list; ignored otherwise. See
+            :py:attr:`promptable_parameters` for semantics.
+
+        :returns:
+            * For single-shot prompts (string or single-element list)
+              and for multi-element ``turn-taking`` mode: a single
+              ``List[dict]`` of role/content messages, ready to feed
+              to a chat-template applier (e.g.,
+              ``processor.apply_chat_template``).
+            * For multi-element ``user-only`` mode: a
+              ``List[List[dict]]`` of N progressively-extending
+              conversation prefixes, one per user turn. Each prefix
+              ends in a user turn; assistant turns between users are
+              stored with ``content=None`` as placeholders for the
+              caller to fill in with successive generation results.
+        :raises ValueError: if ``prompt`` is an empty sequence, or if
+            ``prompt`` has several turns and ``prompt_mode`` is not
+            one of the two supported values.
+
+        Subclasses may override to access model-specific state
+        (``self.processor``, ``self.tokenizer``, etc.) during
+        formatting; the base implementation is back-end-agnostic.
+        """
+        # Pass-through for pre-built message lists.
+        if isinstance(prompt, list) and prompt and all(
+                isinstance(p, dict) for p in prompt):
+            return cast(List[dict], prompt)
+
+        # Normalize to List[str].
+        prompts = [prompt] if isinstance(prompt, str) else list(prompt)
+
+        # An empty turn list used to fall through to the mode dispatch
+        # and yield a conversation without any user turn; reject it
+        # explicitly instead of handing the model a query-less chat.
+        if not prompts:
+            raise ValueError(
+                "`prompt` must contain at least one turn; "
+                "got an empty sequence.")
+
+        if len(prompts) == 1:
+            return self._build_single_turn(
+                prompts[0], system_prompt, images, audios)
+
+        if prompt_mode == 'turn-taking':
+            return self._build_turn_taking(
+                prompts, system_prompt, images, audios)
+        if prompt_mode == 'user-only':
+            return self._build_user_only(
+                prompts, system_prompt, images, audios)
+        raise ValueError(
+            f"Unknown prompt_mode: {prompt_mode!r}. "
+            f"Expected 'turn-taking' or 'user-only'.")
+
+    @staticmethod
+    def _make_user_content(text, images=None, audios=None):
+        """Assemble a user message's content: images, audios, then text."""
+        image_parts = [
+            {'type': 'image', 'image': item} for item in images or ()
+        ]
+        audio_parts = [
+            {'type': 'audio', 'audio': item} for item in audios or ()
+        ]
+        # The text entry always comes last, after any media entries.
+        return [*image_parts, *audio_parts, {'type': 'text', 'text': text}]
+
+    def _build_single_turn(self, text, system_prompt, images, audios):
+        """Return the messages of a one-shot prompt: optional system, then user."""
+        user_turn = {
+            'role': 'user',
+            'content': self._make_user_content(text, images, audios),
+        }
+        if not system_prompt:
+            return [user_turn]
+        return [{'role': 'system', 'content': system_prompt}, user_turn]
+
+    def _build_turn_taking(self, prompts, system_prompt, images, audios):
+        """
+        Build one conversation of alternating user/assistant turns
+        (a single inference call). Entries of ``prompts`` at even
+        positions become user turns; entries at odd positions become
+        pre-written assistant exemplars. Any images/audios go onto the
+        last user turn only, i.e. the actual query.
+        """
+        conversation = (
+            [{'role': 'system', 'content': system_prompt}]
+            if system_prompt else []
+        )
+        # Largest even position: that is where the final user turn sits.
+        final_query_at = (len(prompts) - 1) // 2 * 2
+        for position, turn_text in enumerate(prompts):
+            if position % 2:
+                conversation.append(
+                    {'role': 'assistant', 'content': turn_text})
+                continue
+            with_media = position == final_query_at
+            conversation.append({
+                'role': 'user',
+                'content': self._make_user_content(
+                    turn_text,
+                    images if with_media else None,
+                    audios if with_media else None),
+            })
+        return conversation
+
+    def _build_user_only(self, prompts, system_prompt, images, audios):
+        """
+        Build one conversation prefix per user turn, each extending
+        the previous one. Between consecutive user turns sits an
+        assistant message whose ``content`` is ``None``: a placeholder
+        the caller fills with the reply of the preceding generation.
+        """
+        running: List[dict] = (
+            [{'role': 'system', 'content': system_prompt}]
+            if system_prompt else []
+        )
+        snapshots: List[List[dict]] = []
+        final_turn = len(prompts) - 1
+        for turn_no, turn_text in enumerate(prompts):
+            # Only the opening user turn (the initial query) carries
+            # the images/audios; follow-up turns are text-only.
+            media_turn = turn_no == 0
+            running.append({
+                'role': 'user',
+                'content': self._make_user_content(
+                    turn_text,
+                    images if media_turn else None,
+                    audios if media_turn else None),
+            })
+            # Freeze the conversation as it looks right before the
+            # ``turn_no``-th generation. Messages are shallow-copied
+            # so that later in-place edits (e.g. filling an assistant
+            # placeholder) cannot leak back into earlier snapshots.
+            snapshots.append([message.copy() for message in running])
+            if turn_no != final_turn:
+                running.append({'role': 'assistant', 'content': None})
+        return snapshots
+
+    def response_to_grounded_textdocument(
+        self,
+        view: View,
+        source: str,
+        response: str,
+        origins: Optional[List[str]] = None,
+        origination: Optional[str] = None,
+        reasoning_trace: Optional[str] = None,
+    ) -> Tuple[Any, Any]:
+        """
+        Persist a single LLM text response into a view. Writes one
+        ``TextDocument`` (containing the response) plus possible
+        grounding via an ``Alignment`` annotation and ``origins`` /
+        ``origination`` properties on the TD.
+
+        All arguments are validated BEFORE anything is written, so a
+        rejected call leaves ``view`` untouched.
+
+        The two grounding link kinds are semantically distinct:
+
+        * ``source`` is the *coarse* cross-modal grounding -- the
+          single annotation id that the response is anchored to.
+          Written into the new ``Alignment`` (``source -> td``).
+          Typical value: the parent ``TimeFrame`` for a
+          captioning/OCR app.
+        * ``origins`` are the *finer* derivation grounding -- a list
+          of annotation ids the response was specifically derived
+          from (e.g. the ``TimePoint``\\s whose frames were fed to
+          the model). Written into ``TextDocument.origins``. See
+          https://clams.ai/clams-vocabulary/Document for vocabulary
+          semantics.
+
+        :param view: the :class:`View` to write into. The caller is
+            responsible for having called
+            :meth:`View.new_contain` for ``TextDocument`` and
+            ``Alignment`` first if needed.
+        :param source: ``id`` of the annotation to record as the
+            cross-modal anchor of the response (see above).
+        :param response: the text generated by the model.
+        :param origins: optional list of ``id``\\s of annotations the
+            response was *derived* from. Must be paired with
+            ``origination``.
+        :param origination: nature of the derivation, written to
+            ``TextDocument.origination``. Accepted values per the
+            vocabulary include ``'derived'``, ``'transcription'``,
+            ``'topologically-identical'``. Must be paired with
+            ``origins``.
+        :param reasoning_trace: optional model-side reasoning trace
+            (a chain-of-thought / scratchpad string, NOT a Python
+            traceback). NOT YET SUPPORTED -- passing a non-``None``
+            value raises :py:class:`NotImplementedError`. Storage
+            convention is still being decided at
+            clamsproject/clams-python#263.
+        :return: ``(TextDocument, Alignment)`` tuple of the new
+            annotations.
+        :raises ValueError: if exactly one of ``origins`` /
+            ``origination`` is set; they must be supplied together
+            or both omitted.
+        :raises NotImplementedError: if ``reasoning_trace`` is not
+            ``None``.
+        """
+        if bool(origins) != bool(origination):
+            raise ValueError(
+                "`origins` and `origination` must be supplied together "
+                "or both omitted; got "
+                f"origins={origins!r}, origination={origination!r}."
+            )
+        # Reject the unsupported argument up front. Raising only after
+        # the annotations were created would leave an orphan
+        # TextDocument/Alignment pair in the view that the caller
+        # never receives a handle to.
+        if reasoning_trace is not None:
+            raise NotImplementedError(
+                "Reasoning-trace storage convention is not yet defined; "
+                "tracked at clamsproject/clams-python#263."
+            )
+        td = view.new_textdocument(text=response)
+        if origins:
+            td.add_property('origins', origins)
+            td.add_property('origination', origination)
+        align = view.new_annotation(
+            AnnotationTypes.Alignment,
+            source=source,
+            target=td.id,
+        )
+        return td, align
+
+
+class ClamsHFPromptableApp(ClamsPromptableApp):
+ """
+ Base class for promptable CLAMS apps backed by a local
+ HuggingFace ``transformers`` model. Layers HF-specific inference
+ plumbing on top of :class:`ClamsPromptableApp`: model loading
+ via :func:`clams.backends.hf.load_hf_model`, and a concrete
+ :py:meth:`generate` implementation that runs N independent
+ prompts in one HF forward pass via the standard
+ chat-template -> ``model.generate`` -> ``batch_decode`` pipeline.
+
+ Concrete subclasses declare the model class via :py:attr:`MODEL_CLS`
+ plus a handful of optional dtype/padding hints, and the family of
+ pinned model revisions via ``analyzer_versions`` in
+ ``metadata.py``. The SDK auto-derives a ``model`` runtime
+ parameter (choices = keys of ``analyzer_versions``), and the dev's
+ ``_annotate`` calls :py:meth:`load_model` to (lazily) load the
+ requested family member. Singleton families (one entry in
+ ``analyzer_versions``) eagerly pre-load in ``__init__`` so
+ single-model apps preserve warm-start semantics. Example::
+
+ class MyVLMCaptioner(ClamsHFPromptableApp):
+ MODEL_CLS = AutoModelForImageTextToText
+ DTYPE = torch.bfloat16
+ PADDING_SIDE = 'left'
+
+ # In metadata.py:
+ # analyzer_versions={
+ # "HuggingFaceTB/SmolVLM2-2.2B-Instruct": "482adb5",
+ # }
+ # plus a call to
+ # ClamsHFPromptableApp.inject_promptable_parameters(metadata).
+
+ def _annotate(self, mmif, **parameters):
+ self.load_model(parameters['model'])
+ # ... self.generate(prompt, images=image_groups, ...)
+ # ... self.response_to_grounded_textdocument(...)
+ ...
+
+ Requires the ``[hf]`` extra (``pip install clams-python[hf]``).
+ """
+
+ #: ``transformers`` model class (e.g.
+ #: :class:`~transformers.AutoModelForImageTextToText`,
+ #: :class:`~transformers.AutoModelForCausalLM`). Subclasses MUST
+ #: set this.
+ MODEL_CLS: Optional[Any] = None
+ #: ``transformers`` processor / tokenizer / feature-extractor
+ #: class. Defaults to :class:`~transformers.AutoProcessor` (set
+ #: by :func:`clams.backends.hf.load_hf_model` when ``None``).
+ PROCESSOR_CLS: Optional[Any] = None
+ #: Torch dtype for the model (e.g. ``torch.bfloat16``). When
+ #: ``None``, the model class's own default is used (typically
+ #: float32). Also used to cast ``pixel_values`` in
+ #: :py:meth:`generate`.
+ DTYPE: Optional[Any] = None
+ #: Tokenizer padding side. Set to ``'left'`` for decoder-only
+ #: batched generation; leave ``None`` otherwise.
+ PADDING_SIDE: Optional[str] = None
+ #: Extra kwargs forwarded to ``MODEL_CLS.from_pretrained()``.
+ MODEL_KWARGS: Optional[dict] = None
+ #: Extra kwargs forwarded to ``PROCESSOR_CLS.from_pretrained()``.
+ PROCESSOR_KWARGS: Optional[dict] = None
+
+    @staticmethod
+    def inject_promptable_parameters(metadata: AppMetadata) -> None:
+        """
+        Add the SDK-managed promptable parameters AND a ``model``
+        parameter derived from ``metadata.analyzer_versions`` to the
+        app metadata. Overrides
+        :py:meth:`ClamsPromptableApp.inject_promptable_parameters` for
+        HF apps; call this at the end of your app's ``appmetadata()``
+        function in ``metadata.py`` if your app subclasses
+        :py:class:`ClamsHFPromptableApp`.
+
+        ``analyzer_versions`` is validated before any parameter is
+        added, so a failing call leaves ``metadata`` unmodified.
+
+        :param metadata: the :class:`AppMetadata` instance being
+            built. ``metadata.analyzer_versions`` MUST already be set
+            to a non-empty ``Dict[str, str]`` (model id -> commit
+            hash); this helper reads it to derive the ``model``
+            parameter's choices.
+        :raises ValueError: if ``metadata.analyzer_versions`` is
+            missing or empty.
+        """
+        analyzer_versions = metadata.analyzer_versions or {}
+        if not analyzer_versions:
+            raise ValueError(
+                "ClamsHFPromptableApp.inject_promptable_parameters "
+                "requires ``metadata.analyzer_versions`` to be a "
+                "non-empty dict (HF model id -> commit hash). Set "
+                "it on the ``AppMetadata`` constructor call before "
+                "invoking this helper.")
+        # Only mutate ``metadata`` once the precondition holds; the
+        # generic promptable parameters still come first, followed by
+        # the HF-specific ``model`` parameter.
+        ClamsPromptableApp.inject_promptable_parameters(metadata)
+        choices = list(analyzer_versions)
+        # A single-model app gets that model as the default; with
+        # several choices no default is declared.
+        default = choices[0] if len(choices) == 1 else None
+        metadata.add_parameter(
+            name='model',
+            type='string',
+            choices=choices,
+            default=default,
+            multivalued=False,
+            description=(
+                "HuggingFace model identifier to use for this "
+                "request. Must be one of the model ids declared in "
+                "this app's ``analyzer_versions``; the SDK pins the "
+                "corresponding commit hash at load time. When the "
+                "app ships a single model (the typical case), this "
+                "parameter defaults to that one model and can be "
+                "omitted. Pass the full HF model id (e.g. "
+                "``org/repo-name``); URL-encoding the ``/`` is "
+                "optional."
+            ),
+        )
+
+    def __init__(self):
+        super().__init__()
+        cls_name = type(self).__name__
+
+        # 1. The subclass must name the ``transformers`` model class.
+        if self.MODEL_CLS is None:
+            raise ValueError(
+                f"{cls_name} must set the ``MODEL_CLS`` class attribute "
+                f"(a ``transformers`` model class).")
+
+        # 2. The metadata must pin at least one model revision.
+        analyzer_versions = self.metadata.analyzer_versions
+        if not analyzer_versions:
+            raise ValueError(
+                f"{cls_name} must declare ``analyzer_versions`` in "
+                f"``metadata.py`` as a non-empty Dict[str, str] "
+                f"mapping HuggingFace model ids to pinned commit "
+                f"hashes (7-char abbreviation is sufficient). This is "
+                f"required for reproducibility: an unpinned download "
+                f"silently floats on whatever ``main`` points at and "
+                f"cannot be reproduced. Singleton families (one "
+                f"entry) are fine; multi-model families list every "
+                f"member.")
+
+        # 3. The HF flavour of the injection helper must have run; it
+        #    is the one that declares the ``model`` parameter.
+        declared_names = {param.name for param in self.metadata.parameters}
+        if 'model' not in declared_names:
+            raise ValueError(
+                f"{cls_name} must call "
+                f"``ClamsHFPromptableApp.inject_promptable_parameters"
+                f"(metadata)`` (the HF override that also adds the "
+                f"``model`` parameter) inside ``appmetadata()`` in "
+                f"``metadata.py``; calling "
+                f"``ClamsPromptableApp.inject_promptable_parameters`` "
+                f"directly skips the ``model`` parameter and trips "
+                f"this check.")
+
+        #: Loaded ``(processor, model, device)`` triples keyed by
+        #: ``(model_id, revision)``. Filled by :py:meth:`load_model`
+        #: and kept for as long as this app instance lives.
+        self._model_cache: Dict[Tuple[str, str], Tuple[Any, Any, str]] = {}
+        #: The currently-active model triple, as set by
+        #: :py:meth:`load_model` and read by ``generate()`` and
+        #: friends. All three stay ``None`` until the first
+        #: ``load_model`` call (which, for a singleton family, is the
+        #: eager load right below).
+        self.processor: Any = None
+        self.model: Any = None
+        self.device: Optional[str] = None
+
+        # A singleton family is loaded right away so that single-model
+        # apps keep their warm-start UX (no latency hit on the first
+        # request). With several members, loading is deferred to the
+        # first ``load_model`` call.
+        if len(analyzer_versions) == 1:
+            (sole_model_id,) = analyzer_versions
+            self.load_model(sole_model_id)
+
+    def _refine_params(self, **runtime_params):
+        """
+        Refine the runtime parameters as the parent class does, then
+        rewrite a raw ``model`` value (``org/name``) into its pinned
+        form ``org/name@<revision>`` so that the resolved revision
+        lands in ``view.metadata.appConfiguration['model']``.
+
+        The value is left alone when it is not a string, already
+        contains ``@``, or is not a key of ``analyzer_versions``.
+        """
+        refined = super()._refine_params(**runtime_params)
+        requested = refined.get('model')
+        if not isinstance(requested, str) or '@' in requested:
+            return refined
+        pinned = (self.metadata.analyzer_versions or {}).get(requested)
+        if pinned is not None:
+            refined['model'] = f"{requested}@{pinned}"
+        return refined
+
+    def load_model(
+        self, model_id_or_with_rev: str,
+    ) -> Tuple[Any, Any, str]:
+        """
+        Make the given model the active one and return its
+        ``(processor, model, device)`` triple, loading it on first
+        use.
+
+        Both the refined (``org/name@<revision>``) and the raw
+        (``org/name``) forms are accepted; a raw id has its revision
+        looked up in ``self.metadata.analyzer_versions``. Triples are
+        cached per ``(model_id, revision)``. In every case
+        :py:attr:`self.processor`, :py:attr:`self.model` and
+        :py:attr:`self.device` are pointed at the returned triple, so
+        later :py:meth:`generate` calls operate on it.
+
+        :param model_id_or_with_rev: HF model id, optionally with an
+            ``@<revision>`` suffix.
+        :return: ``(processor, model, device)`` tuple for the loaded
+            model. Same references are also stored on ``self``.
+        :raises KeyError: if a raw model id is passed and is not in
+            ``analyzer_versions``.
+        """
+        # Split on the LAST ``@``; an empty separator means the raw form.
+        model_id, at_sign, revision = model_id_or_with_rev.rpartition('@')
+        if not at_sign:
+            model_id = model_id_or_with_rev
+            revision = self.metadata.analyzer_versions[model_id]
+
+        key = (model_id, revision)
+        triple = self._model_cache.get(key)
+        if triple is None:
+            # Imported lazily so that torch/transformers stay out of
+            # the base clams-python install. Apps using this class
+            # need the ``[hf]`` extra.
+            from clams.backends.hf import load_hf_model
+            self.logger.info(f"Loading HF model from {model_id} @ {revision}")
+            triple = load_hf_model(
+                model_id,
+                self.MODEL_CLS,
+                processor_cls=self.PROCESSOR_CLS,
+                dtype=self.DTYPE,
+                padding_side=self.PADDING_SIDE,
+                revision=revision,
+                model_kwargs=self.MODEL_KWARGS,
+                processor_kwargs=self.PROCESSOR_KWARGS,
+            )
+            self.logger.info(f"HF model loaded on {triple[2]}")
+            self._model_cache[key] = triple
+        self.processor, self.model, self.device = triple
+        return triple
+
+    def generate(
+        self,
+        prompt: List[str],
+        system_prompt: str = '',
+        images: Optional[List[List[Any]]] = None,
+        audios: Optional[List[List[Any]]] = None,
+        prompt_mode: str = 'turn-taking',
+        **generation_params,
+    ) -> List[str]:
+        """
+        Default implementation of the
+        :py:meth:`ClamsPromptableApp.generate` contract for
+        HuggingFace ``transformers`` models. Runs N prompts batched
+        together and returns N decoded strings.
+
+        Each inner list of ``images`` / ``audios`` is the bundled
+        content for one prompt; the same ``prompt`` turns and
+        ``system_prompt`` are reused for every group. When both
+        ``images`` and ``audios`` are given they must have the same
+        outer length (multimodal pairs are stitched by index). When
+        both are ``None``, runs as a single text-only prompt.
+
+        The default body is the canonical HF chat-model pipeline:
+        :py:meth:`build_conversation` -> ``apply_chat_template`` ->
+        ``model.generate`` -> ``batch_decode``. Single-shot and
+        ``turn-taking`` prompts take one pass through that pipeline.
+        A multi-turn ``user-only`` prompt takes one pass per user
+        turn: each reply is written into the next prefix's assistant
+        placeholder, and the replies to the final turn are returned.
+        Subclasses can customize finer-grained pieces via
+        :py:meth:`build_conversation` (model-specific message shape)
+        and :py:meth:`build_gen_kwargs` (model-specific generation
+        kwargs) without touching this method.
+
+        :return: one string per prompt; an empty list when the outer
+            list of ``images`` / ``audios`` is empty. If anything in
+            the pipeline raises, the error is logged and a list of N
+            empty strings is returned instead (best-effort).
+        :raises ValueError: if ``images`` and ``audios`` are both
+            given with different outer lengths.
+        """
+        if (images is not None and audios is not None
+                and len(images) != len(audios)):
+            raise ValueError(
+                f"images and audios must have the same outer length "
+                f"when both are given; got "
+                f"{len(images)} vs {len(audios)}.")
+        if images is not None:
+            n = len(images)
+        elif audios is not None:
+            n = len(audios)
+        else:
+            n = 1  # text-only single prompt
+        if n == 0:
+            return []
+        gen_kwargs = self.build_gen_kwargs(**generation_params)
+        try:
+            conversations = [
+                self.build_conversation(
+                    prompt, system_prompt=system_prompt,
+                    images=images[i] if images is not None else None,
+                    audios=audios[i] if audios is not None else None,
+                    prompt_mode=prompt_mode)
+                for i in range(n)
+            ]
+            first = conversations[0]
+            # ``build_conversation`` hands back a list of conversation
+            # prefixes (a list of lists) only for multi-turn
+            # ``user-only`` prompts. Feeding that shape straight to
+            # the chat template fails, so it gets its own turn-by-turn
+            # driver.
+            if first and isinstance(first[0], list):
+                return self._generate_user_only(conversations, gen_kwargs)
+            return self._generate_batch(conversations, gen_kwargs)
+        except Exception as e:
+            # Deliberate best-effort: one failing batch must not abort
+            # the whole annotation run, so log and return blanks.
+            self.logger.error(
+                f"Error processing batch: {e}", exc_info=True)
+            return [''] * n
+
+    def _generate_batch(self, conversations, gen_kwargs):
+        """Run one chat-template -> generate -> decode pass over a batch."""
+        inputs = self.processor.apply_chat_template(
+            conversations,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_dict=True,
+            padding=True,
+            return_tensors="pt",
+        )
+        inputs = inputs.to(self.device)
+        if (self.DTYPE is not None
+                and 'pixel_values' in inputs
+                and inputs['pixel_values'] is not None):
+            inputs['pixel_values'] = inputs['pixel_values'].to(
+                dtype=self.DTYPE)
+        generated_ids = self.model.generate(**inputs, **gen_kwargs)
+        # Drop the echoed prompt tokens; keep only what was generated.
+        input_len = inputs.input_ids.shape[1]
+        new_tokens = generated_ids[:, input_len:]
+        return self.processor.batch_decode(
+            new_tokens, skip_special_tokens=True)
+
+    def _generate_user_only(self, prefix_sets, gen_kwargs):
+        """Drive ``user-only`` prefixes turn by turn; return the final replies."""
+        # history[i] collects the replies produced so far for prompt i.
+        history = [[] for _ in prefix_sets]
+        replies = [''] * len(prefix_sets)
+        for turn in range(len(prefix_sets[0])):
+            batch = []
+            for prefixes, past in zip(prefix_sets, history):
+                pending = iter(past)
+                # Fill the ``content=None`` assistant placeholders, in
+                # order, with the earlier replies. Filled messages are
+                # fresh dicts, so the stored prefixes stay untouched.
+                batch.append([
+                    {**msg, 'content': next(pending)}
+                    if msg['role'] == 'assistant' and msg['content'] is None
+                    else msg
+                    for msg in prefixes[turn]
+                ])
+            replies = self._generate_batch(batch, gen_kwargs)
+            for past, reply in zip(history, replies):
+                past.append(reply)
+        return replies
+
+    @staticmethod
+    def build_gen_kwargs(
+        max_new_tokens: int = 512,
+        temperature: float = 0.0,
+        top_p: float = 1.0,
+        top_k: int = 50,
+        **_unused,
+    ) -> dict:
+        """
+        Translate the SDK's promptable-parameter values into
+        HuggingFace ``model.generate()`` kwargs. Greedy decoding
+        (``do_sample=False``) when ``temperature == 0.0``; sampled
+        decoding with the given ``top_p`` / ``top_k`` otherwise.
+
+        The SDK's runtime parameters are camelCase (``maxNewTokens``,
+        ``topP``, ``topK``), so the full ``**parameters`` dict from
+        ``_annotate`` can be passed through without filtering: those
+        spellings are picked up from the extra keyword args. An
+        explicitly passed, non-default snake_case argument takes
+        precedence over its camelCase twin. Any other extra keyword
+        arg is silently ignored.
+
+        Subclasses MAY override to add model-specific generation
+        kwargs (``num_beams``, ``repetition_penalty``, custom
+        stopping criteria, ``do_sample`` overrides, etc.).
+
+        :param max_new_tokens: cap on newly generated tokens.
+        :param temperature: sampling temperature; values ``<= 0``
+            select greedy decoding.
+        :param top_p: nucleus-sampling cutoff, used only when
+            sampling.
+        :param top_k: top-K cutoff, used only when sampling.
+        :return: kwargs dict for ``model.generate()``.
+        """
+        resolved = {
+            'max_new_tokens': max_new_tokens,
+            'top_p': top_p,
+            'top_k': top_k,
+        }
+        # (HF kwarg, SDK parameter name, signature default). The
+        # default is repeated here because comparing against it is
+        # the only way to tell "left unset" from "set explicitly"
+        # without changing the signature.
+        sdk_aliases = (
+            ('max_new_tokens', 'maxNewTokens', 512),
+            ('top_p', 'topP', 1.0),
+            ('top_k', 'topK', 50),
+        )
+        for hf_name, sdk_name, default in sdk_aliases:
+            if resolved[hf_name] == default and sdk_name in _unused:
+                resolved[hf_name] = _unused[sdk_name]
+
+        gen_kwargs = {'max_new_tokens': resolved['max_new_tokens']}
+        if temperature > 0:
+            gen_kwargs.update({
+                'do_sample': True,
+                'temperature': temperature,
+                'top_p': resolved['top_p'],
+                'top_k': resolved['top_k'],
+            })
+        return gen_kwargs
+
+
class ParameterCaster(object):
def __init__(self, param_spec: Dict[str, Tuple[str, bool]]):
diff --git a/clams/backends/__init__.py b/clams/backends/__init__.py
new file mode 100644
index 0000000..d9fe452
--- /dev/null
+++ b/clams/backends/__init__.py
@@ -0,0 +1,9 @@
+"""
+Optional model-backend helpers for CLAMS apps.
+
+Each backend is a separate submodule. Heavy dependencies (e.g.,
+``torch``, ``transformers``) are NOT pulled in by the base
+``clams-python`` install; users opt in via pip extras such as
+``pip install clams-python[hf]`` for the HuggingFace transformers
+backend.
+"""
diff --git a/clams/backends/hf.py b/clams/backends/hf.py
new file mode 100644
index 0000000..b2dcfab
--- /dev/null
+++ b/clams/backends/hf.py
@@ -0,0 +1,247 @@
+"""
+HuggingFace transformers backend helpers.
+
+Two general loaders that wrap the device / kwargs / inference-mode
+boilerplate every HF-backed CLAMS app does identically:
+
+* :func:`load_hf_model` -- ``from_pretrained()`` flow for any model
+ class (instruction-tuned LLMs/VLMs, encoder-only classifiers,
+ vision/audio feature extractors, etc.). Use when the app needs raw
+ access to the underlying model and processor.
+* :func:`load_hf_pipeline` -- task-level :func:`transformers.pipeline`
+ flow (ASR, NER, text classification, zero-shot, etc.). Use when
+ pipeline-level inference is sufficient.
+
+``torch`` and ``transformers`` are optional dependencies. Install them
+via the ``[hf]`` extra::
+
+ pip install clams-python[hf]
+
+Imports are lazy: this module can be referenced from
+:mod:`clams.app` without triggering an ``ImportError`` on a base
+``clams-python`` install. The :class:`ImportError` only fires when a
+loader is actually called without the extras.
+"""
+from typing import Any, Optional, Tuple, Union
+
+
+def load_hf_model(
+ model_id: str,
+ model_cls,
+ processor_cls=None,
+ dtype=None,
+ device: Optional[str] = None,
+ padding_side: Optional[str] = None,
+ revision: Optional[str] = None,
+ model_kwargs: Optional[dict] = None,
+ processor_kwargs: Optional[dict] = None,
+ move_to_device: bool = True,
+) -> Tuple[Any, Any, str]:
+ """
+ Load a HuggingFace ``transformers`` model via ``from_pretrained``
+ and return it ready for inference.
+
+ :param model_id: HuggingFace model identifier (e.g., a Hub repo
+ name or a local path) forwarded to ``from_pretrained``.
+ :param model_cls: a ``transformers`` model class (e.g.,
+ ``AutoModelForCausalLM``, ``AutoModelForImageTextToText``,
+ ``ConvNextV2Model``, ``ViTModel``, ...). Whatever supports
+ ``from_pretrained()``.
+ :param processor_cls: a processor / tokenizer / feature-extractor
+ class with ``from_pretrained()``. Pass ``transformers.AutoTokenizer``,
+ ``transformers.AutoImageProcessor``, etc. for narrower cases.
+ When ``None`` (the default) AND ``processor_kwargs`` is also
+ ``None``, ``transformers.AutoProcessor`` is used. When ``None``
+ while ``processor_kwargs`` is not ``None`` (even an empty
+ dict), processor loading is skipped entirely and the returned
+ ``processor`` is ``None``. An explicit ``processor_cls=None``
+ on its own is indistinguishable from the default and does NOT
+ skip loading.
+ :param dtype: torch dtype for the model (e.g., ``torch.bfloat16``).
+ When ``None`` (default), no ``torch_dtype`` kwarg is forwarded
+ to ``from_pretrained`` -- the model class uses its own default
+ (typically float32). Set explicitly for low-precision LLM
+ inference. When set, it overrides any ``torch_dtype`` entry in
+ ``model_kwargs``.
+ :param device: target device string (e.g., ``'cuda'``, ``'cpu'``,
+ ``'cuda:0'``). When ``None`` (default), the helper auto-detects
+ cuda availability and falls back to cpu.
+ :param padding_side: if set (typically ``'left'`` for decoder-only
+ models doing batched generation), the helper configures the
+ underlying tokenizer's ``padding_side`` and -- when no pad
+ token is set -- uses the EOS token as the pad token. Leave
+ ``None`` for encoder / non-batched cases (the tokenizer's own
+ default is preserved). Has no effect when processor loading
+ is skipped.
+ :param revision: optional Git revision (commit hash, branch name,
+ or tag) on the Hub repository to pin the download to. When
+ set, forwarded as ``revision=...`` to both
+ ``model_cls.from_pretrained`` and
+ ``processor_cls.from_pretrained``, ensuring the model and
+ processor are loaded from the same commit. A ``'revision'``
+ key already present in ``model_kwargs`` / ``processor_kwargs``
+ is kept and wins over this argument for that call. Strongly
+ recommended
+ for production: pinning a commit hash makes the analyzer
+ artifact reproducible and immune to upstream silent updates.
+ Apps calling this helper directly should record the same hash
+ on ``analyzer_version`` (or ``analyzer_versions``) in
+ ``metadata.py`` so the output MMIF identifies the exact
+ artifact. Apps inheriting from
+ :class:`~clams.app.ClamsHFPromptableApp` do not call this
+ helper -- the base class reads ``analyzer_versions`` from the
+ app metadata and forwards the resolved revision automatically.
+ :param model_kwargs: extra kwargs forwarded to
+ ``model_cls.from_pretrained()`` (e.g.,
+ ``{'use_safetensors': True, 'add_pooling_layer': False}``).
+ :param processor_kwargs: extra kwargs forwarded to
+ ``processor_cls.from_pretrained()`` (e.g.,
+ ``{'use_safetensors': True, 'use_fast': True}``). Passing a
+ non-``None`` value here while leaving ``processor_cls`` as
+ ``None`` disables the ``AutoProcessor`` default (see
+ ``processor_cls``), so give both together.
+ :param move_to_device: when ``True`` (default), the helper moves
+ the loaded model to the resolved device and switches it to
+ ``eval()`` mode -- the right behavior for a "ready for
+ inference" app loader. When ``False``, both steps are
+ skipped; the model is returned in the state
+ ``from_pretrained`` left it (per the ``transformers``
+ documentation ``from_pretrained`` already returns the model in
+ evaluation mode; placement is wherever ``from_pretrained`` put
+ it, normally CPU unless ``model_kwargs`` says otherwise). Use
+ ``False`` for library-style HF wrappers that defer device
+ placement and inference-mode switching to a downstream
+ consumer (e.g. an extractor class that may be combined with
+ a head and only then placed on a device by the wrapping
+ classifier). The returned ``device`` is still the resolved
+ target, so the consumer can use it later for its own
+ ``.to(device)`` call.
+
+ :returns: ``(processor, model, device)`` tuple. ``processor`` is
+ the loaded processor/tokenizer/feature-extractor (or ``None``
+ if processor loading was skipped, see ``processor_cls``).
+ ``device`` is the resolved device string (the model was moved
+ there iff ``move_to_device=True``).
+ :rtype: Tuple[Any, Any, str]
+ :raises ImportError: if ``torch`` or ``transformers`` is not
+ installed. Install the ``[hf]`` extra to fix.
+ """
+ try:
+ import torch # pytype: disable=import-error
+ except ImportError as e:
+ raise ImportError(
+ "clams.backends.hf requires the `torch` package. "
+ "Install with: pip install clams-python[hf]"
+ ) from e
+ try:
+ import transformers # pytype: disable=import-error
+ except ImportError as e:
+ raise ImportError(
+ "clams.backends.hf requires the `transformers` package. "
+ "Install with: pip install clams-python[hf]"
+ ) from e
+
+ resolved_device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
+
+ # Processor.
+ # NOTE(review): "skip the processor" is signalled by
+ # ``processor_cls is None`` combined with a non-None
+ # ``processor_kwargs``; a dedicated sentinel default would make an
+ # explicit ``processor_cls=None`` detectable -- confirm with callers
+ # before changing.
+ if processor_cls is None and processor_kwargs is None:
+ # default to AutoProcessor
+ processor_cls = transformers.AutoProcessor
+ if processor_cls is not None:
+ processor_load_kwargs = dict(processor_kwargs or {})
+ if revision is not None:
+ processor_load_kwargs.setdefault('revision', revision)
+ processor = processor_cls.from_pretrained(
+ model_id, **processor_load_kwargs)
+ if padding_side is not None:
+ # Multimodal processors expose the tokenizer as an attribute;
+ # a bare tokenizer is configured directly.
+ tokenizer = getattr(processor, 'tokenizer', processor)
+ tokenizer.padding_side = padding_side
+ if getattr(tokenizer, 'pad_token', None) is None:
+ eos = getattr(tokenizer, 'eos_token', None)
+ if eos is not None:
+ tokenizer.pad_token = eos
+ else:
+ processor = None
+
+ # Model.
+ model_load_kwargs = dict(model_kwargs or {})
+ if dtype is not None:
+ model_load_kwargs['torch_dtype'] = dtype
+ if revision is not None:
+ model_load_kwargs.setdefault('revision', revision)
+ model = model_cls.from_pretrained(model_id, **model_load_kwargs)
+ if move_to_device:
+ model = model.to(resolved_device)
+ model.eval()
+
+ return processor, model, resolved_device
+
+
+def load_hf_pipeline(
+    task: str,
+    model_id: str,
+    device: Optional[Union[str, int]] = None,
+    revision: Optional[str] = None,
+    model_kwargs: Optional[dict] = None,
+    pipeline_kwargs: Optional[dict] = None,
+) -> Tuple[Any, Union[str, int]]:
+    """
+    Build a HuggingFace :func:`transformers.pipeline` for ``task``
+    backed by ``model_id`` and hand it back together with the device
+    it was created on.
+
+    Reach for this helper when task-level pipeline inference is
+    enough (ASR via ``"automatic-speech-recognition"``, NER via
+    ``"token-classification"``, text classification, zero-shot,
+    etc.); prefer :func:`load_hf_model` when the app needs the raw
+    model / processor objects (custom chat-template formatting,
+    batched ``generate`` calls, ...).
+
+    :param task: pipeline task string, passed as the first positional
+        argument of :func:`transformers.pipeline`.
+    :param model_id: HuggingFace model identifier (Hub repo name or
+        local path), passed as ``pipeline(model=...)``.
+    :param device: target device, passed through untouched as
+        ``pipeline(device=...)``. Either a string (``'cuda'``,
+        ``'cpu'``, ``'cuda:0'``) or the integer form ``pipeline``
+        understands (``-1`` for CPU, ``0+`` for a GPU index). When
+        ``None`` (default) the helper picks ``'cuda'`` if
+        ``torch.cuda.is_available()`` and ``'cpu'`` otherwise.
+    :param revision: optional Git revision (commit hash, branch, or
+        tag) on the Hub to pin the download to; see
+        :func:`load_hf_model` for why pinning is recommended. When
+        not ``None`` it replaces any ``revision`` entry in
+        ``pipeline_kwargs``.
+    :param model_kwargs: extra kwargs for the underlying
+        ``from_pretrained()`` call, passed on as
+        ``pipeline(model_kwargs={...})``. When non-empty, a copy
+        replaces any ``model_kwargs`` entry in ``pipeline_kwargs``.
+    :param pipeline_kwargs: extra kwargs given directly to
+        :func:`transformers.pipeline` (e.g. ``generate_kwargs``,
+        ``tokenizer``, ``feature_extractor``, ``batch_size``,
+        ``framework``). The mapping is copied, never mutated; its
+        ``task``, ``model`` and ``device`` entries are always
+        dropped in favour of the explicit arguments above.
+    :returns: ``(pipeline, device)`` tuple. ``device`` is exactly
+        what was passed in, or the auto-detected string when
+        ``device=None``.
+    :rtype: Tuple[Any, Union[str, int]]
+    :raises ImportError: if ``torch`` or ``transformers`` is not
+        installed. Install the ``[hf]`` extra to fix.
+    """
+    try:
+        import torch  # pytype: disable=import-error
+    except ImportError as e:
+        raise ImportError(
+            "clams.backends.hf requires the `torch` package. "
+            "Install with: pip install clams-python[hf]"
+        ) from e
+    try:
+        from transformers import pipeline  # pytype: disable=import-error
+    except ImportError as e:
+        raise ImportError(
+            "clams.backends.hf requires the `transformers` package. "
+            "Install with: pip install clams-python[hf]"
+        ) from e
+
+    if device is None:
+        resolved_device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    else:
+        resolved_device = device
+
+    # Start from a private copy of the caller's extras, minus the keys
+    # this helper supplies itself.
+    helper_owned = ('task', 'model', 'device')
+    call_kwargs = {
+        key: value
+        for key, value in dict(pipeline_kwargs or {}).items()
+        if key not in helper_owned
+    }
+    if model_kwargs:
+        call_kwargs['model_kwargs'] = dict(model_kwargs)
+    if revision is not None:
+        call_kwargs['revision'] = revision
+
+    pipe = pipeline(
+        task, model=model_id, device=resolved_device, **call_kwargs)
+    return pipe, resolved_device
diff --git a/clams/develop/__init__.py b/clams/develop/__init__.py
index 4925780..5cbd84f 100644
--- a/clams/develop/__init__.py
+++ b/clams/develop/__init__.py
@@ -18,7 +18,13 @@
'description': 'GtiHub Actions workflow files specific to `clamsproject` GitHub organization',
'sourcedir': 'gha',
'targetdir': '.github',
- }
+ },
+ 'utl-tf': {
+ 'description': 'Local helper module for iterating TimeFrames and collecting per-TF frame tasks '
+ '(baked into ``utils/timeframe.py``; backend-agnostic, safe to edit/delete)',
+ 'sourcedir': 'utl-tf',
+ 'targetdir': 'utils',
+ },
}
@@ -65,12 +71,20 @@ def bake(self, update_level=0):
if recipe == 'gha':
# There's nothing for devs to tweak GHA template, so first generation and updating are the same.
self.bake_gha(src_dir, dst_dir)
+ if recipe.startswith('utl-'):
+ # Utility recipes bake static helper modules; once baked the
+ # code is local to the app and devs are free to edit. No
+ # templating-variable substitution is needed -- pass an
+ # empty dict so ``safe_substitute`` is a no-op.
+ if dst_dir.exists() and update_level == 0:
+ raise FileExistsError(f" {dst_dir} already exists. Did you mean `--update`? ")
+ self.bake_app(src_dir, dst_dir, {})
def bake_app(self, src_dir, dst_dir, templating_vars):
for g in src_dir.glob("**/*.template"):
r = g.relative_to(src_dir).parent
f = g.with_suffix('').name
- (dst_dir / r).mkdir(exist_ok=True)
+ (dst_dir / r).mkdir(parents=True, exist_ok=True)
with open(g, 'r') as in_f, open(dst_dir/r/f, 'w') as out_f:
tmpl_to_compile = Template(in_f.read())
diff --git a/clams/develop/templates/app/app.py.template b/clams/develop/templates/app/app.py.template
index a7a4cc5..d2eb9b0 100644
--- a/clams/develop/templates/app/app.py.template
+++ b/clams/develop/templates/app/app.py.template
@@ -24,19 +24,64 @@ from mmif import Mmif, View, Annotation, Document, AnnotationTypes, DocumentType
from lapps.discriminators import Uri
+# =============================================================================
+# Pick a base class for your app:
+#
+# ClamsApp ............ default; the rest of this scaffold inherits from it.
+# Implement ``_annotate()``. That's it.
+# Choose for any non-LLM/VLM app: classical OCR /
+# ASR engines, classifiers, rule-based tools, etc.
+#
+# ClamsPromptableApp .. for prompt-driven LLM/VLM/ALM/LMM apps wrapping a
+# non-HF backend (remote APIs like OpenAI/Anthropic,
+# vLLM, custom inference servers).
+# Implement: ``_annotate()`` + ``generate()``.
+# Import:
+# from clams import ClamsPromptableApp
+# Also in ``metadata.py``: uncomment the
+# ``inject_promptable_parameters`` block.
+#
+# ClamsHFPromptableApp for prompt-driven apps wrapping a local HuggingFace
+# ``transformers`` model (the typical VLM/LLM case).
+# Implement: ``_annotate()`` (call
+# ``self.load_model(parameters['model'])`` first) +
+# declare class attributes:
+# MODEL_CLS = <transformers model class>
+# DTYPE = torch.bfloat16 # optional
+# PADDING_SIDE = 'left' # optional
+# Import:
+# from clams.app import ClamsHFPromptableApp
+# Also in ``metadata.py``: set
+# ``analyzer_versions={<model_id>: <revision>, ...}``
+# on the ``AppMetadata`` call, and uncomment the
+# ``ClamsHFPromptableApp.inject_promptable_parameters``
+# block (the HF override of the plain helper).
+# Requires the ``[hf]`` extra:
+# pip install clams-python[hf]
+# Singleton ``analyzer_versions`` families pre-load
+# in ``__init__`` (warm start); multi-member
+# families load on the first ``load_model`` call
+# and cache thereafter. ``generate()``,
+# ``build_conversation``, and ``build_gen_kwargs``
+# have working defaults; override only for
+# model-specific quirks.
+#
+# See https://clams.ai/clams-python/app-baseclasses.html for the full
+# developer guide.
+# =============================================================================
class $APP_CLASS_NAME(ClamsApp):
def __init__(self):
super().__init__()
def _appmetadata(self):
- # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._load_appmetadata
+ # see https://clams.ai/clams-python/autodoc/clams.app.html#clams.app.ClamsApp._load_appmetadata
# Also check out ``metadata.py`` in this directory.
# When using the ``metadata.py`` leave this do-nothing "pass" method here.
pass
def _annotate(self, mmif: Mmif, **parameters) -> Mmif:
- # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._annotate
+ # see https://clams.ai/clams-python/autodoc/clams.app.html#clams.app.ClamsApp._annotate
raise NotImplementedError
def get_app():
diff --git a/clams/develop/templates/app/metadata.py.template b/clams/develop/templates/app/metadata.py.template
index 93aec79..2de03a5 100644
--- a/clams/develop/templates/app/metadata.py.template
+++ b/clams/develop/templates/app/metadata.py.template
@@ -16,8 +16,8 @@ def appmetadata() -> AppMetadata:
"""
Function to set app-metadata values and return it as an ``AppMetadata`` obj.
Read these documentations before changing the code below
- - https://sdk.clams.ai/appmetadata.html metadata specification.
- - https://sdk.clams.ai/autodoc/clams.appmetadata.html python API
+ - https://clams.ai/clams-python/appmetadata.html metadata specification.
+ - https://clams.ai/clams-python/autodoc/clams.appmetadata.html python API
:return: AppMetadata object holding all necessary information.
"""
@@ -51,7 +51,48 @@ def appmetadata() -> AppMetadata:
metadata.add_parameter(name='a_param', description='example parameter description',
type='boolean', default='false')
# metadta.add_parameter(more...)
-
+
+ # If your app subclasses ``ClamsPromptableApp`` (a prompt-driven LLM/VLM/audio-LM
+ # app on a non-HF backend), uncomment the following two lines to add the
+ # SDK-managed promptable parameters (prompt, systemPrompt, temperature,
+ # maxNewTokens, etc.) to your app's metadata. See
+ # https://clams.ai/clams-python/app-baseclasses.html#promptable for the
+ # developer guide. Reminder: these parameter names are reserved by the SDK;
+ # do not redeclare any of them above.
+ # from clams.app import ClamsPromptableApp
+ # ClamsPromptableApp.inject_promptable_parameters(metadata)
+ #
+ # If your app subclasses ``ClamsHFPromptableApp`` (HF transformers backend),
+ # use the HF override of the same helper -- it injects the promptable
+ # parameters AND a ``model`` parameter derived from ``analyzer_versions``.
+ # Also set ``analyzer_versions={<model_id>: <revision>, ...}`` on the
+ # ``AppMetadata(...)`` call above (replaces the singular
+ # ``analyzer_version`` for HF apps). See
+ # https://clams.ai/clams-python/app-baseclasses.html#hf-promptable for details.
+ # from clams.app import ClamsHFPromptableApp
+ # ClamsHFPromptableApp.inject_promptable_parameters(metadata)
+ #
+ # To customize the default value of any promptable parameter (e.g. provide an
+ # app-specific default ``prompt``, raise ``maxNewTokens``, pin ``parallelPrompts``,
+ # etc.), mutate the ``default`` field on the already-injected parameter
+ # object; the SDK does NOT allow re-declaring promptable param names. See
+ # https://clams.ai/clams-python/app-baseclasses.html#promptable-customizing-defaults
+ # for details. Example:
+ # for p in metadata.parameters:
+ # if p.name == 'prompt':
+ # p.default = ['Describe what is in this image.']
+ # elif p.name == 'maxNewTokens':
+ # p.default = 2048
+ #
+ # HF-only: the ``model`` parameter the HF helper injects gets its ``default``
+ # auto-set to the only key when ``analyzer_versions`` has a single entry
+ # (singleton family); for multi-member families the default is ``None`` and
+ # the caller MUST pass ``model=...`` on every request. To provide a
+ # recommended pick instead, mutate ``default`` the same way:
+ # for p in metadata.parameters:
+ # if p.name == 'model':
+ # p.default = '<model_id>'
+
# CHANGE this line and make sure return the compiled `metadata` instance
return None
diff --git a/clams/develop/templates/utl-tf/__init__.py.template b/clams/develop/templates/utl-tf/__init__.py.template
new file mode 100644
index 0000000..e69de29
diff --git a/clams/develop/templates/utl-tf/timeframe.py.template b/clams/develop/templates/utl-tf/timeframe.py.template
new file mode 100644
index 0000000..d2c8d7b
--- /dev/null
+++ b/clams/develop/templates/utl-tf/timeframe.py.template
@@ -0,0 +1,154 @@
+"""
+TimeFrame iteration / frame-sampling helpers, local to this app.
+
+Generated by ``clams develop -r utl-tf``. The code in this file is part
+of your app, not the SDK; edit it freely, refactor as needed, or delete
+the whole file if your app does not iterate TimeFrame annotations.
+
+The functions here factor out the canonical pattern that any CLAMS app
+processing video by TimeFrames tends to write:
+
+ 1. iterate TimeFrame annotations across input views, optionally
+ filtered by label
+ 2. sample frames per TF using the universal ``tfSamplingMode``
+ parameter (representative TimePoints, the middle representative,
+ or every target / native-FPS frame)
+ 3. when ``vdh`` returns a fallback timestamp (milliseconds, no
+ existing TP behind it), mint a fresh ``TimePoint`` annotation in
+ the app's new view so downstream code has a stable anchor id
+ 4. assemble per-TF task tuples that downstream batching /
+ inference / annotation code can consume uniformly
+
+The helpers are backend-agnostic: tasks can feed a HuggingFace VLM, a
+remote LLM API, a classical CV pipeline, or any other per-frame
+processor. They have no dependency on ``clams.app.ClamsPromptableApp``
+or any other promptable / inference machinery.
+
+These functions are scaffolded into each app so individual apps can
+edit them freely while the pattern stabilizes across the ecosystem.
+Once the shape converges across several apps, the helpers are good
+candidates for promotion into a shared package -- either
+``mmif.utils`` (for the pure-MMIF iteration / TP minting pieces, which
+have no clams-app dependency) or ``clams.<submodule>`` (for the
+task-tuple composition that does presuppose the "writing into a new
+view" CLAMS-app idiom). If/when that happens, apps would import the
+shared version and delete this local copy.
+"""
+from typing import Any, Iterator, List, Optional, Tuple, Union
+
+from mmif import Annotation, Document, Mmif, View, AnnotationTypes
+from mmif.utils import video_document_helper as vdh
+
+
+def iter_timeframes(
+    mmif: Mmif, tflabels_of_interest: List[str],
+) -> Iterator[Annotation]:
+    """
+    Walk every view of ``mmif`` that contains TimeFrame annotations
+    and yield those annotations one by one, in view order.
+
+    :param mmif: the input MMIF object.
+    :param tflabels_of_interest: label filter. When the list is
+        non-empty, a TimeFrame is yielded only if its ``label``
+        property is one of the listed values. When it is empty, no
+        filtering happens and the ``label`` property is never read.
+    """
+    for tf_view in mmif.get_all_views_contain(AnnotationTypes.TimeFrame):
+        for timeframe in tf_view.get_annotations(AnnotationTypes.TimeFrame):
+            # Short-circuit keeps ``label`` untouched when no filter
+            # was requested.
+            if (not tflabels_of_interest
+                    or timeframe.get_property('label')
+                    in tflabels_of_interest):
+                yield timeframe
+
+
+def to_timepoints(
+    parent_view: View,
+    video_doc: Document,
+    sources: List[Union[str, int]],
+) -> List[str]:
+    """
+    Turn the frame ``sources`` reported by
+    :func:`vdh.extract_images_by_mode_with_sources` into TimePoint
+    ids, one per source and in the same order.
+
+    How each ``source`` is handled:
+
+    * ``str`` -- taken to be the id of an already-existing TimePoint
+      annotation and copied to the output as is.
+    * anything else -- taken to be a millisecond timestamp with no
+      TimePoint behind it. A new ``TimePoint`` annotation is created
+      in ``parent_view`` with ``document=video_doc.id``,
+      ``timePoint=int(source)`` and ``timeUnit='milliseconds'``, and
+      the id of that new annotation goes to the output.
+
+    ``parent_view.new_contain(AnnotationTypes.TimePoint)`` is called
+    at most once per invocation, right before the first TimePoint is
+    created; a call that only sees string sources does not touch
+    ``parent_view`` at all.
+
+    :param parent_view: the view this app is writing into; receives
+        any newly created TimePoints.
+    :param video_doc: the source VideoDocument; its ``id`` is
+        recorded as ``document`` on each new TimePoint.
+    :param sources: per-frame source identifiers from ``vdh``.
+    :return: a list of TimePoint ids, parallel to ``sources``.
+    """
+    timepoint_ids: List[str] = []
+    contain_declared = False
+    for source in sources:
+        if isinstance(source, str):
+            # Existing TimePoint: pass its id through.
+            timepoint_ids.append(source)
+            continue
+        if not contain_declared:
+            parent_view.new_contain(AnnotationTypes.TimePoint)
+            contain_declared = True
+        minted = parent_view.new_annotation(
+            AnnotationTypes.TimePoint,
+            document=video_doc.id,
+            timePoint=int(source),
+            timeUnit='milliseconds',
+        )
+        timepoint_ids.append(minted.id)
+    return timepoint_ids
+
+
+def collect_timeframes_of_interest(
+    mmif: Mmif,
+    parent_view: View,
+    video_doc: Document,
+    tflabels_of_interest: List[str],
+) -> List[Tuple[List[Any], List[str], str, Optional[str]]]:
+    """
+    Chain :func:`iter_timeframes`,
+    :func:`vdh.extract_images_by_mode_with_sources` and
+    :func:`to_timepoints` into one pass over the input MMIF, building
+    one ``(images, tp_ids, tf_id, tf_label)`` task for every matching
+    TimeFrame. TimeFrames for which ``vdh`` returns no images are
+    left out.
+
+    Within a task, ``images`` and ``tp_ids`` are parallel lists with
+    one entry per sampled frame. ``tp_ids`` comes from
+    :func:`to_timepoints`, so each entry is the id of either an
+    existing TimePoint or one newly created in ``parent_view``.
+    ``tf_id`` is the TimeFrame's ``id`` and ``tf_label`` is whatever
+    ``get_property('label')`` returns for it.
+
+    :param mmif: the input MMIF.
+    :param parent_view: the view this app is writing into.
+    :param video_doc: the source VideoDocument that frames are
+        extracted from.
+    :param tflabels_of_interest: optional label filter; empty list =
+        no filter.
+    :return: per-TF task tuples, in iteration order.
+    """
+    collected: List[Tuple[List[Any], List[str], str, Optional[str]]] = []
+    for timeframe in iter_timeframes(mmif, tflabels_of_interest):
+        frames, frame_sources = vdh.extract_images_by_mode_with_sources(
+            mmif, timeframe, as_PIL=True)
+        if frames:
+            anchor_ids = to_timepoints(parent_view, video_doc, frame_sources)
+            label = timeframe.get_property('label')
+            collected.append(
+                (list(frames), anchor_ids, timeframe.id, label))
+    return collected
diff --git a/documentation/app-baseclasses.rst b/documentation/app-baseclasses.rst
new file mode 100644
index 0000000..6ac5cfc
--- /dev/null
+++ b/documentation/app-baseclasses.rst
@@ -0,0 +1,521 @@
+.. _app-baseclasses:
+
+Specialized App Base Classes
+============================
+
+Beyond the bare-minimum :class:`~clams.app.ClamsApp` introduced in
+:ref:`introduction`, the SDK provides specialized base classes that capture
+common structural patterns for CLAMS apps. Each specialized base class
+extends :class:`~clams.app.ClamsApp` with a standardized runtime parameter
+surface and helper methods appropriate to its category of app. App
+developers inherit from the specialized base class that best matches what
+their app does, instead of inheriting from :class:`~clams.app.ClamsApp`
+directly.
+
+This page first recaps what every CLAMS app inherits from
+:class:`~clams.app.ClamsApp` (the baseline), then documents each
+specialized base class and what it adds on top.
+
+.. _app-baseline:
+
+What every CLAMS app inherits
+-----------------------------
+
+Every CLAMS app subclasses :class:`~clams.app.ClamsApp` (directly or via
+a specialized base class such as :class:`~clams.app.ClamsPromptableApp`)
+and inherits its baseline behaviors: parameter casting and refinement,
+view signing, JSON envelope unwrapping, CUDA memory profiling and
+cleanup, error views, and a set of *universal* runtime parameters that
+the SDK auto-injects into every app's metadata.
+
+Universal parameters
+^^^^^^^^^^^^^^^^^^^^
+
+Added automatically by :meth:`~clams.app.ClamsApp.__init__` at runtime
+and by the standard ``metadata.py`` template's ``__main__`` block at
+``python metadata.py`` time. App developers do not declare them.
+
+.. list-table::
+ :header-rows: 1
+ :widths: 18 12 18 8 44
+
+ * - Name
+ - Type
+ - Default
+ - Multi-valued
+ - Notes
+ * - ``pretty``
+ - boolean
+ - ``false``
+ - no
+ - When ``true``, the response MMIF JSON is re-formatted with
+ 2-space indentation.
+ * - ``runningTime``
+ - boolean
+ - ``true``
+ - no
+ - When ``true``, the running time of the request is recorded in
+ the view metadata.
+ * - ``hwFetch``
+ - boolean
+ - ``false``
+ - no
+ - When ``true``, host hardware info (architecture, GPU and vRAM)
+ is recorded in the view metadata.
+ * - ``tfSamplingMode``
+ - string
+ - ``'representatives'``
+ - no
+ - For apps that process ``TimeFrame`` annotations: how to sample
+ frames within each TimeFrame. Choices: ``'representatives'``,
+ ``'single'``, ``'all'``. No effect on apps that do not process
+ TimeFrames.
+
+.. _sdk-managed-reserved:
+
+SDK-managed parameter names are reserved
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Parameter names added by the SDK (the universal parameters listed
+above, plus any parameters added by a specialized base class) are
+reserved. An app's ``appmetadata()`` MUST NOT declare any of these
+names via :meth:`AppMetadata.add_parameter` directly; doing so trips
+the existing duplicate-name ``ValueError`` when the SDK tries to add
+its own spec.
+
+This reservation guarantees a uniform, predictable parameter interface
+across all CLAMS apps. App developers can still customize a reserved
+parameter's *default value* (but not its ``type``, ``multivalued``, or
+``choices``) by mutating the ``default`` field on the already-injected
+parameter object; see :ref:`promptable-customizing-defaults` for a
+worked example.
+
+.. _promptable:
+
+Promptable CLAMS Apps
+---------------------
+
+A **promptable app** is a CLAMS app that wraps a promptable model: a large
+language model (LLM), vision-language model (VLM), audio-language model
+(ALM), large multimodal model (LMM), or remote generative API. The SDK
+provides :class:`~clams.app.ClamsPromptableApp` as a specialized base class
+for these apps. It standardizes the runtime parameter surface (prompts,
+generation hyperparameters, batch size) and provides helpers for building
+chat conversations and persisting model responses into MMIF.
+
+This section is the developer guide for writing or migrating a CLAMS app
+that inherits from :class:`~clams.app.ClamsPromptableApp`. For the general
+CLAMS app development pattern, see the :ref:`introduction`,
+:ref:`tutorial`, and :ref:`runtime-params` pages.
+
+When to use ``ClamsPromptableApp``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Choose :class:`~clams.app.ClamsPromptableApp` over :class:`~clams.app.ClamsApp`
+when your app's core operation is "given a prompt and some input
+(image/audio/text/structured data), return generated text." Concretely:
+
+- Image captioning, VLM-based OCR, scene description
+- Audio captioning, transcription via ALMs
+- Summarization, classification, structured-data extraction via LLMs
+- Tasks driven by an LMM that takes mixed-modality inputs
+- Any app that wraps a remote LLM, VLM, ALM, or LMM API and forwards a prompt
+
+If your app does not call a generative model (e.g. a classical OCR engine,
+a speech-to-text engine that doesn't take prompts, a classifier wrapping a
+discriminative model), keep using :class:`~clams.app.ClamsApp` directly.
+
+.. note::
+
+ ``ClamsPromptableApp`` assumes an **instruction- or chat-tuned**
+ model with a system/user/assistant role structure. Bare completion
+ / next-token-prediction base models do not fit this base class
+ cleanly; use :class:`~clams.app.ClamsApp` directly for those.
+
+Standardized runtime parameters
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Every :class:`~clams.app.ClamsPromptableApp` exposes the following
+SDK-managed runtime parameters in addition to the universal parameters
+from :class:`~clams.app.ClamsApp`. These names are reserved; see
+:ref:`sdk-managed-reserved`.
+
+.. list-table::
+ :header-rows: 1
+ :widths: 18 12 18 8 44
+
+ * - Name
+ - Type
+ - Default
+ - Multi-valued
+ - Notes
+ * - ``prompt``
+ - string
+ - *(required, no default)*
+ - yes
+ - User prompt(s) sent to the model. A single value runs as a one-shot
+ generation. A multi-value list is interpreted as a multi-turn static
+ prompt; see :ref:`promptable-multiturn`.
+ * - ``systemPrompt``
+ - string
+ - ``''``
+ - no
+ - Optional system-role text prepended to the conversation.
+ * - ``promptMode``
+ - string
+ - ``'turn-taking'``
+ - no
+ - How to interpret a multi-value ``prompt`` list. Choices:
+ ``'turn-taking'`` or ``'user-only'``. See :ref:`promptable-multiturn`.
+ * - ``maxNewTokens``
+ - integer
+ - ``512``
+ - no
+ - Maximum number of new tokens generated per inference call. Larger values
+ grow the KV cache linearly and add to GPU memory usage; reduce if VRAM
+ is constrained.
+ * - ``temperature``
+ - number
+ - ``0.0``
+ - no
+ - Sampling temperature. ``0.0`` selects deterministic / greedy decoding
+ for maximum reproducibility; override for sampled generation.
+ * - ``topP``
+ - number
+ - ``1.0``
+ - no
+ - Nucleus-sampling cumulative probability cutoff. Only meaningful when
+ ``temperature`` > 0.
+ * - ``topK``
+ - integer
+ - ``50``
+ - no
+ - Top-K sampling cutoff. Only meaningful when ``temperature`` > 0.
+ * - ``parallelPrompts``
+ - integer
+ - ``1``
+ - no
+ - Number of independent prompts the app stacks into a single
+ forward pass. Per-prompt content size is the app's
+ responsibility; prompt count and per-prompt size combine
+ multiplicatively for GPU memory. Keep at ``1`` on memory-tight
+ setups; see the parameter's own description in
+ :py:attr:`~clams.app.ClamsPromptableApp.promptable_parameters`
+ for an OOM-risk example.
+
+.. _promptable-customizing-defaults:
+
+Customizing default values
+""""""""""""""""""""""""""
+
+The SDK ships sensible defaults for most promptable parameters but
+deliberately leaves ``prompt`` **without** a default; prompts are
+inherently app-specific and no single value is right for all apps.
+Beyond ``prompt``, other defaults may also be inappropriate for a given
+app: a model that needs longer outputs wants a higher ``maxNewTokens``,
+a small-VRAM deployment wants ``parallelPrompts`` pinned at ``1``, etc.
+
+Because the reservation rule prevents calling
+``metadata.add_parameter('prompt', ...)`` (or any other promptable name)
+directly, the recommended pattern for customizing defaults is to mutate
+the ``default`` field on the already-injected parameter object right
+after calling :meth:`~clams.app.ClamsPromptableApp.inject_promptable_parameters`.
+You'll see a worked example of this in the ``metadata.py`` generated
+by the ``clams develop`` scaffold.
+
+This works for any promptable parameter. The parameter spec itself
+(``type``, ``multivalued``, ``choices``) stays locked by the SDK; only
+the ``default`` field is meant to be mutated this way, which preserves
+the cross-app uniformity that the reservation rule is designed to
+guarantee.
+
+If an app *wants* to require callers to pass a value explicitly (for
+``prompt`` or any other parameter), it can simply leave the default
+unchanged. ``prompt`` already has no default, so the SDK will raise a
+"required parameter" error if the caller omits it; for other params,
+deleting the SDK default and leaving it ``None`` would have the same
+effect, though that's rarely useful.
+
+.. _promptable-declaration:
+
+Declaring a promptable app
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A promptable app requires two paired edits relative to the scaffold
+generated by ``clams develop``:
+
+1. In ``app.py``, change the app class's base from
+ :class:`~clams.app.ClamsApp` to
+ :class:`~clams.app.ClamsPromptableApp` and implement
+ :meth:`~clams.app.ClamsPromptableApp.generate`. The scaffold file
+ already contains a guiding comment at the class declaration line.
+2. In ``metadata.py``, call
+ :meth:`ClamsPromptableApp.inject_promptable_parameters
+ <clams.app.ClamsPromptableApp.inject_promptable_parameters>` at
+ the end of ``appmetadata()``. The scaffold file already contains
+ a commented-out helper-call block; uncomment it.
+
+The ``__main__`` block in ``metadata.py`` is unchanged from
+non-promptable apps. The helper call inside ``appmetadata()`` makes
+the promptable parameters visible to both ``python metadata.py``
+(build-time discovery) and to
+:meth:`~clams.app.ClamsApp._load_appmetadata` (runtime). The base
+class change ensures the app inherits the parameter-presence
+validation, the ``generate()`` contract, and the helper methods at
+runtime.
+
+The ``generate()`` contract
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Subclasses of :class:`~clams.app.ClamsPromptableApp` that wrap a backend
+without a default SDK implementation (e.g., remote-API or custom local
+backends) MUST implement :meth:`~clams.app.ClamsPromptableApp.generate`.
+Subclasses of :class:`~clams.app.ClamsHFPromptableApp` inherit a concrete
+``generate()`` and do not need to override it. See the method's docstring
+for the full signature, batch semantics, and return value.
+
+Keep inference logic inside ``generate()`` distinct from MMIF I/O; the
+latter belongs in ``_annotate()`` (which calls ``self.generate()``).
+This separation lets HF-backed apps inherit the default ``generate()``
+without restating backend mechanics, and lets non-HF apps swap in a new
+``generate()`` without rewriting their MMIF I/O.
+
+.. _promptable-multiturn:
+
+Multi-turn handling (``promptMode``)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``prompt`` is always a ``List[str]`` after parameter casting. When the
+list has a single element, ``promptMode`` is irrelevant (single-shot
+generation). When the list has multiple elements, ``promptMode`` selects
+between two multi-element prompting strategies:
+
+**Turn-taking** (default). The list is interpreted as an alternating
+user/assistant conversation: even indices (0, 2, 4, ...) are user turns,
+odd indices are assistant turns. The full conversation is sent to the
+model in a single ``generate`` call. This mode supports any pattern
+that fits an alternating role structure, including few-shot in-context
+learning (where the (user, assistant) pairs are task exemplars and the
+final user turn is the new query), multi-turn dialogue continuation,
+and role-play scaffolding. Example (few-shot ICL): ``["Classify
+sentiment: 'I love this.'", "positive", "Classify sentiment: 'I hate
+this.'", "negative", "Classify sentiment: 'It's okay.'"]``: two
+exemplar pairs followed by a final query; one inference returns the
+final reply.
+
+**User-only**. Every element is a user turn; the model generates an
+assistant reply after each one, in N successive ``generate`` calls (one
+per element). Only the final assistant response is returned per input item. This mode
+implements iterative / scripted multi-step prompting, a manual,
+externally-driven scaffold for stepwise reasoning. (It is distinct
+from in-model zero-shot chain-of-thought, where stepwise reasoning is
+elicited inside a single inference call by a prompt like "let's think
+step by step"; here, the user-side scaffolding makes the steps
+explicit and feeds each intermediate model output back as context for
+the next user turn.) Example (scripted multi-step reasoning):
+``["Step 1: identify objects.", "Step 2: describe relationships.",
+"Step 3: conclude."]``: three sequential user prompts, three
+inferences, final reply returned.
+
+``turn-taking`` is the default because it costs a single inference call
+and is the more common multi-element pattern.
+
+Helpers
+^^^^^^^
+
+:meth:`~clams.app.ClamsPromptableApp.inject_promptable_parameters`
+ A static method called from your app's ``appmetadata()`` (in
+ ``metadata.py``) to add the SDK-managed promptable parameters.
+
+:meth:`~clams.app.ClamsPromptableApp.build_conversation`
+ Instance method that constructs a chat-template-compatible message
+ list (or a ``List[List[dict]]`` of progressively-extending prefixes
+ for ``user-only`` mode). Handles string and list prompt forms, the
+ two ``promptMode`` semantics, the optional ``systemPrompt``, and
+ inlines ``images`` / ``audios`` into the (final) user turn. Accepts
+ a pre-built ``List[dict]`` and returns it unchanged. Subclasses
+ may override to access model-specific state (e.g.
+ ``self.processor``) when formatting messages.
+
+:meth:`~clams.app.ClamsPromptableApp.response_to_grounded_textdocument`
+ Writes a ``TextDocument`` plus an ``Alignment`` (``source -> TD``)
+ into a view. ``source`` is the coarse cross-modal anchor; the
+ optional ``origins`` (paired with ``origination``) is the finer
+ derivation list, written to the TD's ``origins`` / ``origination``
+ properties. See https://clams.ai/clams-vocabulary/Document for
+ vocabulary semantics.
+
+.. _hf-promptable:
+
+HuggingFace Promptable Apps
+---------------------------
+
+For the very common case of "promptable CLAMS app + local HuggingFace
+``transformers`` model," the SDK provides
+:class:`~clams.app.ClamsHFPromptableApp`, a specialized subclass of
+:class:`~clams.app.ClamsPromptableApp` that absorbs all HF-specific
+inference boilerplate. Concrete apps inheriting from it declare the
+model via a few class attributes and typically only need to implement
+``_annotate()`` for their MMIF I/O.
+
+When to use
+^^^^^^^^^^^
+
+Choose :class:`~clams.app.ClamsHFPromptableApp` over plain
+:class:`~clams.app.ClamsPromptableApp` when your app:
+
+- wraps a local HuggingFace ``transformers`` model loadable via
+ ``from_pretrained()``, AND
+- runs the standard chat-template -> ``model.generate`` ->
+ ``batch_decode`` inference pipeline (every modern instruct-tuned
+ VLM/LLM in HF), AND
+- doesn't need bespoke pixel-value preprocessing or vision-token
+ stitching at inference time.
+
+If your app uses a remote API instead (OpenAI, Anthropic, etc.), or a
+non-HF local backend, inherit from
+:class:`~clams.app.ClamsPromptableApp` directly and implement
+:meth:`~clams.app.ClamsPromptableApp.generate` yourself.
+
+.. _hf-promptable-declaring:
+
+Declaring an HF promptable app
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+On top of the baseline declaration shared by every promptable app
+(see :ref:`promptable-declaration`), a
+:class:`~clams.app.ClamsHFPromptableApp` subclass:
+
+1. Uses :class:`~clams.app.ClamsHFPromptableApp` (not
+ :class:`~clams.app.ClamsPromptableApp`) as the base class in
+ ``app.py``.
+2. Declares the required class attribute ``MODEL_CLS`` and any
+ optional dtype / padding / kwargs hints (see
+ :ref:`hf-promptable-class-attrs` for the full list).
+3. Sets ``analyzer_versions={<model_id>: <commit_hash>, ...}`` on the
+ ``AppMetadata`` constructor call in ``metadata.py`` (replaces the
+ singular ``analyzer_version`` for HF apps).
+4. Calls
+ :meth:`ClamsHFPromptableApp.inject_promptable_parameters
+ <clams.app.ClamsHFPromptableApp.inject_promptable_parameters>`
+ (the HF override of the plain helper) at the end of
+ ``appmetadata()``. The scaffold ``metadata.py`` contains a
+ commented-out HF block; uncomment it.
+5. Inherits the base class's concrete
+ :meth:`~clams.app.ClamsHFPromptableApp.generate` implementation;
+ no override needed.
+
+For a minimal worked example, see the class docstring on
+:class:`~clams.app.ClamsHFPromptableApp`.
+
+.. _hf-promptable-class-attrs:
+
+Class-attribute hooks
+^^^^^^^^^^^^^^^^^^^^^
+
+Concrete subclasses declare the model class plus optional dtype /
+padding hints via class attributes, and declare the family of
+supported model variants (with pinned commits) via
+``analyzer_versions`` in ``metadata.py``:
+
+.. list-table::
+ :header-rows: 1
+ :widths: 22 60 18
+
+ * - Attribute
+ - Meaning
+ - Required
+ * - ``MODEL_CLS``
+ - ``transformers`` model class (e.g.
+ :class:`~transformers.AutoModelForImageTextToText`,
+ :class:`~transformers.AutoModelForCausalLM`).
+ - yes
+ * - ``PROCESSOR_CLS``
+ - Processor / tokenizer / feature-extractor class. Defaults to
+ :class:`~transformers.AutoProcessor`.
+ - no
+ * - ``DTYPE``
+ - Torch dtype for the model and for ``pixel_values`` casting in
+ :py:meth:`~clams.app.ClamsHFPromptableApp.generate`. E.g.
+ ``torch.bfloat16`` for low-precision LLM inference.
+ - no
+ * - ``PADDING_SIDE``
+ - Tokenizer padding side. ``'left'`` for decoder-only batched
+ generation; leave unset otherwise.
+ - no
+ * - ``MODEL_KWARGS`` / ``PROCESSOR_KWARGS``
+ - Extra kwargs forwarded to the respective
+ ``from_pretrained()`` calls (e.g.
+ ``trust_remote_code=True``).
+ - no
+
+The HF model identifiers themselves are NOT a class attribute. They
+live in ``metadata.py`` as ``analyzer_versions``, a
+``Dict[str, str]`` mapping each supported model id to its pinned
+commit hash. The SDK auto-derives a ``model`` runtime parameter
+from this dict, with ``choices`` set to the dict keys.
+
+Family / singleton handling
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When ``analyzer_versions`` contains a single entry (the typical
+single-model app), the SDK eagerly pre-loads that one model in
+``__init__`` and sets ``model.default`` to the only key so callers
+can omit the parameter. Single-model apps thus preserve warm-start
+semantics: the model is loaded at app startup, not on first request.
+
+When ``analyzer_versions`` contains multiple entries (a family app),
+loading is deferred until the first
+:py:meth:`~clams.app.ClamsHFPromptableApp.load_model` call inside
+``_annotate``, and ``model`` is injected without a default; callers
+must pick a family member explicitly (or the developer mutates
+``model.default`` post-injection to provide a recommended pick).
+Loaded models are cached per ``(model_id, revision)`` for the
+lifetime of the app instance; switching models loads on first miss,
+cache-hits on repeat.
+
+Reproducibility: ``model`` refinement and view metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The user-facing ``model`` parameter accepts raw HF model ids
+(``org/repo-name``). The SDK's
+:py:meth:`~clams.app.ClamsHFPromptableApp._refine_params` expands the
+raw value to ``org/repo-name@<commit_hash>`` form (using the
+``analyzer_versions`` lookup) during parameter refinement. The standard ``sign_view`` flow
+then stamps:
+
+- the **raw** user choice into ``view.metadata.parameters['model']``
+ (transparency: what the user typed),
+- the **resolved** ``org/repo-name@<commit_hash>`` into
+ ``view.metadata.appConfiguration['model']`` (reproducibility: the
+ exact commit applied).
+
+A consumer of the output MMIF can read the resolved revision directly
+from the view metadata, with no cross-reference to the app metadata
+required.
+
+What the base class provides
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A subclass typically only writes ``_annotate()``. The base class
+supplies:
+
+* model loading and caching via
+ :py:meth:`~clams.app.ClamsHFPromptableApp.load_model`, which wraps
+ :func:`clams.backends.hf.load_hf_model` (non-promptable HF apps
+ can call that loader directly without going through this base
+ class);
+* the parameter injector
+ :py:meth:`ClamsHFPromptableApp.inject_promptable_parameters
+ <clams.app.ClamsHFPromptableApp.inject_promptable_parameters>`;
+* a concrete batched HF
+ :py:meth:`~clams.app.ClamsHFPromptableApp.generate`;
+* a default
+ :py:meth:`~clams.app.ClamsHFPromptableApp.build_gen_kwargs` that
+ maps the SDK promptable parameters to HF ``model.generate()``
+ kwargs.
+
+See each method's docstring for full details.
+
+Apps using the HF backend (with or without the promptable wrapper)
+must install the ``[hf]`` extra: ``pip install clams-python[hf]``.
+
diff --git a/documentation/index.rst b/documentation/index.rst
index 9f6db33..5882e11 100644
--- a/documentation/index.rst
+++ b/documentation/index.rst
@@ -16,6 +16,7 @@ CLAMS Python SDK
introduction
input-output
runtime-params
+ app-baseclasses
gpu-apps
appmetadata
appdirectory
diff --git a/documentation/introduction.rst b/documentation/introduction.rst
index ce907e8..96435c5 100644
--- a/documentation/introduction.rst
+++ b/documentation/introduction.rst
@@ -72,13 +72,16 @@ As a developer you can expose different behaviors of the ``annotate()`` method b
These runtime configurations are not part of the MMIF input, but for reproducible analysis, you should record these configurations in the output MMIF.
.. note::
- There are *universal* parameters defined at the SDK-level that all CLAMS apps commonly use. See :const:`clams.app.ClamsApp.universal_parameters`.
+ Some runtime parameters are managed by the SDK itself rather than declared per-app. The *universal* parameters in :const:`clams.app.ClamsApp.universal_parameters` are one such set; they are auto-added to every CLAMS app. Specialized base classes (see below) add their own SDK-managed parameter sets on top.
.. warning::
All the runtime configurations should be pre-announced in the app metadata.
Also see <:doc:`tutorial`> for a step-by-step tutorial on how to write the ``_annotate()`` method with a simple example NLP tool.
+.. note::
+ Inheriting from :class:`~clams.app.ClamsApp` directly works for any CLAMS app. For common app categories (e.g. apps wrapping LLM or other multimodal models), the SDK provides specialized base classes that extend :class:`~clams.app.ClamsApp` with additional SDK-managed parameter sets and helpers. See :ref:`app-baseclasses`.
+
appmetadata()
"""""""""""""
diff --git a/documentation/modules.rst b/documentation/modules.rst
index 7897b4a..d25e3cd 100644
--- a/documentation/modules.rst
+++ b/documentation/modules.rst
@@ -7,5 +7,6 @@ API documentation
autodoc/clams.app
autodoc/clams.appmetadata
+ autodoc/clams.backends
autodoc/clams.restify
autodoc/clams.mmif_utils
diff --git a/documentation/runtime-params.rst b/documentation/runtime-params.rst
index 4d3bf93..146102e 100644
--- a/documentation/runtime-params.rst
+++ b/documentation/runtime-params.rst
@@ -190,6 +190,20 @@ For more complex value structures (e.g., comma-separated lists within values),
the app developer is responsible for further parsing and should document the
expected format in the parameter's ``description`` field.
+.. _runtime-params-promptable-note:
+
+Promptable apps: an extra SDK-managed parameter set
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For apps that wrap an **instruction- or chat-tuned** promptable model
+(an LLM or other multimodal model, local or remote), inherit from
+:class:`~clams.app.ClamsPromptableApp` instead of
+:class:`~clams.app.ClamsApp`. The promptable base class adds a
+standardized SDK-managed parameter set on top of the universal
+parameters; the names are reserved and are added via a single helper
+call inside ``appmetadata()``. See :ref:`promptable` for the full
+developer guide and parameter list.
+
.. _runtime-params-envelope-note:
Note on JSON envelope input
diff --git a/documentation/tutorial.md b/documentation/tutorial.md
index b7f7469..e5d2698 100644
--- a/documentation/tutorial.md
+++ b/documentation/tutorial.md
@@ -146,7 +146,7 @@ This means that if the user doesn't specify the value for these parameters at th
If you want to make a parameter "optional" by providing a default value, you can do so by adding a `default` argument to the `add_parameter()` method.
> **Note**
-> Also refer to [CLAMS App Metadata](https://sdk.clams.ai/appmetadata.html) for more details regarding what fields need to be specified.
+> Also refer to [CLAMS App Metadata](https://clams.ai/clams-python/appmetadata.html) for more details regarding what fields need to be specified.
#### `_annotate()`
The `_annotate()` method should accept a MMIF file/string/object as its first parameter and always returns a `MMIF` object with an additional `view` containing annotation results. This is where the bulk of your logic will go. For a text processing app, it is mostly concerned with finding text documents, calling the code that runs over the text, creating new views and inserting the results.
@@ -228,20 +228,20 @@ First, with `text_value` we get the text from the text document, either from its
## Working with TimeFrame Annotations
-Many CLAMS apps process video by operating on TimeFrame annotations produced by an upstream app (e.g., scene detection, shot segmentation). A TimeFrame can carry structural members (currently called `targets` — a list of TimePoint IDs covering every frame in the segment), a salient subset of those members (currently called `representatives`), or simply `start`/`end` boundaries.
+Many CLAMS apps process video by operating on TimeFrame annotations produced by an upstream app (e.g., scene detection, shot segmentation). A TimeFrame can carry structural members (currently called `targets`, a list of TimePoint IDs covering every frame in the segment), a salient subset of those members (currently called `representatives`), or simply `start`/`end` boundaries.
> **Note**
> The property names `targets` and `representatives` are under review and may be renamed in a future MMIF spec version. See [mmif#238](https://github.com/clamsproject/mmif/issues/238) for the ongoing discussion. The SDK API will be updated accordingly.
### Frame sampling with `tfSamplingMode`
-When your app receives TimeFrame annotations, the caller can control which frames your app processes by setting the `tfSamplingMode` runtime parameter. This is a **universal parameter** — automatically available on every CLAMS app without any per-app configuration.
+When your app receives TimeFrame annotations, the caller can control which frames your app processes by setting the `tfSamplingMode` runtime parameter. This is a **universal parameter**: automatically available on every CLAMS app without any per-app configuration.
There are three modes:
-- `representatives` (default) — use the frames listed in the TimeFrame's `representatives` property. If no representatives exist, the TimeFrame is skipped.
-- `single` — pick one frame: the middle representative if available, otherwise the midpoint of the start/end interval.
-- `all` — use every frame in `targets` if present, otherwise generate every frame in the start/end interval.
+- `representatives` (default): use the frames listed in the TimeFrame's `representatives` property. If no representatives exist, the TimeFrame is skipped.
+- `single`: pick one frame, namely the middle representative if available, otherwise the midpoint of the start/end interval.
+- `all`: use every frame in `targets` if present, otherwise generate every frame in the start/end interval.
App developers do **not** need to handle this parameter themselves. The SDK intercepts it in `annotate()` and sets a context variable before `_annotate()` runs. Inside `_annotate()`, calls to `vdh.extract_frames_by_mode()` automatically read the active mode and select frames accordingly. The underlying per-mode functions (`_sample_representatives()`, `_sample_single()`, `_sample_all()`) in `mmif.utils.video_document_helper` are also available for apps that need frame numbers without extracting images.
diff --git a/pyproject.toml b/pyproject.toml
index 1b3fc4e..048410d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,7 @@ classifiers = [
"Programming Language :: Python :: 3 :: Only",
]
dependencies = [
- "mmif-python==1.4.0",
+ "mmif-python==1.5.0",
"Flask>=2",
"Flask-RESTful>=0.3.9",
"gunicorn>=20",
@@ -39,6 +39,9 @@ source = "https://github.com/clamsproject/clams-python"
dev = ["pytype", "pytest", "pytest-cov", "pillow", "setuptools"]
docs = ["sphinx>=7.0,<8.0", "furo", "m2r2", "sphinx-jsonschema"]
test = ["pytype", "pytest", "pytest-cov", "pillow"]
+# Required for apps using the HuggingFace transformers backend
+# (clams.backends.hf). Heavy deps; opt-in only.
+hf = ["torch", "transformers", "pillow", "tqdm"]
[tool.setuptools.packages.find]
where = ["."]
diff --git a/tests/test_backends_hf.py b/tests/test_backends_hf.py
new file mode 100644
index 0000000..fae696e
--- /dev/null
+++ b/tests/test_backends_hf.py
@@ -0,0 +1,450 @@
+"""
+Tests for :mod:`clams.backends.hf`.
+
+Exercises the device / dtype / padding-side / kwargs-passthrough
+behavior of both :func:`load_hf_model` and :func:`load_hf_pipeline`
+against mocked ``transformers`` model, processor, and pipeline
+constructors.
+
+If ``torch`` or ``transformers`` is not installed, the whole file is
+skipped (both are optional deps behind the ``[hf]`` extra).
+"""
+import unittest
+from unittest import mock
+
+import pytest
+
+pytest.importorskip('torch')
+pytest.importorskip('transformers')
+
+# Force ``transformers.pipeline`` to be eagerly resolved into the
+# package's ``__dict__``. ``transformers`` uses a lazy-loading
+# ``_LazyModule`` that fetches submodule attributes via
+# ``__getattr__`` on first access; before that, the attribute does
+# not live in ``__dict__``. The first ``mock.patch('transformers.pipeline', ...)``
+# call would then silently fail to redirect ``from transformers import pipeline``
+# inside the helper. Touching the attribute here resolves it and
+# caches it in the package dict, so subsequent ``mock.patch`` calls
+# rewrite the real entry as expected.
+import transformers # noqa: E402
+_ = transformers.pipeline
+
+from clams.backends.hf import load_hf_model, load_hf_pipeline # noqa: E402
+
+
+# ---------------------------------------------------------------------------
+# Mocks
+# ---------------------------------------------------------------------------
+
+class _MockModel:
+ """Stand-in for a ``transformers`` model class.
+
+ Records the arguments of the most recent ``from_pretrained`` call on
+ the class itself, and tracks ``.to()`` / ``.eval()`` calls on each
+ instance, so tests can assert on what the loader did.
+ """
+
+ # cross-test state — each test should set this to None first
+ last_from_pretrained_args = None
+ last_from_pretrained_kwargs = None
+
+ @classmethod
+ def from_pretrained(cls, model_id, **kwargs):
+ # Capture the call on the class (not the instance) so it can be
+ # inspected after the loader returns; kwargs are copied.
+ cls.last_from_pretrained_args = (model_id,)
+ cls.last_from_pretrained_kwargs = dict(kwargs)
+ return cls()
+
+ def __init__(self):
+ # ``device`` stays None until ``to()`` is called; ``eval_called``
+ # stays False until ``eval()`` is called.
+ self.device = None
+ self.eval_called = False
+
+ def to(self, device):
+ # Remember the requested device and return self.
+ self.device = device
+ return self
+
+ def eval(self):
+ # Flag that eval mode was requested and return self.
+ self.eval_called = True
+ return self
+
+
+class _MockTokenizer:
+ """Stand-in tokenizer exposing only the attributes the tests read."""
+
+ def __init__(self):
+ # Initial values the tests compare against: right padding and an
+ # unset pad token.
+ self.padding_side = 'right'
+ self.pad_token = None
+ # Placeholder EOS value; the tests only compare it for equality
+ # with ``pad_token``.
+ # NOTE(review): an empty string may be a stripped '</s>' -- confirm.
+ self.eos_token = ''
+
+
+class _MockProcessor:
+ """Stand-in for ``AutoProcessor`` (or similar).
+
+ Records the most recent ``from_pretrained`` call on the class and
+ carries a :class:`_MockTokenizer` as ``tokenizer``.
+ """
+
+ # Class-level record of the last ``from_pretrained`` call; the test
+ # ``setUp`` methods reset these to None.
+ last_from_pretrained_args = None
+ last_from_pretrained_kwargs = None
+
+ @classmethod
+ def from_pretrained(cls, model_id, **kwargs):
+ # Capture the call on the class so it can be inspected after the
+ # loader returns; kwargs are copied.
+ cls.last_from_pretrained_args = (model_id,)
+ cls.last_from_pretrained_kwargs = dict(kwargs)
+ return cls()
+
+ def __init__(self):
+ # Fresh tokenizer per processor instance, so padding-side changes
+ # made on one instance do not carry over to another.
+ self.tokenizer = _MockTokenizer()
+
+
+# ---------------------------------------------------------------------------
+# Test cases
+# ---------------------------------------------------------------------------
+
+class TestDefaultsOnly(unittest.TestCase):
+ """
+ Case (a): caller passes only ``model_id`` + ``model_cls`` (plus the
+ mock ``processor_cls`` every test here supplies).
+ No dtype, no padding_side, no extra kwargs.
+ """
+
+ def setUp(self):
+ # Clear the class-level call records on both mocks before each test.
+ _MockModel.last_from_pretrained_args = None
+ _MockModel.last_from_pretrained_kwargs = None
+ _MockProcessor.last_from_pretrained_args = None
+ _MockProcessor.last_from_pretrained_kwargs = None
+
+ def test_returns_processor_model_device_tuple(self):
+ result = load_hf_model(
+ 'fake-model-id', _MockModel, processor_cls=_MockProcessor)
+ self.assertEqual(len(result), 3)
+ processor, model, device = result
+ self.assertIsInstance(processor, _MockProcessor)
+ self.assertIsInstance(model, _MockModel)
+ self.assertIsInstance(device, str)
+ # cpu or cuda depending on host — must be one of them
+ self.assertIn(device, ('cpu', 'cuda'))
+
+ def test_no_torch_dtype_passed_when_dtype_is_none(self):
+ load_hf_model(
+ 'fake-model-id', _MockModel, processor_cls=_MockProcessor)
+ # When dtype is None, helper should NOT inject torch_dtype into
+ # model_cls.from_pretrained (let the model class use its own
+ # default).
+ kwargs = _MockModel.last_from_pretrained_kwargs
+ self.assertNotIn('torch_dtype', kwargs)
+
+ def test_padding_side_untouched_when_not_requested(self):
+ processor, _, _ = load_hf_model(
+ 'fake-model-id', _MockModel, processor_cls=_MockProcessor)
+ # Default 'right' should persist; helper should NOT have
+ # rewritten it.
+ self.assertEqual(processor.tokenizer.padding_side, 'right')
+ # pad_token should NOT have been forced to EOS.
+ self.assertIsNone(processor.tokenizer.pad_token)
+
+ def test_model_put_in_eval_mode(self):
+ # With default arguments the loader is expected to call ``eval()``.
+ _, model, _ = load_hf_model(
+ 'fake-model-id', _MockModel, processor_cls=_MockProcessor)
+ self.assertTrue(model.eval_called)
+
+
+class TestDecoderOnlyMode(unittest.TestCase):
+ """
+ Case (b): caller passes ``padding_side='left'`` (decoder-only
+ batched generation) and an explicit ``dtype``.
+ """
+
+ def setUp(self):
+ # Clear the class-level call records on both mocks before each test.
+ _MockModel.last_from_pretrained_args = None
+ _MockModel.last_from_pretrained_kwargs = None
+ _MockProcessor.last_from_pretrained_args = None
+ _MockProcessor.last_from_pretrained_kwargs = None
+
+ def test_padding_side_set_to_left_on_tokenizer(self):
+ processor, _, _ = load_hf_model(
+ 'fake-model-id', _MockModel,
+ processor_cls=_MockProcessor,
+ padding_side='left',
+ )
+ self.assertEqual(processor.tokenizer.padding_side, 'left')
+
+ def test_pad_token_set_from_eos_when_unset(self):
+ # The mock tokenizer starts with ``pad_token=None``; after a
+ # left-padding load it is expected to equal ``eos_token``.
+ processor, _, _ = load_hf_model(
+ 'fake-model-id', _MockModel,
+ processor_cls=_MockProcessor,
+ padding_side='left',
+ )
+ self.assertEqual(
+ processor.tokenizer.pad_token,
+ processor.tokenizer.eos_token,
+ )
+
+ def test_dtype_forwarded_as_torch_dtype(self):
+ # The loader's ``dtype`` argument should reach the model's
+ # ``from_pretrained`` under the ``torch_dtype`` keyword.
+ import torch
+ load_hf_model(
+ 'fake-model-id', _MockModel,
+ processor_cls=_MockProcessor,
+ dtype=torch.bfloat16,
+ padding_side='left',
+ )
+ self.assertEqual(
+ _MockModel.last_from_pretrained_kwargs.get('torch_dtype'),
+ torch.bfloat16,
+ )
+
+
+class TestKwargsPassThrough(unittest.TestCase):
+ """
+ Case (c): ``model_kwargs`` and ``processor_kwargs`` reach the
+ respective ``from_pretrained`` calls. Validates the SWT-style
+ pattern (use_safetensors, use_fast, add_pooling_layer, etc.).
+ """
+
+ def setUp(self):
+ # Clear the class-level call records on both mocks before each test.
+ _MockModel.last_from_pretrained_args = None
+ _MockModel.last_from_pretrained_kwargs = None
+ _MockProcessor.last_from_pretrained_args = None
+ _MockProcessor.last_from_pretrained_kwargs = None
+
+ def test_model_kwargs_reach_from_pretrained(self):
+ load_hf_model(
+ 'fake-model-id', _MockModel,
+ processor_cls=_MockProcessor,
+ model_kwargs={'use_safetensors': True,
+ 'add_pooling_layer': False},
+ )
+ kw = _MockModel.last_from_pretrained_kwargs
+ self.assertTrue(kw.get('use_safetensors'))
+ self.assertFalse(kw.get('add_pooling_layer'))
+
+ def test_processor_kwargs_reach_from_pretrained(self):
+ load_hf_model(
+ 'fake-model-id', _MockModel,
+ processor_cls=_MockProcessor,
+ processor_kwargs={'use_safetensors': True, 'use_fast': True},
+ )
+ kw = _MockProcessor.last_from_pretrained_kwargs
+ self.assertTrue(kw.get('use_safetensors'))
+ self.assertTrue(kw.get('use_fast'))
+
+ def test_model_id_arrives_first_positional(self):
+ # Both mocks record ``(model_id,)`` as their positional args.
+ load_hf_model(
+ 'fake-model-id', _MockModel, processor_cls=_MockProcessor)
+ self.assertEqual(
+ _MockModel.last_from_pretrained_args, ('fake-model-id',))
+ self.assertEqual(
+ _MockProcessor.last_from_pretrained_args, ('fake-model-id',))
+
+ def test_model_and_processor_kwargs_do_not_cross_contaminate(self):
+ """SWT mixes incompatible kwargs across model and processor;
+ ensure helper doesn't blindly merge them."""
+ load_hf_model(
+ 'fake-model-id', _MockModel,
+ processor_cls=_MockProcessor,
+ model_kwargs={'add_pooling_layer': False},
+ processor_kwargs={'use_fast': True},
+ )
+ # add_pooling_layer is model-only; should NOT reach processor
+ self.assertNotIn(
+ 'add_pooling_layer',
+ _MockProcessor.last_from_pretrained_kwargs)
+ # use_fast is processor-only; should NOT reach model
+ self.assertNotIn(
+ 'use_fast',
+ _MockModel.last_from_pretrained_kwargs)
+
+
+class TestDeviceResolution(unittest.TestCase):
+ """Device resolution: an explicitly passed ``device`` is honored.
+
+ The auto-detected cpu/cuda path (``device`` omitted) is asserted in
+ ``TestDefaultsOnly.test_returns_processor_model_device_tuple``.
+ """
+
+ def setUp(self):
+ # Only the model mock's call records are reset here.
+ _MockModel.last_from_pretrained_args = None
+ _MockModel.last_from_pretrained_kwargs = None
+
+ def test_explicit_device_honored(self):
+ # The requested device must be both returned and applied via
+ # the model's ``.to()``.
+ _, model, device = load_hf_model(
+ 'fake-model-id', _MockModel,
+ processor_cls=_MockProcessor,
+ device='cpu',
+ )
+ self.assertEqual(device, 'cpu')
+ self.assertEqual(model.device, 'cpu')
+
+
+class TestMoveToDeviceFlag(unittest.TestCase):
+ """
+ ``move_to_device=False`` skips both the ``.to(device)`` move and
+ the ``.eval()`` switch, for library-style HF wrappers that defer
+ device placement and inference-mode switching to a downstream
+ consumer.
+ """
+
+ def setUp(self):
+ # Only the model mock's call records are reset here.
+ _MockModel.last_from_pretrained_args = None
+ _MockModel.last_from_pretrained_kwargs = None
+
+ def test_move_skipped_when_flag_false(self):
+ _, model, _ = load_hf_model(
+ 'fake-model-id', _MockModel,
+ processor_cls=_MockProcessor,
+ move_to_device=False,
+ )
+ # _MockModel.__init__ leaves device=None; .to() would set it.
+ self.assertIsNone(model.device)
+
+ def test_eval_skipped_when_flag_false(self):
+ _, model, _ = load_hf_model(
+ 'fake-model-id', _MockModel,
+ processor_cls=_MockProcessor,
+ move_to_device=False,
+ )
+ # _MockModel.__init__ leaves eval_called=False; .eval() would
+ # flip it.
+ self.assertFalse(model.eval_called)
+
+ def test_resolved_device_still_returned(self):
+ """Even when not moved, the resolved target is reported so the
+ downstream consumer can use it for its own ``.to(device)``."""
+ _, _, device = load_hf_model(
+ 'fake-model-id', _MockModel,
+ processor_cls=_MockProcessor,
+ device='cpu',
+ move_to_device=False,
+ )
+ self.assertEqual(device, 'cpu')
+
+ def test_default_still_moves_and_evals(self):
+ """Regression guard: the default (omitted) value of the new
+ flag preserves prior behavior."""
+ _, model, _ = load_hf_model(
+ 'fake-model-id', _MockModel,
+ processor_cls=_MockProcessor,
+ device='cpu',
+ )
+ self.assertEqual(model.device, 'cpu')
+ self.assertTrue(model.eval_called)
+
+
+# ---------------------------------------------------------------------------
+# load_hf_pipeline tests
+# ---------------------------------------------------------------------------
+
+class _FakePipeline:
+ """Captures the args/kwargs the helper forwards to
+ ``transformers.pipeline``. Instances double as the returned pipeline
+ object -- inert, identifiable stand-ins that define no ``__call__``."""
+
+ last_args = None
+ last_kwargs = None
+
+ def __init__(self, *args, **kwargs):
+ type(self).last_args = args
+ type(self).last_kwargs = dict(kwargs)
+
+
+def _patch_pipeline():
+ """Patch ``transformers.pipeline`` to record its call and return a
+ ``_FakePipeline`` instance."""
+ _FakePipeline.last_args = None
+ _FakePipeline.last_kwargs = None
+ return mock.patch('transformers.pipeline', _FakePipeline)
+
+
+class TestLoadHFPipelineDefaults(unittest.TestCase):
+ """The default path: just task + model_id."""
+
+ def test_returns_pipeline_and_device(self):
+ with _patch_pipeline():
+ pipe, device = load_hf_pipeline(
+ 'automatic-speech-recognition', 'openai/whisper-tiny')
+ self.assertIsInstance(pipe, _FakePipeline)
+ self.assertIn(device, ('cpu', 'cuda'))
+
+ def test_task_arrives_first_positional(self):
+ with _patch_pipeline():
+ load_hf_pipeline(
+ 'token-classification', 'fake/ner-model')
+ self.assertEqual(_FakePipeline.last_args, ('token-classification',))
+
+ def test_model_id_forwarded_as_model_kwarg(self):
+ with _patch_pipeline():
+ load_hf_pipeline(
+ 'automatic-speech-recognition', 'openai/whisper-tiny')
+ self.assertEqual(
+ _FakePipeline.last_kwargs.get('model'), 'openai/whisper-tiny')
+
+ def test_no_revision_kwarg_when_not_specified(self):
+ with _patch_pipeline():
+ load_hf_pipeline(
+ 'automatic-speech-recognition', 'openai/whisper-tiny')
+ self.assertNotIn('revision', _FakePipeline.last_kwargs)
+
+
+class TestLoadHFPipelineDevice(unittest.TestCase):
+ """Device handling: auto-detect, explicit string, explicit int."""
+
+ def test_auto_detect_when_none(self):
+ with _patch_pipeline():
+ _, device = load_hf_pipeline(
+ 'automatic-speech-recognition', 'openai/whisper-tiny')
+ self.assertIn(device, ('cpu', 'cuda'))
+ # Same value should have been passed to pipeline().
+ self.assertEqual(_FakePipeline.last_kwargs.get('device'), device)
+
+ def test_explicit_string_device_honored(self):
+ with _patch_pipeline():
+ _, device = load_hf_pipeline(
+ 'automatic-speech-recognition', 'openai/whisper-tiny',
+ device='cpu')
+ self.assertEqual(device, 'cpu')
+ self.assertEqual(_FakePipeline.last_kwargs.get('device'), 'cpu')
+
+ def test_explicit_int_device_honored(self):
+ """``pipeline()`` natively accepts ``-1`` for CPU, ``0+`` for
+ a specific GPU index. The helper passes it through unchanged."""
+ with _patch_pipeline():
+ _, device = load_hf_pipeline(
+ 'automatic-speech-recognition', 'openai/whisper-tiny',
+ device=-1)
+ self.assertEqual(device, -1)
+ self.assertEqual(_FakePipeline.last_kwargs.get('device'), -1)
+
+
+class TestLoadHFPipelineKwargsPassThrough(unittest.TestCase):
+ """``model_kwargs`` lands inside ``pipeline(model_kwargs={...})``;
+ ``pipeline_kwargs`` is spread directly into the pipeline call."""
+
+ def test_pipeline_kwargs_spread_into_call(self):
+ with _patch_pipeline():
+ load_hf_pipeline(
+ 'automatic-speech-recognition', 'openai/whisper-tiny',
+ pipeline_kwargs={
+ 'generate_kwargs': {'num_beams': 5},
+ 'batch_size': 8,
+ })
+ kw = _FakePipeline.last_kwargs
+ self.assertEqual(kw.get('generate_kwargs'), {'num_beams': 5})
+ self.assertEqual(kw.get('batch_size'), 8)
+
+ def test_model_kwargs_nested_under_model_kwargs(self):
+ with _patch_pipeline():
+ load_hf_pipeline(
+ 'automatic-speech-recognition', 'openai/whisper-tiny',
+ model_kwargs={'use_safetensors': True})
+ kw = _FakePipeline.last_kwargs
+ self.assertEqual(kw.get('model_kwargs'),
+ {'use_safetensors': True})
+
+ def test_revision_forwarded(self):
+ with _patch_pipeline():
+ load_hf_pipeline(
+ 'automatic-speech-recognition', 'openai/whisper-tiny',
+ revision='abc1234')
+ self.assertEqual(_FakePipeline.last_kwargs.get('revision'), 'abc1234')
+
+ def test_explicit_helper_args_take_precedence(self):
+ """If the caller smuggles ``model`` / ``device`` / ``revision``
+ through ``pipeline_kwargs``, the helper's own args win."""
+ with _patch_pipeline():
+ load_hf_pipeline(
+ 'automatic-speech-recognition', 'openai/whisper-tiny',
+ device='cpu', revision='abc1234',
+ pipeline_kwargs={
+ 'model': 'should-be-overridden',
+ 'device': 'should-be-overridden',
+ 'revision': 'should-be-overridden',
+ })
+ kw = _FakePipeline.last_kwargs
+ self.assertEqual(kw['model'], 'openai/whisper-tiny')
+ self.assertEqual(kw['device'], 'cpu')
+ self.assertEqual(kw['revision'], 'abc1234')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/test_promptable.py b/tests/test_promptable.py
new file mode 100644
index 0000000..44a8fe7
--- /dev/null
+++ b/tests/test_promptable.py
@@ -0,0 +1,585 @@
+"""
+Tests for :class:`clams.app.ClamsPromptableApp` and ``ClamsHFPromptableApp``.
+
+Covers the behavior documented in
+``documentation/app-baseclasses.rst``: parameter discovery via
+``inject_promptable_parameters()``, the reservation rule on
+promptable-param names, ``build_conversation()`` shape across the
+single-turn / turn-taking / user-only modes, and the
+``response_to_grounded_textdocument()`` output contract.
+"""
+import unittest
+
+from mmif import AnnotationTypes, DocumentTypes, Mmif
+
+from clams import AppMetadata, ClamsPromptableApp
+
+
+# ---------------------------------------------------------------------------
+# Test infrastructure
+# ---------------------------------------------------------------------------
+
+def make_metadata(call_helper=True, pre_declare=None,
+ analyzer_versions=None, hf_helper=False):
+ """
+ Build a fresh AppMetadata for tests.
+
+ :param call_helper: if True, calls
+ ``ClamsPromptableApp.inject_promptable_parameters(metadata)``
+ at the end (simulating a correctly-written ``appmetadata()``).
+ Ignored when ``hf_helper`` is True (the HF helper takes precedence).
+ :param pre_declare: if set to a parameter spec dict, calls
+ ``metadata.add_parameter(**pre_declare)`` BEFORE the helper
+ runs — used to test reservation enforcement.
+ :param analyzer_versions: if set, passed through to
+ ``AppMetadata(analyzer_versions=...)``. Required when the
+ fixture is consumed by ``ClamsHFPromptableApp`` tests.
+ :param hf_helper: if True, calls
+ ``ClamsHFPromptableApp.inject_promptable_parameters(metadata)``
+ (the HF override of the plain promptable helper). Use for HF
+ fixture builds.
+ """
+ kwargs = dict(
+ name="Example Promptable App",
+ description="Test fixture, creating input TD - output TD alignment",
+ app_license="MIT",
+ identifier="https://apps.clams.ai/example-promptable/v1",
+ url="https://fakegithub.com/some/repository",
+ )
+ if analyzer_versions is not None:
+ kwargs['analyzer_versions'] = analyzer_versions
+ m = AppMetadata(**kwargs)
+ m.add_input(DocumentTypes.TextDocument)
+ m.add_output(DocumentTypes.TextDocument)
+ m.add_output(AnnotationTypes.Alignment)
+ if pre_declare is not None:
+ m.add_parameter(**pre_declare)
+ if hf_helper:
+ from clams.app import ClamsHFPromptableApp
+ ClamsHFPromptableApp.inject_promptable_parameters(m)
+ elif call_helper:
+ ClamsPromptableApp.inject_promptable_parameters(m)
+ return m
+
+
+def make_test_app(metadata):
+ """
+ Factory creating a fresh ClamsPromptableApp subclass that loads the
+ given metadata. Each call produces a fresh class so per-test state
+ doesn't leak.
+ """
+
+ def _load_appmetadata(self):
+ return metadata
+
+ cls = type(
+ 'TestPromptableApp',
+ (ClamsPromptableApp,),
+ {
+ '_load_appmetadata': _load_appmetadata,
+ '_appmetadata': lambda self: None,
+ '_annotate': lambda self, mmif, **kw: mmif,
+ 'generate': lambda self, prompt, **kw: [""],
+ },
+ )
+ return cls()
+
+
+# ---------------------------------------------------------------------------
+# Parameter discovery (via the helper)
+# ---------------------------------------------------------------------------
+
+class TestParameterDiscovery(unittest.TestCase):
+
+ def test_all_promptable_params_present_after_init(self):
+ app = make_test_app(make_metadata(call_helper=True))
+ present = {p.name for p in app.metadata.parameters}
+ expected_promptable = {p['name']
+ for p in ClamsPromptableApp.promptable_parameters}
+ self.assertTrue(expected_promptable.issubset(present))
+
+ def test_prompt_has_no_sdk_default(self):
+ app = make_test_app(make_metadata(call_helper=True))
+ prompt_param = next(p for p in app.metadata.parameters
+ if p.name == 'prompt')
+ self.assertIsNone(prompt_param.default)
+ self.assertTrue(prompt_param.multivalued)
+
+ def test_system_prompt_default_empty_string(self):
+ app = make_test_app(make_metadata(call_helper=True))
+ sysprompt = next(p for p in app.metadata.parameters
+ if p.name == 'systemPrompt')
+ self.assertEqual(sysprompt.default, '')
+
+ def test_temperature_default_is_zero(self):
+ """When the caller omits ``temperature``, it should arrive in
+ ``_annotate()`` as the float ``0.0`` (deterministic decoding)."""
+ app = make_test_app(make_metadata(call_helper=True))
+ refined = app._refine_params(prompt=['hi'])
+ self.assertEqual(refined['temperature'], 0.0)
+ self.assertIsInstance(refined['temperature'], float)
+
+ def test_prompt_mode_choices(self):
+ app = make_test_app(make_metadata(call_helper=True))
+ pm = next(p for p in app.metadata.parameters
+ if p.name == 'promptMode')
+ self.assertEqual(set(pm.choices), {'user-only', 'turn-taking'})
+ self.assertEqual(pm.default, 'turn-taking')
+
+
+# ---------------------------------------------------------------------------
+# Required-prompt validation
+# ---------------------------------------------------------------------------
+
+class TestRequiredPrompt(unittest.TestCase):
+
+ def test_refine_params_raises_when_prompt_missing(self):
+ """
+ ``prompt`` has no SDK default. ``_refine_params`` must raise
+ ``ValueError`` when the caller omits it.
+ """
+ app = make_test_app(make_metadata(call_helper=True))
+ with self.assertRaises(ValueError) as ctx:
+ app._refine_params()
+ self.assertIn('prompt', str(ctx.exception))
+
+
+# ---------------------------------------------------------------------------
+# Missing-helper validation in __init__
+# ---------------------------------------------------------------------------
+
+class TestMissingHelperValidation(unittest.TestCase):
+
+ def test_init_raises_when_helper_not_called(self):
+ """
+ If ``appmetadata()`` forgets to call
+ ``inject_promptable_parameters()``, ``__init__`` must raise
+ ``ValueError`` with an instructive message.
+ """
+ with self.assertRaises(ValueError) as ctx:
+ make_test_app(make_metadata(call_helper=False))
+ msg = str(ctx.exception)
+ self.assertIn('inject_promptable_parameters', msg)
+
+
+# ---------------------------------------------------------------------------
+# Reservation enforcement (via duplicate-name ValueError)
+# ---------------------------------------------------------------------------
+
+class TestReservationEnforcement(unittest.TestCase):
+
+ def test_redeclaring_prompt_trips_duplicate_name_error(self):
+ """
+ An app that calls ``metadata.add_parameter('prompt', ...)``
+ before the helper trips the existing duplicate-name
+ ``ValueError`` from ``AppMetadata.add_parameter`` (which the
+ helper's own ``add_parameter`` call raises).
+ """
+ with self.assertRaises(ValueError) as ctx:
+ make_metadata(
+ call_helper=True,
+ pre_declare={
+ 'name': 'prompt',
+ 'description': 'app-defined collision',
+ 'type': 'string',
+ 'multivalued': True,
+ },
+ )
+ self.assertIn("'prompt'", str(ctx.exception))
+
+ def test_redeclaring_max_new_tokens_trips_error(self):
+ with self.assertRaises(ValueError) as ctx:
+ make_metadata(
+ call_helper=True,
+ pre_declare={
+ 'name': 'maxNewTokens',
+ 'description': 'app-defined collision',
+ 'type': 'integer',
+ 'default': 1024,
+ },
+ )
+ self.assertIn("'maxNewTokens'", str(ctx.exception))
+
+
+# ---------------------------------------------------------------------------
+# annotate_param_caster covers promptable params (no stale-spec drift)
+# ---------------------------------------------------------------------------
+
+class TestAnnotateParamCaster(unittest.TestCase):
+
+ def test_caster_includes_promptable_param_specs(self):
+ app = make_test_app(make_metadata(call_helper=True))
+ for spec in ClamsPromptableApp.promptable_parameters:
+ self.assertIn(spec['name'], app.annotate_param_spec)
+ stored_type, stored_multivalued = \
+ app.annotate_param_spec[spec['name']]
+ self.assertEqual(stored_type, spec['type'])
+ self.assertEqual(
+ stored_multivalued, spec.get('multivalued', False))
+
+ def test_multivalued_prompt_casts_to_list_of_strings(self):
+ app = make_test_app(make_metadata(call_helper=True))
+ refined = app._refine_params(prompt=['hello', 'world'])
+ self.assertEqual(refined['prompt'], ['hello', 'world'])
+
+ def test_max_new_tokens_casts_to_int(self):
+ app = make_test_app(make_metadata(call_helper=True))
+ refined = app._refine_params(prompt=['hi'], maxNewTokens=['1024'])
+ self.assertEqual(refined['maxNewTokens'], 1024)
+ self.assertIsInstance(refined['maxNewTokens'], int)
+
+ def test_temperature_casts_to_float(self):
+ app = make_test_app(make_metadata(call_helper=True))
+ refined = app._refine_params(prompt=['hi'], temperature=['0.7'])
+ self.assertEqual(refined['temperature'], 0.7)
+ self.assertIsInstance(refined['temperature'], float)
+
+
+# ---------------------------------------------------------------------------
+# build_conversation
+# ---------------------------------------------------------------------------
+
+class TestBuildConversation(unittest.TestCase):
+ """
+ Covers the shape of ``ClamsPromptableApp.build_conversation()``
+ across single-turn, turn-taking, and user-only modes, and the
+ pre-built-message pass-through case.
+ """
+
+ def setUp(self):
+ self.app = make_test_app(make_metadata(call_helper=True))
+
+ def test_string_prompt_single_user_turn(self):
+ conv = self.app.build_conversation(prompt="hello")
+ self.assertEqual(len(conv), 1)
+ self.assertEqual(conv[0]['role'], 'user')
+
+ def test_single_element_list_single_user_turn(self):
+ conv = self.app.build_conversation(prompt=['hello'])
+ self.assertEqual(len(conv), 1)
+ self.assertEqual(conv[0]['role'], 'user')
+
+ def test_turn_taking_alternating_turns(self):
+ conv = self.app.build_conversation(
+ prompt=['q1', 'a1', 'q2'], prompt_mode='turn-taking')
+ self.assertEqual(len(conv), 3)
+ self.assertEqual(conv[0]['role'], 'user')
+ self.assertEqual(conv[1]['role'], 'assistant')
+ self.assertEqual(conv[2]['role'], 'user')
+
+ def test_user_only_returns_progressively_extending_lists(self):
+ convs = self.app.build_conversation(
+ prompt=['q1', 'q2', 'q3'], prompt_mode='user-only')
+ # N progressively-extending message lists, one per turn
+ self.assertEqual(len(convs), 3)
+ # the last conversation holds all 3 user turns, plus any
+ # intermediate assistant turns (filled in by the model later;
+ # at build_conversation time they are placeholders or empty),
+ # so the test asserts only a lower bound on its length
+ self.assertGreaterEqual(len(convs[-1]), 3)
+
+ def test_pre_built_list_pass_through(self):
+ msgs = [
+ {'role': 'system', 'content': 'You are helpful.'},
+ {'role': 'user', 'content': 'hi'},
+ ]
+ conv = self.app.build_conversation(prompt=msgs)
+ self.assertEqual(conv, msgs)
+
+ def test_system_prompt_prepended(self):
+ conv = self.app.build_conversation(
+ prompt='hello', system_prompt='You are helpful.')
+ # first turn is a system message
+ self.assertEqual(conv[0]['role'], 'system')
+
+ def test_images_carried_in_user_content(self):
+ sentinel = object()
+ conv = self.app.build_conversation(
+ prompt='describe this', images=[sentinel])
+ # the sentinel image should appear somewhere in the first
+ # user-turn content
+ user_turn = next(m for m in conv if m['role'] == 'user')
+ # content is typically a list of dicts; flatten to a sequence
+ # of values and check for the sentinel
+ flat = []
+
+ def _walk(x):
+ if isinstance(x, dict):
+ for v in x.values():
+ _walk(v)
+ elif isinstance(x, list):
+ for v in x:
+ _walk(v)
+ else:
+ flat.append(x)
+
+ _walk(user_turn['content'])
+ self.assertIn(sentinel, flat)
+
+
+# ---------------------------------------------------------------------------
+# response_to_grounded_textdocument
+# ---------------------------------------------------------------------------
+
+class TestStoreResponse(unittest.TestCase):
+
+ def setUp(self):
+ self.app = make_test_app(make_metadata(call_helper=True))
+ self.mmif = Mmif(validate=False)
+ self.view = self.mmif.new_view()
+ self.app.sign_view(self.view, {})
+ self.view.new_contain(DocumentTypes.TextDocument)
+ self.view.new_contain(AnnotationTypes.Alignment)
+
+ def test_happy_path_creates_textdocument_and_alignment(self):
+ td, align = self.app.response_to_grounded_textdocument(
+ self.view, source='src1', response='generated text')
+ self.assertEqual(td.text_value, 'generated text')
+ self.assertEqual(align.get_property('source'), 'src1')
+ self.assertEqual(align.get_property('target'), td.id)
+
+ def test_reasoning_trace_none_does_not_raise(self):
+ # no exception
+ self.app.response_to_grounded_textdocument(
+ self.view, source='src1', response='text',
+ reasoning_trace=None)
+
+ def test_reasoning_trace_not_none_raises_not_implemented(self):
+ with self.assertRaises(NotImplementedError):
+ self.app.response_to_grounded_textdocument(
+ self.view, source='src1', response='text',
+ reasoning_trace='intermediate reasoning')
+
+ # TODO (krim @ 05/28/26): this test case belongs upstream in the
+ # vocabulary type definition (the `origins`/`origination` pairing
+ # is a property of the `Document` type, per clams-vocabulary#18,
+ # not a behavior of the SDK app layer). Move once clams-vocabulary
+ # supports conditional prop validation. For now, this is a sanity
+ # check that the SDK correctly forwards both kwargs through to the
+ # underlying TD.
+ def test_origins_and_origination_written_together(self):
+ td, align = self.app.response_to_grounded_textdocument(
+ self.view, source='tf1', response='caption text',
+ origins=['tp1'], origination='derived')
+ self.assertEqual(td.get_property('origins'), ['tp1'])
+ self.assertEqual(td.get_property('origination'), 'derived')
+ self.assertEqual(align.get_property('source'), 'tf1')
+ self.assertEqual(align.get_property('target'), td.id)
+
+ def test_unpaired_origins_or_origination_raises(self):
+ unpaired = [
+ {'origins': ['tp1']},
+ {'origination': 'derived'},
+ ]
+ for kwargs in unpaired:
+ with self.subTest(**kwargs), self.assertRaises(ValueError):
+ self.app.response_to_grounded_textdocument(
+ self.view, source='src1', response='text', **kwargs)
+
+
+# ---------------------------------------------------------------------------
+# Transport-neutral parameter casting
+# ---------------------------------------------------------------------------
+
+class TestTransportNeutralCasting(unittest.TestCase):
+ """
+ Just exercises the standard ``ClamsApp`` parameter-casting path.
+ Not envelope-specific; the point is that promptable apps see no
+ separate transport layer.
+ """
+
+ def test_multi_element_prompt_arrives_as_list_of_strings(self):
+ app = make_test_app(make_metadata(call_helper=True))
+ refined = app._refine_params(prompt=['a', 'b', 'c'])
+ self.assertEqual(refined['prompt'], ['a', 'b', 'c'])
+ for x in refined['prompt']:
+ self.assertIsInstance(x, str)
+
+ def test_single_element_prompt_still_list(self):
+ app = make_test_app(make_metadata(call_helper=True))
+ refined = app._refine_params(prompt=['only'])
+ self.assertEqual(refined['prompt'], ['only'])
+
+
+# ---------------------------------------------------------------------------
+# ClamsHFPromptableApp class-attribute validation
+# ---------------------------------------------------------------------------
+
+class TestHFPromptableAppClassAttrs(unittest.TestCase):
+ """
+ Exercises :meth:`ClamsHFPromptableApp.__init__` validation, the
+ ``load_model`` cache, and ``model`` expansion in ``_refine_params``.
+ Model loading is patched out so these tests don't require torch/transformers.
+ End-to-end inference tests live separately.
+ """
+
+ SINGLETON_AV = {'org/fake-model': 'deadbee'}
+ MULTI_AV = {
+ 'org/large-model': 'aaaaaaa',
+ 'org/small-model': 'bbbbbbb',
+ }
+
+ def _make_subclass(
+ self, *, model_cls=object,
+ analyzer_versions=None, **extra_attrs):
+ if analyzer_versions is None:
+ analyzer_versions = dict(self.SINGLETON_AV)
+ attrs = {
+ '_load_appmetadata': lambda self: make_metadata(
+ hf_helper=True,
+ analyzer_versions=dict(analyzer_versions),
+ ),
+ '_appmetadata': lambda self: None,
+ '_annotate': lambda self, mmif, **kw: mmif,
+ 'MODEL_CLS': model_cls,
+ }
+ attrs.update(extra_attrs)
+ from clams.app import ClamsHFPromptableApp
+ return type('TestHFApp', (ClamsHFPromptableApp,), attrs)
+
+ def test_missing_model_cls_raises(self):
+ cls = self._make_subclass(model_cls=None)
+ with self.assertRaises(ValueError) as ctx:
+ cls()
+ self.assertIn('MODEL_CLS', str(ctx.exception))
+
+ def test_missing_analyzer_versions_raises(self):
+ # Use the plain promptable helper so promptable params are
+ # injected (parent __init__ passes) but analyzer_versions is
+ # absent and ``model`` was never injected. HF __init__ should
+ # refuse on the analyzer_versions check.
+ from clams.app import ClamsHFPromptableApp
+ cls = type('TestHFAppBad', (ClamsHFPromptableApp,), {
+ '_load_appmetadata': lambda self: make_metadata(
+ call_helper=True), # plain promptable, no analyzer_versions
+ '_appmetadata': lambda self: None,
+ '_annotate': lambda self, mmif, **kw: mmif,
+ 'MODEL_CLS': object,
+ })
+ with self.assertRaises(ValueError) as ctx:
+ cls()
+ self.assertIn('analyzer_versions', str(ctx.exception))
+
+ def _patch_load(self):
+ """
+ Context-manager-ish helper that swaps in a fake ``load_hf_model``
+ recording every call. Returns ``(restore_fn, calls_list)``.
+ """
+ import clams.backends.hf as hf_module
+ original = hf_module.load_hf_model
+ calls = []
+
+ def fake_load(model_id, model_cls, **kwargs):
+ calls.append({'model_id': model_id, 'model_cls': model_cls, **kwargs})
+ # processor / model / device tuple uniquely identifiable
+ return (f'PROC:{model_id}@{kwargs.get("revision")}',
+ f'MODEL:{model_id}@{kwargs.get("revision")}',
+ 'cpu')
+
+ hf_module.load_hf_model = fake_load
+ return (lambda: setattr(hf_module, 'load_hf_model', original)), calls
+
+ def test_singleton_eagerly_preloads_in_init(self):
+ restore, calls = self._patch_load()
+ try:
+ cls = self._make_subclass(
+ analyzer_versions=self.SINGLETON_AV,
+ DTYPE='FAKE_DTYPE',
+ PADDING_SIDE='left',
+ MODEL_KWARGS={'trust_remote_code': True},
+ )
+ app = cls()
+ # eager load on the single family member
+ self.assertEqual(len(calls), 1)
+ self.assertEqual(calls[0]['model_id'], 'org/fake-model')
+ self.assertEqual(calls[0]['revision'], 'deadbee')
+ self.assertEqual(calls[0]['dtype'], 'FAKE_DTYPE')
+ self.assertEqual(calls[0]['padding_side'], 'left')
+ self.assertEqual(
+ calls[0]['model_kwargs'], {'trust_remote_code': True})
+ # self.processor / self.model / self.device populated
+ self.assertEqual(app.processor, 'PROC:org/fake-model@deadbee')
+ self.assertEqual(app.model, 'MODEL:org/fake-model@deadbee')
+ self.assertEqual(app.device, 'cpu')
+ finally:
+ restore()
+
+ def test_multimember_defers_loading(self):
+ restore, calls = self._patch_load()
+ try:
+ cls = self._make_subclass(analyzer_versions=self.MULTI_AV)
+ app = cls()
+ # no eager load for multi-member families
+ self.assertEqual(calls, [])
+ self.assertIsNone(app.processor)
+ self.assertIsNone(app.model)
+ self.assertIsNone(app.device)
+ finally:
+ restore()
+
+ def test_load_model_parses_at_revision_form_and_caches(self):
+ restore, calls = self._patch_load()
+ try:
+ cls = self._make_subclass(analyzer_versions=self.MULTI_AV)
+ app = cls()
+ # first call -- load via load_hf_model
+ app.load_model('org/large-model@aaaaaaa')
+ self.assertEqual(len(calls), 1)
+ self.assertEqual(calls[0]['model_id'], 'org/large-model')
+ self.assertEqual(calls[0]['revision'], 'aaaaaaa')
+ self.assertEqual(app.processor, 'PROC:org/large-model@aaaaaaa')
+ # second call same model -- cache hit, no new load
+ app.load_model('org/large-model@aaaaaaa')
+ self.assertEqual(len(calls), 1)
+ # switch to other family member -- new load
+ app.load_model('org/small-model@bbbbbbb')
+ self.assertEqual(len(calls), 2)
+ self.assertEqual(calls[1]['model_id'], 'org/small-model')
+ self.assertEqual(calls[1]['revision'], 'bbbbbbb')
+ self.assertEqual(app.processor, 'PROC:org/small-model@bbbbbbb')
+ # back to first -- still cached
+ app.load_model('org/large-model@aaaaaaa')
+ self.assertEqual(len(calls), 2)
+ self.assertEqual(app.processor, 'PROC:org/large-model@aaaaaaa')
+ finally:
+ restore()
+
+ def test_load_model_accepts_raw_form_looks_up_revision(self):
+ restore, calls = self._patch_load()
+ try:
+ cls = self._make_subclass(analyzer_versions=self.MULTI_AV)
+ app = cls()
+ app.load_model('org/small-model') # no @rev suffix
+ self.assertEqual(calls[0]['model_id'], 'org/small-model')
+ self.assertEqual(calls[0]['revision'], 'bbbbbbb')
+ finally:
+ restore()
+
+ def test_refine_params_expands_modelid_to_at_revision(self):
+ restore, _ = self._patch_load()
+ try:
+ cls = self._make_subclass(analyzer_versions=self.MULTI_AV)
+ app = cls()
+ refined = app._refine_params(
+ prompt=['hi'],
+ model=['org/large-model'],
+ )
+ self.assertEqual(refined['model'], 'org/large-model@aaaaaaa')
+ finally:
+ restore()
+
+ def test_singleton_default_lets_user_omit_modelid(self):
+ restore, _ = self._patch_load()
+ try:
+ cls = self._make_subclass(analyzer_versions=self.SINGLETON_AV)
+ app = cls()
+ # No model in input -- SDK fills in the singleton default,
+ # then our override expands it.
+ refined = app._refine_params(prompt=['hi'])
+ self.assertEqual(refined['model'], 'org/fake-model@deadbee')
+ finally:
+ restore()
+
+
+if __name__ == '__main__':
+ unittest.main()