clamsproject · clams-bot · Jun 9, 2026
diff --git a/docs/_apps/app-chapter-llama/index.md b/docs/_apps/app-chapter-llama/index.md
@@ -0,0 +1,8 @@
+---
+layout: posts
+classes: wide
+title: app-chapter-llama
+date: 1970-01-01T00:00:00+00:00
+---
+A CLAMS wrapper for Chapter-Llama, a video chaptering system that uses LLMs (Llama-3.1-8B-Instruct with LoRA adapters) to partition long videos into chapters with timestamps and titles. It processes speech transcripts (ASR) as text input to the LLM.
+- [v0.1.0](v0.1.0) ([`@bohJiang12`](https://github.com/bohJiang12))
diff --git a/docs/_apps/app-chapter-llama/v0.1.0/index.md b/docs/_apps/app-chapter-llama/v0.1.0/index.md
@@ -0,0 +1,107 @@
+---
+layout: posts
+classes: wide
+title: "Chapter-Llama (v0.1.0)"
+date: 2026-06-09T01:47:25+00:00
+---
+## About this version
+
+- Submitter: [bohJiang12](https://github.com/bohJiang12)
+- Submission Time: 2026-06-09T01:47:25+00:00
+- Prebuilt Container Image: [ghcr.io/clamsproject/app-chapter-llama:v0.1.0](https://github.com/clamsproject/app-chapter-llama/pkgs/container/app-chapter-llama/v0.1.0)
+- Release Notes
+
+    > release v0.1.0
+
+## About this app (See raw [metadata.json](metadata.json))
+
+**A CLAMS wrapper for Chapter-Llama, a video chaptering system that uses LLMs (Llama-3.1-8B-Instruct with LoRA adapters) to partition long videos into chapters with timestamps and titles. It processes speech transcripts (ASR) as text input to the LLM.**
+
+- App ID: [http://apps.clams.ai/app-chapter-llama/v0.1.0](http://apps.clams.ai/app-chapter-llama/v0.1.0)
+- App License: Apache 2.0
+- Source Repository: [https://github.com/clamsproject/app-chapter-llama](https://github.com/clamsproject/app-chapter-llama) ([source tree of the submitted version](https://github.com/clamsproject/app-chapter-llama/tree/v0.1.0))
+
+
+#### Inputs
+(**Note**: "*" as a property value means that the property is required but can be any value.)
+
+One of the following is required: [
+- [http://mmif.clams.ai/vocabulary/VideoDocument/v1](http://mmif.clams.ai/vocabulary/VideoDocument/v1) (required)
+(of any properties)
+
+- [http://mmif.clams.ai/vocabulary/AudioDocument/v1](http://mmif.clams.ai/vocabulary/AudioDocument/v1) (required)
+(of any properties)
+
+
+
+]
+
+
+#### Configurable Parameters
+(**Note**: _Multivalued_ means the parameter can have one or more values.)
+
+- `model`: optional, defaults to `asr-10k`
+
+    - Type: string
+    - Multivalued: False
+    - Choices: **_`asr-10k`_**, `asr-1k`, `captions_asr-10k`, `captions_asr-1k`
+
+
+    > Chapter-Llama LoRA model variant to use.
+- `windowTokenSize`: optional, defaults to `15000`
+
+    - Type: integer
+    - Multivalued: False
+
+
+    > Token size for each processing window. Longer videos are split into windows of this size.
+- `firstWindowOnly`: optional, defaults to `false`
+
+    - Type: boolean
+    - Multivalued: False
+    - Choices: **_`false`_**, `true`
+
+
+    > Process only the first window (for debugging).
+- `quantization`: optional, defaults to `4bit`
+
+    - Type: string
+    - Multivalued: False
+    - Choices: **_`4bit`_**, `8bit`, `none`
+
+
+    > Quantization method used to reduce GPU memory usage: '4bit' reduces memory usage by ~75%, '8bit' reduces it by ~50%, and 'none' uses full precision.
+- `pretty`: optional, defaults to `false`
+
+    - Type: boolean
+    - Multivalued: False
+    - Choices: **_`false`_**, `true`
+
+
+    > The JSON body of the HTTP response will be re-formatted with 2-space indentation.
+- `runningTime`: optional, defaults to `false`
+
+    - Type: boolean
+    - Multivalued: False
+    - Choices: **_`false`_**, `true`
+
+
+    > The running time of the app will be recorded in the view metadata.
+- `hwFetch`: optional, defaults to `false`
+
+    - Type: boolean
+    - Multivalued: False
+    - Choices: **_`false`_**, `true`
+
+
+    > The hardware information (architecture, GPU and vRAM) will be recorded in the view metadata.
+
+
+#### Outputs
+(**Note**: "*" as a property value means that the property is required but can be any value.)
+
+(**Note**: Not every output annotation type listed here is necessarily generated on every run.)
+
+- [http://mmif.clams.ai/vocabulary/Chapter/v6](http://mmif.clams.ai/vocabulary/Chapter/v6)
+    - _timeUnit_ = "milliseconds"
+
diff --git a/docs/_apps/app-chapter-llama/v0.1.0/metadata.json b/docs/_apps/app-chapter-llama/v0.1.0/metadata.json
@@ -0,0 +1,91 @@
+{
+  "name": "Chapter-Llama",
+  "description": "A CLAMS wrapper for Chapter-Llama, a video chaptering system that uses LLMs (Llama-3.1-8B-Instruct with LoRA adapters) to partition long videos into chapters with timestamps and titles. It processes speech transcripts (ASR) as text input to the LLM.",
+  "app_version": "v0.1.0",
+  "mmif_version": "1.1.0",
+  "app_license": "Apache 2.0",
+  "identifier": "http://apps.clams.ai/app-chapter-llama/v0.1.0",
+  "url": "https://github.com/clamsproject/app-chapter-llama",
+  "input": [
+    [
+      {
+        "@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1",
+        "required": true
+      },
+      {
+        "@type": "http://mmif.clams.ai/vocabulary/AudioDocument/v1",
+        "required": true
+      }
+    ]
+  ],
+  "output": [
+    {
+      "@type": "http://mmif.clams.ai/vocabulary/Chapter/v6",
+      "properties": {
+        "timeUnit": "milliseconds"
+      }
+    }
+  ],
+  "parameters": [
+    {
+      "name": "model",
+      "description": "Chapter-Llama LoRA model variant to use.",
+      "type": "string",
+      "choices": [
+        "asr-10k",
+        "asr-1k",
+        "captions_asr-10k",
+        "captions_asr-1k"
+      ],
+      "default": "asr-10k",
+      "multivalued": false
+    },
+    {
+      "name": "windowTokenSize",
+      "description": "Token size for each processing window. Longer videos are split into windows of this size.",
+      "type": "integer",
+      "default": 15000,
+      "multivalued": false
+    },
+    {
+      "name": "firstWindowOnly",
+      "description": "Process only the first window (for debugging).",
+      "type": "boolean",
+      "default": false,
+      "multivalued": false
+    },
+    {
+      "name": "quantization",
+      "description": "Quantization method to reduce GPU memory usage. '4bit' reduces ~75%, '8bit' reduces ~50%, 'none' uses full precision.",
+      "type": "string",
+      "choices": [
+        "4bit",
+        "8bit",
+        "none"
+      ],
+      "default": "4bit",
+      "multivalued": false
+    },
+    {
+      "name": "pretty",
+      "description": "The JSON body of the HTTP response will be re-formatted with 2-space indentation",
+      "type": "boolean",
+      "default": false,
+      "multivalued": false
+    },
+    {
+      "name": "runningTime",
+      "description": "The running time of the app will be recorded in the view metadata",
+      "type": "boolean",
+      "default": false,
+      "multivalued": false
+    },
+    {
+      "name": "hwFetch",
+      "description": "The hardware information (architecture, GPU and vRAM) will be recorded in the view metadata",
+      "type": "boolean",
+      "default": false,
+      "multivalued": false
+    }
+  ]
+}
diff --git a/docs/_apps/app-chapter-llama/v0.1.0/submission.json b/docs/_apps/app-chapter-llama/v0.1.0/submission.json
@@ -0,0 +1,6 @@
+{
+  "time": "2026-06-09T01:47:25+00:00",
+  "submitter": "bohJiang12",
+  "image": "ghcr.io/clamsproject/app-chapter-llama:v0.1.0",
+  "releasenotes": "release v0.1.0\n\n"
+}
diff --git a/docs/_data/app-index.json b/docs/_data/app-index.json
@@ -1,4 +1,14 @@
 {
+  "http://apps.clams.ai/app-chapter-llama": {
+    "description": "A CLAMS wrapper for Chapter-Llama, a video chaptering system that uses LLMs (Llama-3.1-8B-Instruct with LoRA adapters) to partition long videos into chapters with timestamps and titles. It processes speech transcripts (ASR) as text input to the LLM.",
+    "latest_update": "2026-06-09T01:47:25+00:00",
+    "versions": [
+      [
+        "v0.1.0",
+        "bohJiang12"
+      ]
+    ]
+  },
   "http://apps.clams.ai/smolvlm2-captioner": {
     "description": "Applies the SmolVLM2-2.2B-Instruct multimodal model to video frames selected by input TimeFrame annotations for prompt-driven captioning / scene description. Each invocation runs a single `prompt` against the TimeFrames selected by `tfLabels`; to apply different prompts to different label subsets (e.g. one prompt for slates, another for chyrons), run the app once per (`prompt`, `tfLabels`) combination. Per-TimeFrame captioning is composite: every frame sampled from a TF is fed to the model in a single prompt and yields one caption per TF. This app ships only the 2.2B-Instruct variant -- the largest and most general-purpose model in the SmolVLM2 family. The smaller (256M and 500M) SmolVLM2 releases are post-trained specifically for video-QA tasks and we do not expect them to generalize well, given their size.",
     "latest_update": "2026-06-01T20:28:09+00:00",

diff --git a/docs/_data/apps.json b/docs/_data/apps.json