diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml deleted file mode 100644 index 0f72760..0000000 --- a/.github/workflows/build-linux.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Build Linux AppImage - -on: - workflow_dispatch: - push: - tags: - - "v*" - -jobs: - appimage: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v5 - - - name: Install build deps - run: | - sudo apt-get update - sudo apt-get install -y \ - musl-tools \ - file \ - wget \ - desktop-file-utils \ - libfuse2 - - - name: Build musl launcher - run: | - mkdir -p dist - musl-gcc -O2 -static launcher.c -o dist/celune - file dist/celune - - - name: Prepare AppDir - run: | - mkdir -p Celune.AppDir - cp dist/celune Celune.AppDir/celune - ln -sf celune Celune.AppDir/AppRun - chmod +x Celune.AppDir/celune - - - name: Validate AppDir - run: | - test -x Celune.AppDir/celune - test -L Celune.AppDir/AppRun - test -f Celune.AppDir/celune.desktop - test -f Celune.AppDir/celune.png - file Celune.AppDir/celune - file Celune.AppDir/AppRun - desktop-file-validate Celune.AppDir/celune.desktop - - - name: Download appimagetool & Build AppImage - run: | - wget -O appimagetool https://github.com/AppImage/AppImageKit/releases/download/continuous/appimagetool-x86_64.AppImage - chmod +x appimagetool - ./appimagetool Celune.AppDir celune.appimage - - - name: Upload AppImage - uses: actions/upload-artifact@v6 - with: - name: Celune-Linux-x86_64 - path: celune.appimage diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml deleted file mode 100644 index 0afcb5c..0000000 --- a/.github/workflows/build-windows.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: Build Windows - -on: - workflow_dispatch: - push: - tags: - - "v*" - -jobs: - windows: - runs-on: windows-latest - - steps: - - uses: actions/checkout@v5 - - - name: Build launcher - shell: cmd - run: | - if not exist dist mkdir dist - - for /f "tokens=*" %%i in ('"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -find VC\Auxiliary\Build\vcvars64.bat') do set "VCVARS=%%i" - - if "%VCVARS%"=="" ( - echo vcvars64.bat not found - exit /b 1 - ) - - call "%VCVARS%" - - where cl - cl /nologo /O2 /MT launcher.c resources\celune.res /link /OUT:dist\celune.exe - - - name: Upload EXE - uses: actions/upload-artifact@v6 - with: - name: Celune-Windows-x86_64 - path: dist/celune.exe diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e90831e..0c7befb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,8 +57,8 @@ jobs: run: | uv run python -c "import celune; print(dir(celune))" - launcher-smoke-linux: - name: Linux Launcher Build Smoke Test + launcher-build-linux: + name: Linux Launcher Build runs-on: ubuntu-latest steps: @@ -68,56 +68,75 @@ jobs: - name: Install build tools run: | sudo apt-get update - sudo apt-get install -y musl-tools file + sudo apt-get install -y gcc wget desktop-file-utils libfuse2 - - name: Build launcher (musl static) - run: | - mkdir -p dist - musl-gcc -O2 -static launcher.c -o dist/celune + - name: Install uv + uses: astral-sh/setup-uv@v7 + + - name: Sync dependencies + run: uv sync --dev - - name: Validate launcher binary + - name: Install Nuitka into project environment + run: uv pip install --python .venv/bin/python Nuitka + + - name: Install appimagetool run: | - test -f dist/celune - test -x dist/celune - file dist/celune + wget -O /tmp/appimagetool https://github.com/AppImage/AppImageKit/releases/download/continuous/appimagetool-x86_64.AppImage + chmod +x /tmp/appimagetool + sudo mv /tmp/appimagetool /usr/local/bin/appimagetool - launcher-smoke-windows: - name: Windows Launcher Build Smoke Test + - name: Build Linux launcher artifacts + run: | + bash ./scripts/build_nuitka.sh + test -x bin/celune + test -x bin/celune-bin + test -f bin/celune.AppImage + + - name: Upload Linux launcher artifacts + uses: actions/upload-artifact@v6 + with: + name: Celune-linux-x64 + path: bin/ + + launcher-build-windows: + name: Windows Launcher Build runs-on: windows-latest steps: - name: Checkout repository uses: actions/checkout@v5 - - name: Build launcher - shell: cmd - run: | - if not exist dist mkdir dist - - for /f "tokens=*" %%i in ('"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -find VC\Auxiliary\Build\vcvars64.bat') do set "VCVARS=%%i" - - if "%VCVARS%"=="" ( - echo vcvars64.bat not found - exit /b 1 - ) + - name: Install uv + uses: astral-sh/setup-uv@v7 - call "%VCVARS%" + - name: Sync dependencies + shell: powershell + run: uv sync --dev - where cl - cl /nologo /O2 /MT launcher.c resources\celune.res /link /OUT:dist\celune.exe + - name: Install Nuitka into project environment + shell: powershell + run: uv pip install --python .venv\Scripts\python.exe Nuitka - - name: Validate EXE - shell: cmd + - name: Build Windows launcher artifacts + shell: powershell run: | - if not exist dist\celune.exe exit /b 1 + powershell -ExecutionPolicy Bypass -File .\scripts\build_nuitka.ps1 + if (!(Test-Path .\bin\celune.exe)) { throw "bin/celune.exe not found" } + if (!(Test-Path .\bin\celune-bin.exe)) { throw "bin/celune-bin.exe not found" } + + - name: Upload Windows launcher artifacts + uses: actions/upload-artifact@v6 + with: + name: Celune-win-x64 + path: bin/ notify-failure: name: Comment on CI Failure runs-on: ubuntu-latest needs: - python-checks - - launcher-smoke-linux - - launcher-smoke-windows + - launcher-build-linux + - launcher-build-windows if: ${{ always() && contains(join(needs.*.result, ','), 'failure') }} steps: @@ -132,18 +151,18 @@ jobs: EVENT_NAME: ${{ github.event_name }} PR_NUMBER: ${{ github.event.pull_request.number }} PYTHON_CHECKS_RESULT: ${{ needs.python-checks.result }} - LINUX_SMOKE_RESULT: ${{ needs.launcher-smoke-linux.result }} - WINDOWS_SMOKE_RESULT: ${{ needs.launcher-smoke-windows.result }} + LINUX_BUILD_RESULT: ${{ needs.launcher-build-linux.result }} + WINDOWS_BUILD_RESULT: ${{ needs.launcher-build-windows.result }} run: | failed_jobs="" if [ "$PYTHON_CHECKS_RESULT" = "failure" ]; then failed_jobs="${failed_jobs}- Anti AI Slop & Vibe Coding Checks failed"$'\n' fi - if [ "$LINUX_SMOKE_RESULT" = "failure" ]; then - failed_jobs="${failed_jobs}- Linux Launcher Build Smoke Test failed"$'\n' + if [ "$LINUX_BUILD_RESULT" = "failure" ]; then + failed_jobs="${failed_jobs}- Linux Launcher Build failed"$'\n' fi - if [ "$WINDOWS_SMOKE_RESULT" = "failure" ]; then - failed_jobs="${failed_jobs}- Windows Launcher Build Smoke Test failed"$'\n' + if [ "$WINDOWS_BUILD_RESULT" = "failure" ]; then + failed_jobs="${failed_jobs}- Windows Launcher Build failed"$'\n' fi body="$(printf "Detected potential slop in commit \`%s\`.\n\nThe following jobs have failed:\n%s\nPlease check the logs and unslop your code: %s\n" "$SHA" "$failed_jobs" "$RUN_URL")" diff --git a/.gitignore b/.gitignore index 4128098..34b3b35 100644 --- a/.gitignore +++ b/.gitignore @@ -221,7 +221,9 @@ __marimo__/ .tmp_hf_modules/ # Celune -# Celune has an output directory that is not meant to be part of Git. +# Celune has certain paths that are not meant to be included in the repository. # To prevent unnecessary speech samples from being included, it has been gitignored. # Other files that were once here have been moved to a unified data directory. +# Executable files are meant to be downloaded separately from Actions artifacts. outputs/ +bin/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..a2faa1d --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,190 @@ +# AGENTS.md + +## Project Overview + +Celune is a real-time local AI TTS character engine focused on expressive voice delivery, fast buffered speech generation, and a polished user experience. + +Celune supports multiple voice styles, configurable voice packs, frontend/API/extension modes, long-form narration, built-in DSP/audio controls, GPU inference, character responses, a Textual TUI, a FastAPI REST API, and a Gradio-based WebUI. + +The project targets Windows and Linux, supports Python 3.12 and 3.13, and is designed for consumer GPU hardware with VRAM presets from roughly 6 GB to 16 GB+. + +## Development Principles + +* Keep changes focused on the requested task. +* Avoid unrelated refactors. +* Prefer simple, maintainable code over clever code. +* Avoid unnecessary dependencies. +* Do not add placeholder implementations. +* Do not add TODO comments. +* Do not silently disable features to make tests pass. +* Preserve Celune's local-first, polished, anti-slop project identity. +* Reuse existing architecture instead of creating parallel systems. + +## Typing Style + +Prefer classic unions like `Union[str, int]` or `Optional[str]`, rather than using PEP 604 unions like `str | int` or `str | None`. + +Other typing features from e.g. PEP 585 or PEP 695 may be used normally. + +Avoid using broad types like `Any`, `object` or `T`, unless the function explicitly requires a broad type. + +Prefer concrete, meaningful types. + +## Reuse Existing Code + +Prefer reusable variables, constants, helpers, and project abstractions already present in the repository. + +Do not hardcode strings, colors, ports, paths, app names, status labels, or repeated values when the repository already defines them. + +Only hardcode or redefine values when importing the existing value would create a circular import, break architecture, create excessive coupling, or otherwise be impractical. + +## CI and Validation + +The canonical CI command is: + +```bash +python scripts/run_ci.py +``` + +On Windows, the path may appear as: + +```powershell +python scripts\run_ci.py +``` + +Prefixing it with `uv run` is not required, as it runs the CI commands with it already. + +Always use the CI script for validation unless explicitly instructed otherwise. + +Do not use: + +```text +- .\.venv\Scripts\python.exe +- python -m pytest +- pytest +- uv cache overrides +- UV_CACHE_DIR +- etc. +``` + +If for any reason any `uv` command exits with `Access is denied.` or `Permission denied` errors, apply `--no-cache` to `uv`, and try again. + +Do not modify the execution environment to work around failures. + +Before CI, format the repository with `uv run ruff format .`. + +Expected CI runtime is 3-5 minutes. + +If CI runtime exceeds 5 minutes: +- Assume it may have stalled. +- Stop it from running any further. +- Report that the CI has taken too long. +- Do not try to extend any timeouts. +- Do not try to work around the problem. +- Wait for any further guidance. + +After each task, run `scripts/update_docstrings.py` and then replace placeholders in docstrings like: + +```text +Describe this function. + +Args: + value: Value for `value`. + +Raises: + RuntimeError: If `RuntimeError` needs to be raised. + +Returns: + type: Result of this function. +``` + +with proper documentation, while preserving the docstring format. + +This process may leave some formatting inaccuracies, run `uv run ruff format .` again after completing docstrings. + +If CI fails or times out, report the actual failure clearly. Do not claim success. + +## Python and Environment + +* Supported Python versions are 3.12 and 3.13. +* Use `uv` for environment management. +* Do not use `pip` directly unless explicitly required. +* Do not assume CPU-only mode supports all features. CPU-only execution is only supported with Celune Mini. +* Be aware that many features require an RTX 30 series GPU or newer. + +## UI and WebUI + +Celune has a Textual terminal UI and a Gradio WebUI mounted through FastAPI. + +When modifying UI code: + +* Preserve Celune's visual identity. +* The WebUI should feel like a high-resolution counterpart to the TUI. +* Avoid generic Hugging Face Space-style design. +* Do not assume Gradio examples for older versions still apply. +* FastAPI is the application server; Gradio is mounted as the WebUI. +* Keep mobile/touch support in mind. +* Do not rely only on screen width for mobile behavior. Prefer pointer/hover media queries when the issue is input method. +* Desktop keyboard shortcuts must have visible button alternatives for touch devices. + +## API + +Celune exposes a REST API for programmatic use. + +When modifying API code: + +* Preserve existing endpoint behavior where practical. +* Reuse existing request and response models. +* Keep API behavior consistent with the TUI/WebUI runtime behavior. +* Do not make the WebUI depend on raw REST calls unless explicitly requested. + +## Audio and TTS + +Celune includes multiple TTS backends, voice styles, configurable voice packs, long-form narration support, built-in DSP, and native audio controls. + +When modifying audio code: + +* Prefer existing audio abstractions. +* Avoid adding large audio/game frameworks for small playback tasks. +* Do not bypass the existing playback, buffering, stream, or DSP infrastructure without a clear reason. +* Keep long-form narration stability in mind. +* Do not add markup/control tags to generated speech unless the backend explicitly supports them. + +## System Dependencies + +Celune may depend on external system tools such as SoX, Rubber Band, OpenRGB, CUDA Toolkit 12.8, symbolic link support on Windows, and C/C++ build tools for some backends. + +Do not remove checks, documentation, or fallback behavior for these dependencies without understanding the runtime impact. + +## Documentation + +Keep documentation concise, direct, and technically accurate. + +When documenting licensing, distinguish between: + +* Celune source code, licensed under MIT. +* Third-party models and assets, which may use their own licenses. + +Do not claim third-party models are covered by Celune's MIT license. + +When documenting commands, use the canonical project commands from the README. + +## Testing Behavior + +* Run relevant tests when practical. +* Prefer the full CI script for final validation. +* Do not silently narrow validation scope after a failure. +* If a test cannot be run, say why. +* If a command times out, report it as a timeout, not as a pass. +* Do not hide infrastructure failures behind vague wording. + +## If Unsure + +When unsure, prefer this order: + +1. Reuse existing project code. +2. Preserve current behavior. +3. Avoid new dependencies. +4. Keep the TUI, WebUI, API, and runtime consistent. +5. Run `python scripts/run_ci.py`. +6. Report failures honestly. diff --git a/GALLERY.md b/GALLERY.md new file mode 100644 index 0000000..723b219 --- /dev/null +++ b/GALLERY.md @@ -0,0 +1,27 @@ +# Gallery + +These images show off Celune's user interface. + +## Before init + +[![Before init](./demos/init.png)](./demos/init.png) + +## Ready + +[![Ready](./demos/ready.png)](./demos/ready.png) + +## Talking + +[![Talking](./demos/speaking.png)](./demos/speaking.png) + +## Change voice + +[![Change voice](./demos/change_voice.png)](./demos/change_voice.png) + +## Commands + +[![Commands](./demos/commands.png)](./demos/commands.png) + +## Extension invoke + +[![Extension invoke](./demos/extensions.png)](./demos/extensions.png) diff --git a/README.md b/README.md index e98bbf4..599609d 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ -![Celune](./resources/branding/celune_wordmark_transparent.png "Celune wordmark") +![Celune](./resources/branding/celune_wordmark.png "Celune wordmark") --- +![Python](https://img.shields.io/badge/Python-3.12%2F3.13-cebaff) ![License](https://img.shields.io/badge/License-MIT-cebaff) ![Platform](https://img.shields.io/badge/Platform-Windows%2FLinux-cebaff) ![VRAM](https://img.shields.io/badge/VRAM-6%20GB–16%20GB+-cebaff) + Celune is a real-time AI TTS character engine focused on expressive voice delivery with a well-made user experience. It has been designed to provide fast, high-quality voice generation, even on consumer-grade hardware. @@ -30,11 +32,15 @@ All decisions and implementations were reviewed, validated, and approved by huma Celune never was, and will never become an "AI slop" project. +## License note + +Celune is licensed under the [MIT license](https://opensource.org/license/mit), but the software may download certain models from [Hugging Face](https://huggingface.co) that are of varying licenses, such as [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) (Qwen, etc.) or [CC-BY-4.0](https://creativecommons.org/licenses/by/4.0/deed.en) (Pocket TTS). Users of Celune are expected to read and comply with any applicable license terms for the models they intend to use. + ## Voices & samples Each voice is demonstrated using a short introduction and a longer narration sample to showcase consistency, pacing, and expressiveness. -### Qwen +### Qwen3-TTS | Voice | Intro | Narration | |---|---|---| @@ -52,7 +58,7 @@ Each voice is demonstrated using a short introduction and a longer narration sam | Bold | [▶️ Play](https://gabalpha.github.io/read-audio/?p=https://raw.githubusercontent.com/celunah/celune/main/demos/bold_sc_voxcpm2.wav) | [▶️ Play](https://gabalpha.github.io/read-audio/?p=https://raw.githubusercontent.com/celunah/celune/main/demos/bold_lc_voxcpm2.wav) | | Upbeat | [▶️ Play](https://gabalpha.github.io/read-audio/?p=https://raw.githubusercontent.com/celunah/celune/main/demos/upbeat_sc_voxcpm2.wav) | [▶️ Play](https://gabalpha.github.io/read-audio/?p=https://raw.githubusercontent.com/celunah/celune/main/demos/upbeat_lc_voxcpm2.wav) | -### Celune Mini +### Celune Mini (Pocket TTS) | Voice | Intro | Narration | |---|---|---| @@ -115,6 +121,12 @@ The terminal should support True Color, especially when using voice packs that d Terminals not supporting True Color may look incorrect, as Textual will fall back to a lower color mode. +If Celune looks incorrect while your terminal supports True Color, run Celune with the following command: + +```bash +COLORTERM=truecolor celune +``` + If Rubber Band is not installed or fails to run, Celune will speak at normal speed, and speed controls will be unavailable. ## VRAM presets & requirements @@ -161,6 +173,8 @@ Tested on: RTX 5070 (12 GB VRAM) ## Installation +Download and extract the [latest binary artifact](https://nightly.link/celunah/celune/workflows/ci/main) prior to running the below commands. + ```bash # Download Celune git clone https://github.com/celunah/celune @@ -188,16 +202,20 @@ uv sync # Run # Command Prompt users -celune +bin\celune # PowerShell users -.\celune.exe +.\bin\celune.exe # Or on Unix systems: -./celune.AppImage +./bin/celune.AppImage ``` -You can also open Celune from within your desktop by running the aforementioned executables. They are usable as an entry point. +Don't run `celune-bin` manually. The `celune` binary is Celune's main entrypoint. + +Both binaries are required for correct operation, `celune-bin` contains core code, while `celune` is the outer launcher. + +Celune can also run from other working directories, provided the main binary is installed correctly. ### SoX & Rubber Band installation @@ -206,13 +224,13 @@ If SoX & Rubber Band are already installed, you can skip this section. #### Windows (Scoop) ```bat -# Install Scoop if you don't already have it +REM Install Scoop if you don't already have it powershell -ExecutionPolicy RemoteSigned -c "irm https://get.scoop.sh | iex" -# Install SoX +REM Install SoX scoop install sox -# Install Rubber Band +REM Install Rubber Band scoop install rubberband ``` @@ -250,7 +268,7 @@ To install OpenRGB, go to , download and install a package Celune's VoxCPM2 backend may require a C/C++ compiler to compile dependencies. To install a suitable compiler, run one of the following commands: -This is not required to use the Qwen backend, but you may need to install dependencies manually. +This is not required to use other backends, but you may need to install dependencies manually. ```bash # Windows @@ -302,33 +320,10 @@ Without this, Celune may require elevated permissions or fall back to slower beh See [API.md](./API.md) for REST API configuration, authentication, endpoints, and cURL examples. The API allows programmatic usage of all Celune features. It can be used both as a public and local interface. -## Screenshots - -The below images showcase Celune's user interface. - -### Before init - -[![Before init](./demos/init.png)](./demos/init.png) - -### Ready - -[![Ready](./demos/ready.png)](./demos/ready.png) - -### Talking - -[![Talking](./demos/speaking.png)](./demos/speaking.png) - -### Change voice - -[![Change voice](./demos/change_voice.png)](./demos/change_voice.png) - -### Commands - -[![Commands](./demos/commands.png)](./demos/commands.png) - -### Extension invoke +## Web UI -[![Extension invoke](./demos/extensions.png)](./demos/extensions.png) +Celune exposes a web interface for remote access to Celune. It reuses the Celune API commands to provide an interface for control. +It can be accessed via `/ui` on Celune's exposed API URL. > *"Your voice, your way."* diff --git a/celune.AppImage b/celune.AppImage deleted file mode 100755 index f0348de..0000000 Binary files a/celune.AppImage and /dev/null differ diff --git a/celune.exe b/celune.exe deleted file mode 100755 index 921bdce..0000000 Binary files a/celune.exe and /dev/null differ diff --git a/celune/__init__.py b/celune/__init__.py index 7f9b6d9..4da7c9f 100644 --- a/celune/__init__.py +++ b/celune/__init__.py @@ -63,9 +63,9 @@ def _caller_is_repl() -> bool: if REVISION: _local = REVISION.rstrip("*") _dirty = ".dirty" if REVISION.endswith("*") else "" - __version__ = f"4.0.1+{_local}{_dirty}" + __version__ = f"4.1.0+{_local}{_dirty}" else: - __version__ = "4.0.1+unknown" + __version__ = "4.1.0+unknown" __tagline__ = '"Your voice, your way."' __codename__ = "Personality" diff --git a/celune/analysis.py b/celune/analysis.py index 388096d..0d22213 100644 --- a/celune/analysis.py +++ b/celune/analysis.py @@ -4,96 +4,44 @@ import pathlib import warnings import contextlib -from collections.abc import Mapping -from typing import Optional, Protocol, TypedDict, Union, cast +from pathlib import Path +from typing import Optional, cast import torch import librosa import matplotlib import numpy as np import numpy.typing as npt -from matplotlib import rcParams from matplotlib.projections import PolarAxes -from matplotlib import pyplot as plt -from matplotlib import colors as mcolors +from matplotlib import rcParams, font_manager, pyplot as plt, colors as mcolors from transformers import AutoModel, AutoProcessor from .cevoice import ManifestValue, default_loader from .constants import VOICE_EMBEDDING_MODEL, N_A_NUMERIC +from .typing.analysis import ( + EmbeddingPayload, + EmbeddingModel, + EmbeddingProcessor, + TextConfig, + TextConfigValue, + VoiceMatch, +) matplotlib.use("Agg") # this font is included within Celune # check resources/fonts to find the font files +font_path = Path(__file__).resolve().parent / "resources" / "fonts" +if font_path.exists(): + for font in font_path.iterdir(): + if font.suffix.lower() in {".ttf", ".otf"}: + font_manager.fontManager.addfont(font) + rcParams["font.family"] = "Outfit Thin" warnings.filterwarnings("ignore", category=UserWarning) warnings.filterwarnings("ignore", category=FutureWarning) -type TextConfigValue = Union[str, dict[str, "TextConfigValue"]] -type TextConfig = dict[str, TextConfigValue] -type EmbeddingPayload = Union[ - torch.Tensor, - npt.NDArray[np.float32], - list[float], - Mapping[str, "EmbeddingPayload"], -] - - -class EmbeddingOutput(Protocol): - """Speaker embedding model output used by Celune analysis.""" - - last_hidden_state: EmbeddingPayload - - -class EmbeddingProcessor(Protocol): - """Processor callable returned by the embedding model package.""" - - def __call__( - self, - y: npt.NDArray[np.float32], - *, - sampling_rate: int, - ) -> Mapping[str, torch.Tensor]: - """Prepare model inputs from a waveform.""" - raise NotImplementedError("protocol not defined") - - -class EmbeddingModel(Protocol): - """Embedding model behavior used by Celune analysis.""" - - def eval(self) -> None: - """Switch the model into evaluation mode. - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - def to(self, device: torch.device) -> torch.nn.Module: - """Move the model to a device. - - Args: - device: A device to dispatch to. - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - def __call__(self, **inputs: torch.Tensor) -> EmbeddingOutput: - """Run embedding inference.""" - raise NotImplementedError("protocol not defined") - - -class VoiceMatch(TypedDict): - """Similarity score for one reference voice.""" - - voice: str - cosine: float - percent: float - - _EMBEDDING_MODEL: Optional[EmbeddingModel] = None _EMBEDDING_PROCESSOR: Optional[EmbeddingProcessor] = None diff --git a/celune/api.py b/celune/api.py index b749d15..f92eec6 100644 --- a/celune/api.py +++ b/celune/api.py @@ -9,12 +9,13 @@ import datetime import textwrap import threading -from pathlib import Path -from dataclasses import dataclass from hmac import compare_digest +from dataclasses import dataclass +from html import escape from collections import defaultdict, deque from typing import Callable, Iterator, Optional, Union +import gradio as gr import uvicorn import numpy as np import numpy.typing as npt @@ -26,20 +27,24 @@ JSONResponse, Response, StreamingResponse, - HTMLResponse, FileResponse, + RedirectResponse, ) - from . import __version__ +from . import colors from .celune import Celune from .utils import format_error +from .paths import main_window_log_path, project_root from .dsp import _resample_audio from .pipeline import SpeechStreamQueue from .constants import BASE_SR, APP_NAME +from .cevoice import default_loader +from .ui import resources as ui_resources +from .ui.app import CeluneUI api = FastAPI(title=f"{APP_NAME}API") -bound_celune: Optional["Celune"] = None +bound_celune: Optional[Celune] = None auth_token: Optional[str] = None rate_limit_per_minute = 60 rate_limit_lock = threading.Lock() @@ -48,6 +53,243 @@ speech_jobs_lock = threading.Lock() speech_jobs: dict[str, "SpeechJob"] = {} speech_job_ttl_seconds = 15 * 60 +webui_log_lines: deque[tuple[str, str]] = deque(maxlen=240) +webui_status_text = "Waiting for response" +webui_status_severity = "info" +webui_logs_seeded = False +webui_resource_page = 0 +webui_last_resource_advance = 0.0 +webui_last_probed_state: Optional[str] = None +webui_input_locked = True +webui_input_placeholder = "Please wait" +webui_voice_locked = True +webui_theme_style = "" +webui_status_source = "probe" +webui_status_updated_at = 0.0 +WEBUI_RESOURCE_ROTATE_SECONDS = 2.06 +WEBUI_POLL_INTERVAL_SECONDS = WEBUI_RESOURCE_ROTATE_SECONDS / 4 +WEBUI_STATUS_PROBE_DEBOUNCE_SECONDS = 0.9 +WEBUI_HEAD = textwrap.dedent( + """ + + + """ +) + +WEBUI_CSS = textwrap.dedent( + """ + @import url('https://fonts.googleapis.com/css2?family=Outfit:wght@100..900&display=swap'); + @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@100..800&display=swap'); + + html, + body, + gradio-app { + background: var(--celune-background, #1d1826) !important; + } + + .column { + place-content: center; + } + + .gradio-container { + background: var(--celune-background, #1d1826); + font-family: Outfit, sans-serif !important; + height: 100dvh; + overflow: hidden; + } + + .gradio-container > .main, + .gradio-container .wrap, + .gradio-container .loading-container, + .gradio-container .loading-container > div { + background: var(--celune-background, #1d1826) !important; + } + + .main { + flex: 1 1 auto !important; + min-height: 0; + } + + body { + font-family: Outfit, sans-serif; + } + + #celune-shell { + display: flex; + flex-direction: column; + gap: 0.75rem; + height: calc(100dvh - 2rem); + min-height: 0; + } + + #celune-header { + display: flex; + place-items: center; + } + + #celune-header .line { + width: 100%; + background: var(--celune-primary, #cebaff); + height: 2px; + } + + #celune-header .title { + font-weight: bold; + padding: 0 2em; + color: var(--celune-primary, #cebaff); + } + + button#celune-style, button#celune-send { + background: var(--celune-button-bg, #3a304c); + color: var(--celune-primary, #cebaff); + border-radius: 4px; + } + + button#celune-style:hover, button#celune-send:hover { + background: var(--celune-button-hover, #443a56); + } + + #celune-log-panel { + border: 2px solid var(--celune-primary, #cebaff); + padding: 1em; + border-radius: 8px; + max-height: min(75dvh, calc(100dvh - 20rem)); + overflow: hidden; + flex: 1 1 auto; + min-height: 0; + } + + #celune-log-panel pre { + font-family: "JetBrains Mono", monospace; + color: var(--celune-primary, #cebaff); + white-space: pre-wrap; + margin: 0; + max-height: min(calc(75dvh - 2em), calc(75dvh - 15rem)); + height: 100%; + overflow-y: auto; + padding-right: 0.75em; + scrollbar-gutter: stable both-edges; + } + + #celune-input textarea { + background: var(--celune-input-bg, #3a304c); + color: var(--celune-primary, #cebaff); + border-radius: 4px; + } + + #celune-input textarea::placeholder { + color: var(--celune-placeholder, #9c88ce); + } + + #celune-resources .footer-block { + text-align: right; + color: var(--celune-primary, #cebaff); + } + + .webui-desktop-only { + display: inline !important; + color: inherit; + } + + .webui-mobile-only { + display: none !important; + color: inherit; + } + + #celune-actions { + gap: 0.75rem; + } + + button#celune-style, + button#celune-send { + min-height: 2.75rem; + } + + #celune-input-row, #celune-footer { + padding: 0 1em; + } + + button#celune-send { + display: none; + } + + @media (max-width: 768px), (any-pointer: coarse), (hover: none) { + .gradio-container { + height: 100dvh; + overflow: hidden; + } + + #celune-shell { + height: calc(100dvh - 8rem); + min-height: 0; + } + + #celune-input-row { + flex-direction: column; + } + + #celune-actions { + display: flex; + flex-direction: row; + width: 100%; + flex-wrap: nowrap; + gap: 0; + } + + #celune-actions > * { + flex: 1 1 0 !important; + min-width: 0 !important; + } + + button#celune-style, + button#celune-send { + width: 100%; + } + + button#celune-send { + display: flex; + } + + #celune-input textarea, #celune-input textarea::placeholder { + text-align: center; + } + + #celune-log-panel { + max-height: min(52dvh, calc(100dvh - 12rem)); + } + + #celune-log-panel pre { + max-height: min(calc(52dvh - 2em), calc(100dvh - 14rem)); + } + + button#celune-style { + border-radius: 4px 0 0 4px; + border-right: 1px solid color-mix( + in srgb, + var(--celune-primary, #cebaff) 50%, + black + ); + } + + button#celune-send { + border-radius: 0 4px 4px 0; + border-left: 1px solid color-mix( + in srgb, + var(--celune-primary, #cebaff) 50%, + black + ); + } + + .webui-desktop-only { + display: none !important; + } + + .webui-mobile-only { + display: inline !important; + } + } + """ +) @dataclass @@ -60,6 +302,56 @@ class SpeechJob: error: Optional[str] = None +def _configure_webui_theme() -> None: + """Sync the browser UI palette with the active CEVOICE-derived theme.""" + global webui_theme_style + + colors.configure_theme() + loader = default_loader() + if loader is not None: + theme = loader.bundle.metadata.get("theme") + if isinstance(theme, dict): + background = theme.get("background") + accent = theme.get("accent") + faded_accent = theme.get("faded_accent") + if faded_accent is None: + faded_accent = theme.get("sleeping_color") + if ( + isinstance(background, str) + and isinstance(accent, str) + and (faded_accent is None or isinstance(faded_accent, str)) + ): + colors.configure_theme(background, accent, faded_accent) + + background = colors.THEME.background or "#1d1826" + palette = colors.SEVERITY_COLORS["celune"] + primary = palette["info"] + foreground = colors.THEME.foreground or "#ffffff" + secondary = colors.THEME.secondary or primary + accent = colors.THEME.accent or primary + sleeping = palette["sleeping"] + button_bg = colors._blend(primary, background, 0.72) + button_hover = colors._blend(primary, background, 0.6) + input_bg = colors._blend(primary, background, 0.78) + + webui_theme_style = ( + "" + ) + + class StartedServer(uvicorn.Server): """Uvicorn server that reports when socket binding actually succeeds.""" @@ -148,6 +440,12 @@ def _authenticated(request: Request) -> bool: return given is not None and compare_digest(given, auth_token) +def _is_browser_ui_request(request: Request) -> bool: + """Return whether the request targets the mounted browser UI.""" + path = request.url.path.rstrip("/") + return path == "/ui" or path.startswith("/ui/") + + def _rate_limit_key(request: Request) -> str: """Return the client key used for rate limiting.""" if request.client is None: @@ -190,6 +488,9 @@ async def api_security( Returns: Response: The response returned by the protected route or security layer. """ + if _is_browser_ui_request(request): + return await call_next(request) + if not _authenticated(request): return JSONResponse( status_code=401, @@ -227,6 +528,208 @@ def bind_celune(celune: Celune) -> None: """ global bound_celune bound_celune = celune + global webui_resource_page, webui_last_resource_advance, webui_last_probed_state + global webui_input_locked, webui_input_placeholder, webui_voice_locked + webui_resource_page = 0 + webui_last_resource_advance = 0.0 + webui_last_probed_state = None + _configure_webui_theme() + webui_input_locked = celune.locked + webui_input_placeholder = ( + "Currently in tutorial mode" + if celune.is_in_tutorial + else "Please wait" + if celune and celune.locked + else "Enter text to speak here" + ) + webui_voice_locked = len(celune.voices) < 2 or celune.is_in_tutorial + _seed_webui_logs() + _wrap_celune_callbacks(celune) + if celune.current_voice: + _append_webui_log(f"Voice ready: {celune.current_voice}.") + _set_webui_status( + "Idle" if celune.cur_state == "idle" else celune.cur_state.title(), + source="probe", + ) + + +def _webui_status_color(severity: str) -> str: + """Return the browser UI color for a given severity.""" + palette = colors.SEVERITY_COLORS.get("celune", colors.SEVERITY_COLORS["celune"]) + return palette.get(severity, palette["info"]) + + +def _webui_log_line_html(message: str, severity: str = "info") -> str: + """Render one browser log line with severity-aware coloring.""" + color = _webui_status_color(severity) + return f'{escape(message)}' + + +def _strip_webui_log_prefix(line: str) -> str: + """Remove persisted timestamp and severity prefixes from one log line.""" + stripped = line.strip() + if stripped.startswith("[") and "] " in stripped: + stripped = stripped.split("] ", 1)[1] + if stripped.startswith("[") and "] " in stripped: + stripped = stripped.split("] ", 1)[1] + return stripped + + +def _seed_webui_logs() -> None: + """Populate the browser log view from the persisted desktop log when available.""" + global webui_logs_seeded + if webui_logs_seeded: + return + + webui_logs_seeded = True + path = main_window_log_path() + if not path.exists(): + return + + try: + lines = path.read_text(encoding="utf-8").splitlines()[-180:] + except OSError: + return + + for line in lines: + lowered = line.lower() + severity = "info" + if "[warning]" in lowered: + severity = "warning" + elif "[error]" in lowered: + severity = "error" + webui_log_lines.append((_strip_webui_log_prefix(line), severity)) + + +def _append_webui_log(msg: str, severity: str = "info") -> None: + """Store one browser log line.""" + webui_log_lines.append((msg, severity)) + + +def _set_webui_status( + msg: str, + severity: str = "info", + *, + source: str = "callback", + updated_at: Optional[float] = None, +) -> None: + """Update the browser UI status line.""" + global webui_status_text, webui_status_severity + global webui_status_source, webui_status_updated_at + webui_status_text = msg + webui_status_severity = severity + webui_status_source = source + webui_status_updated_at = time.monotonic() if updated_at is None else updated_at + + +def _probed_status_text(celune: Celune) -> tuple[str, str]: + """Return the best-effort footer status derived from Celune's live state.""" + state = (celune.cur_state or "").strip().lower() + return { + "idle": ("Idle", "info"), + "speaking": ("Speaking", "info"), + "thinking": ("Thinking", "info"), + "waking": ("Waking up", "info"), + "reloading": ("Reloading", "info"), + "sleeping": ("Sleeping", "sleeping"), + "init": ("Initializing", "info"), + "generating": ("Generating", "info"), + "error": (f"{APP_NAME} could not continue", "error"), + }.get(state, (state.title() if state else "Initializing", "info")) + + +def _probe_webui_runtime() -> None: + """Poll the live runtime so the WebUI footer updates even without new log lines.""" + global webui_last_probed_state, webui_resource_page, webui_last_resource_advance + + celune = bound_celune + if celune is None: + return + + now = time.monotonic() + current_state = (celune.cur_state or "").strip().lower() + if current_state != webui_last_probed_state: + if current_state == "sleeping": + _append_webui_log( + f"{APP_NAME} is currently sleeping. Type anything to wake up.", + "sleeping", + ) + status_text, severity = _probed_status_text(celune) + should_override_status = ( + webui_last_probed_state is None + or webui_status_text == "Starting up" + or webui_status_source != "callback" + or now - webui_status_updated_at >= WEBUI_STATUS_PROBE_DEBOUNCE_SECONDS + or current_state in {"idle", "sleeping", "error"} + ) + if should_override_status: + _set_webui_status( + status_text, + severity, + source="probe", + updated_at=now, + ) + webui_last_probed_state = current_state + + pages = ui_resources.resource_pages(celune, "celune") + if not pages: + return + + if webui_last_resource_advance <= 0: + webui_last_resource_advance = now + return + + if now - webui_last_resource_advance >= WEBUI_RESOURCE_ROTATE_SECONDS: + webui_resource_page = (webui_resource_page + 1) % len(pages) + webui_last_resource_advance = now + + +def _wrap_celune_callbacks(celune: Celune) -> None: + """Mirror Celune callbacks into browser UI state without replacing existing handlers.""" + if getattr(celune, "_webui_callbacks_wrapped", False): + return + + original_log = celune.log_callback + original_status = celune.status_callback + original_voice_changed = celune.voice_changed_callback + original_input_state = celune.change_input_state_callback + original_voice_lock_state = celune.change_voice_lock_state_callback + + def wrapped_log(msg: str, severity: str = "info") -> None: + _append_webui_log(msg, severity) + original_log(msg, severity) + + def wrapped_status(msg: str, severity: str = "info") -> None: + _set_webui_status(msg, severity, source="callback") + original_status(msg, severity) + + def wrapped_voice_changed(name: str) -> None: + _append_webui_log(f"Voice changed to {name}.") + original_voice_changed(name) + + def wrapped_input_state(locked: bool) -> None: + global webui_input_locked, webui_input_placeholder + webui_input_locked = locked + webui_input_placeholder = ( + "Currently in tutorial mode" + if celune.is_in_tutorial + else "Please wait" + if celune and locked + else "Enter text to speak here" + ) + original_input_state(locked) + + def wrapped_voice_lock_state(locked: bool) -> None: + global webui_voice_locked + webui_voice_locked = locked + original_voice_lock_state(locked) + + celune.log_callback = wrapped_log + celune.status_callback = wrapped_status + celune.voice_changed_callback = wrapped_voice_changed + celune.change_input_state_callback = wrapped_input_state + celune.change_voice_lock_state_callback = wrapped_voice_lock_state + setattr(celune, "_webui_callbacks_wrapped", True) def require_celune() -> Celune: @@ -258,7 +761,12 @@ def api_log(action: str, content: str, suffix: str = "") -> None: preview = content.replace("\n", "\\n").replace("\r", "\\r")[:64] if len(content) > 64: preview += "..." - print(f"[{timestamp}] {action} {preview!r}{suffix}", flush=True) + _append_webui_log(f"{action} {preview!r}{suffix}") + try: + print(f"[{timestamp}] {action} {preview!r}{suffix}", flush=True) + except ValueError: + # Some embedded launch paths can close stdout while the WebUI stays alive. + pass def _normalized_audio(audio: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]: @@ -311,6 +819,23 @@ def audio_bytes(chunks: SpeechStreamQueue) -> Iterator[bytes]: yield _flac_bytes(np.empty((0, 2), dtype=np.float32)) +def _webui_audio_array(chunks: SpeechStreamQueue) -> npt.NDArray[np.float32]: + """Collect queued audio into a frame-major float32 array for Gradio playback.""" + audio_chunks: list[npt.NDArray[np.float32]] = [] + while True: + item = chunks.get() + if item is None: + break + if isinstance(item, Exception): + raise item + audio_chunks.append(_normalized_audio(item)) + + if not audio_chunks: + return np.empty((0, 2), dtype=np.float32) + + return np.concatenate(audio_chunks) + + def stream_headers() -> dict[str, str]: """Return headers describing the FLAC response. @@ -386,6 +911,424 @@ def _collect_speech_job(job_id: str, chunks: SpeechStreamQueue) -> None: _update_speech_job(job_id, status="completed", audio=audio) +def _webui_logs_html() -> str: + """Render the mirrored log buffer as terminal-like HTML.""" + if not webui_log_lines: + content = _webui_log_line_html("Waiting for response...") + else: + content = "\n".join( + _webui_log_line_html(line, severity) for line, severity in webui_log_lines + ) + return f'
{content}
' + + +def _webui_status_html() -> str: + """Render the footer status cell.""" + color = _webui_status_color(webui_status_severity) + return ( + '' + ) + + +def _webui_resources_html() -> str: + """Render the footer resource cell.""" + celune = bound_celune + resource = "" + if celune is not None: + pages = ui_resources.resource_pages(celune, "celune") + if pages: + resource = pages[webui_resource_page % len(pages)] + if "CTRL+" in resource: + return ( + '" + ) + return f'' + + +def _voice_button_update() -> dict[str, object]: + """Return the current browser voice-button state.""" + celune = bound_celune + if celune is None: + return gr.update(value="Loading", interactive=False) + + voice_name = celune.current_voice or ( + celune.voices[0] if celune.voices else "Voice" + ) + interactive = ( + not webui_voice_locked + if getattr(celune, "_webui_callbacks_wrapped", False) + else len(celune.voices) >= 2 and not celune.is_in_tutorial + ) + return gr.update( + value=voice_name.capitalize(), + interactive=interactive, + ) + + +_WEBUI_UNSET = object() + + +def _input_update(value: object = _WEBUI_UNSET) -> dict[str, object]: + """Return the current browser input state.""" + has_value = value is not _WEBUI_UNSET + celune = bound_celune + if celune is None: + if has_value: + return gr.update( + value=value, + interactive=False, + placeholder="Please wait", + ) + return gr.update( + interactive=False, + placeholder="Please wait", + ) + if celune.is_in_tutorial: + if has_value: + return gr.update( + value=value, + interactive=False, + placeholder="Currently in tutorial mode", + ) + return gr.update( + interactive=False, + placeholder="Currently in tutorial mode", + ) + if getattr(celune, "_webui_callbacks_wrapped", False): + interactive = not webui_input_locked + placeholder = webui_input_placeholder + else: + interactive = not celune.locked + placeholder = ( + "Please wait" if celune and celune.locked else "Enter text to speak here" + ) + if has_value: + return gr.update( + value=value, + interactive=interactive, + placeholder=placeholder, + ) + return gr.update( + interactive=interactive, + placeholder=placeholder, + ) + + +def _send_button_update() -> dict[str, object]: + """Return the current browser send-button state.""" + celune = bound_celune + if celune is None: + return gr.update(interactive=False) + interactive = ( + not webui_input_locked + if getattr(celune, "_webui_callbacks_wrapped", False) + else not celune.is_in_tutorial and not celune.locked + ) + return gr.update(interactive=interactive) + + +def _webui_snapshot() -> tuple[ + str, + str, + str, + dict[str, object], + dict[str, object], + dict[str, object], +]: + """Return the current browser UI snapshot.""" + _seed_webui_logs() + _probe_webui_runtime() + return ( + _webui_logs_html(), + _webui_status_html(), + _webui_resources_html(), + _voice_button_update(), + _send_button_update(), + _input_update(), + ) + + +def _webui_submit_snapshot( + input_value: Optional[str], +) -> tuple[ + dict[str, object], + str, + str, + str, + dict[str, object], + dict[str, object], +]: + """Return a browser snapshot shaped for submit/click handlers.""" + logs_html, status_html, resources_html, voice_update, send_update, _input = ( + _webui_snapshot() + ) + return ( + _input_update(input_value), + logs_html, + status_html, + resources_html, + voice_update, + send_update, + ) + + +def _webui_run_command(text: str) -> bool: + """Run one slash command through the main UI command path when available.""" + ui = CeluneUI._instance + if ui is None: + _append_webui_log( + f"{APP_NAME} must be running to run commands.", + "warning", + ) + return False + + try: + parts = CeluneUI._split_command_input(text[1:]) + except ValueError as e: + _append_webui_log(f"Command parsing error: {e}", "error") + return False + + if not parts: + return False + + command = parts[0].lower() + command_args = parts[1:] + ui.call_from_thread(ui.process_command, command, command_args) + return True + + +def _webui_speak( + content: str, +) -> Iterator[ + tuple[ + dict[str, object], + object, + str, + str, + str, + dict[str, object], + dict[str, object], + ] +]: + """Speak text through the browser UI and return browser audio playback.""" + text = content.strip() + if not text: + snapshot = _webui_submit_snapshot("") + yield snapshot[0], None, *snapshot[1:] + return + + if text.startswith("/"): + _webui_run_command(text) + snapshot = _webui_submit_snapshot("") + yield snapshot[0], None, *snapshot[1:] + return + + celune = require_celune() + api_log("SPEAK(WEBUI)", text) + + current_state = (celune.cur_state or "").strip().lower() + if current_state == "waking": + _append_webui_log( + f"{APP_NAME} has not yet returned from sleep mode.", "warning" + ) + snapshot = _webui_submit_snapshot(text) + yield snapshot[0], None, *snapshot[1:] + return + + if getattr(celune, "sleeping", False): + _set_webui_status("Waking up") + snapshot = _webui_submit_snapshot(text) + yield snapshot[0], None, *snapshot[1:] + if not celune.wake_from_sleep(): + snapshot = _webui_submit_snapshot(text) + yield snapshot[0], None, *snapshot[1:] + return + + chunks = celune.say_stream(text, save=True) + if chunks is None: + _append_webui_log("I'm currently busy. Try again later.", "warning") + snapshot = _webui_submit_snapshot(text) + yield snapshot[0], None, *snapshot[1:] + return + + snapshot = _webui_submit_snapshot("") + yield snapshot[0], None, *snapshot[1:] + + audio_chunks: list[npt.NDArray[np.float32]] = [] + + try: + while True: + item = chunks.get() + if item is None: + break + if isinstance(item, Exception): + raise item + + audio_chunks.append(_normalized_audio(item)) + audio_value: object + if audio_chunks: + audio_value = (BASE_SR, np.concatenate(audio_chunks)) + else: + audio_value = None + snapshot = _webui_submit_snapshot("") + yield snapshot[0], audio_value, *snapshot[1:] + except Exception as e: + _append_webui_log( + f"[WEBUI ERROR] {format_error(e, celune.dev)}", + "error", + ) + snapshot = _webui_submit_snapshot("") + yield snapshot[0], None, *snapshot[1:] + + +def _webui_cycle_voice() -> tuple[ + str, + str, + str, + dict[str, object], + dict[str, object], + dict[str, object], +]: + """Cycle to the next available Celune voice from the browser UI.""" + celune = require_celune() + if len(celune.voices) < 2: + return _webui_snapshot() + + current_voice = celune.current_voice or celune.voices[0] + current_index = ( + celune.voices.index(current_voice) if current_voice in celune.voices else -1 + ) + next_voice = celune.voices[(current_index + 1) % len(celune.voices)] + api_log("VOICE(WEBUI)", next_voice) + + if not celune.set_voice_and_wait(next_voice): + _append_webui_log("I can't change my voice right now.", "error") + + return _webui_snapshot() + + +def _build_webui() -> gr.Blocks: + """Create the browser UI mounted by the API.""" + _configure_webui_theme() + with gr.Blocks( + title=APP_NAME, + fill_height=True, + ) as demo: + gr.HTML(webui_theme_style) + with gr.Column(elem_id="celune-shell"): + gr.HTML( + textwrap.dedent( + f""" +
+
+
{APP_NAME}
+
+
+ """ + ) + ) + logs = gr.HTML(_webui_logs_html()) + with gr.Row(elem_id="celune-input-row"): + input_box = gr.Textbox( + value="", + lines=1, + max_lines=4, + show_label=False, + placeholder="Please wait", + container=False, + elem_id="celune-input", + scale=8, + interactive=False, + ) + with gr.Row(elem_id="celune-actions", scale=2): + voice_button = gr.Button( + value="Balanced", + elem_id="celune-style", + scale=1, + min_width=0, + interactive=False, + ) + send_button = gr.Button( + value="Send", + elem_id="celune-send", + scale=1, + min_width=0, + interactive=False, + ) + with gr.Row(elem_id="celune-footer"): + status = gr.HTML(_webui_status_html(), elem_id="celune-status") + resources = gr.HTML( + _webui_resources_html(), + elem_id="celune-resources", + ) + gr.HTML( + textwrap.dedent(f""" +

+ Usage may differ. Some {APP_NAME} features may not be available. +

+ """) + ) + audio = gr.Audio( + value=None, + type="numpy", + autoplay=True, + show_label=False, + interactive=False, + visible="hidden", + elem_id="celune-audio", + ) + timer = gr.Timer(value=WEBUI_POLL_INTERVAL_SECONDS) + timer.tick( # pylint: disable=E1101 + _webui_snapshot, + outputs=[logs, status, resources, voice_button, send_button, input_box], + show_progress="hidden", + ) + demo.load( # pylint: disable=E1101 + _webui_snapshot, + outputs=[logs, status, resources, voice_button, send_button, input_box], + show_progress="hidden", + ) + input_box.submit( # pylint: disable=E1101 + _webui_speak, + inputs=[input_box], + outputs=[ + input_box, + audio, + logs, + status, + resources, + voice_button, + send_button, + ], + show_progress="hidden", + ) + send_button.click( # pylint: disable=E1101 + _webui_speak, + inputs=[input_box], + outputs=[ + input_box, + audio, + logs, + status, + resources, + voice_button, + send_button, + ], + show_progress="hidden", + ) + voice_button.click( # pylint: disable=E1101 + _webui_cycle_voice, + outputs=[logs, status, resources, voice_button, send_button, input_box], + show_progress="hidden", + ) + + return demo + + class RootResponse(BaseModel): """Response returned by the API root endpoint.""" @@ -432,70 +1375,19 @@ def favicon() -> FileResponse: """ return FileResponse( - # this is a symbolic link to the in Celune.AppDir/ - Path(__file__).parents[1] / "resources" / "branding" / "celune.png", - media_type="image/png", + project_root() / "resources" / "celune.ico", + media_type="image/x-icon", ) -@api.get("/") -def root() -> HTMLResponse: - """Root page. +@api.get("/", include_in_schema=False) +def root() -> RedirectResponse: + """Redirect the API root to Celune's browser UI. Returns: - HTMLResponse: The app root page as HTML. + RedirectResponse: Redirect response pointing at the mounted WebUI. """ - - return HTMLResponse( - textwrap.dedent( - f""" - - - - {APP_NAME} - - - - -
-

Nothing Usable

-

The API is functioning correctly. Please return to the app to talk to me.

-
- - - """ - ) - ) + return RedirectResponse(url="/ui") @api.get("/v1", response_model=RootResponse) @@ -745,8 +1637,20 @@ def chunks() -> Iterator[bytes]: ) +api = gr.mount_gradio_app( + api, + _build_webui(), + path="/ui", + footer_links=[], + favicon_path=str(project_root() / "resources" / "celune.ico"), + show_error=True, + css=WEBUI_CSS, + head=WEBUI_HEAD, +) + + def run_api( - celune: Optional["Celune"] = None, + celune: Optional[Celune] = None, host: Optional[str] = None, port: int = 2060, token: Optional[str] = None, diff --git a/celune/assets/chord.wav b/celune/assets/chord.wav new file mode 100644 index 0000000..8136a13 Binary files /dev/null and b/celune/assets/chord.wav differ diff --git a/celune/backends/__init__.py b/celune/backends/__init__.py index ae0a3ff..ca7c68e 100644 --- a/celune/backends/__init__.py +++ b/celune/backends/__init__.py @@ -5,13 +5,15 @@ from importlib.metadata import version, PackageNotFoundError from typing import Callable, Union, Optional -from .base import BackendModel, CeluneBackend +from ..typing.backends import BackendModel +from .base import CeluneBackend __all__ = ["BackendModel", "CeluneBackend", "get_version", "resolve_backend"] BACKENDS = { "mini": ("celune.backends.mini", "Mini"), "qwen3": ("celune.backends.qwen3", "Qwen3"), + "dotstts": ("celune.backends.dotstts", "DotsTtsMF"), "voxcpm2": ("celune.backends.voxcpm2", "VoxCPM2"), } diff --git a/celune/backends/base.py b/celune/backends/base.py index 7fa00c2..69aab2a 100644 --- a/celune/backends/base.py +++ b/celune/backends/base.py @@ -6,29 +6,34 @@ import glob import random import secrets +import threading import contextlib from pathlib import Path -from collections.abc import Iterator from abc import ABC, abstractmethod -from typing import Callable, Optional, Protocol, Mapping, TypeVar, Generic +from collections.abc import Iterator, Generator +from typing import Callable, Optional, Mapping, Generic import torch import numpy as np import numpy.typing as npt -from huggingface_hub.constants import HF_HUB_CACHE from huggingface_hub import snapshot_download +from huggingface_hub.constants import HF_HUB_CACHE from ..utils import discard from ..constants import N_A_NUMERIC -from ..exceptions import BackendError from ..cevoice import default_loader +from ..exceptions import BackendError +from ..typing.backends import BackendModel, ModelT - -class BackendModel(Protocol): - """Opaque backend model protocol for backend-independent storage.""" +__all__ = [ + "BackendModel", + "CeluneBackend", + "cached_hf_snapshot_path", + "local_hf_offline_mode", +] -ModelT = TypeVar("ModelT", bound=BackendModel) +_HF_HUB_OFFLINE_LOCK = threading.Lock() def cached_hf_snapshot_path( @@ -65,6 +70,32 @@ def cached_hf_snapshot_path( return False, None +@contextlib.contextmanager +def local_hf_offline_mode(enabled: bool = True) -> Generator[None, None, None]: + """Temporarily set ``HF_HUB_OFFLINE`` while serializing process-global access. + + Args: + enabled: Whether to enable Hugging Face offline mode for the guarded block. + + Yields: + None: Control back to the guarded caller while the environment mutation is active. + """ + if not enabled: + yield + return + + with _HF_HUB_OFFLINE_LOCK: + previous_offline = os.environ.get("HF_HUB_OFFLINE") + try: + os.environ["HF_HUB_OFFLINE"] = "1" + yield + finally: + if previous_offline is None: + os.environ.pop("HF_HUB_OFFLINE", None) + else: + os.environ["HF_HUB_OFFLINE"] = previous_offline + + class CeluneBackend(ABC, Generic[ModelT]): """Base class for Celune speech backends.""" @@ -94,7 +125,7 @@ def __init__( @staticmethod def _reference_wave_path(name: str) -> Path: - """Return a materialized path for a reference WAV from the active CEVOICE pack.""" + """Return a materialized path for a reference WAV from the active CEVOICE/CECHAR pack.""" loader = default_loader() if loader is None: raise BackendError( @@ -103,7 +134,7 @@ def _reference_wave_path(name: str) -> Path: return loader.materialize(name, "wav") def _validate_refs(self) -> None: - """Validate reference audio files found in the current CEVOICE pack.""" + """Validate reference audio files found in the current CEVOICE/CECHAR pack.""" loader = default_loader() if loader is None: return @@ -232,38 +263,6 @@ def should_reload_for_language(self, lang: Optional[str]) -> bool: discard(lang) return False - def generation_progress_total(self, text: Optional[str] = None) -> Optional[int]: - """Return the backend's maximum streaming generation steps, if known. - - Args: - text: Optional text for backends whose generation budget depends on input token length. - - Returns: - Optional[int]: Maximum generated codec/token steps for one text chunk, or ``None`` when the backend does not - expose a stable limit. - """ - # this is a base implementation so we don't use the parameters - discard(text) - - @staticmethod - def generation_progress_steps(timing: Optional[dict]) -> int: - """Return how many generation steps a streamed chunk represents. - - Args: - timing: Optional backend timing metadata yielded with the audio chunk. - - Returns: - int: Number of generated codec/token steps represented by the chunk. - """ - if not timing: - return 1 - - steps = timing.get("chunk_steps") - if isinstance(steps, int) and steps > 0: - return steps - - return 1 - def load_default_model(self) -> ModelT: """Load the configured default model for this backend. diff --git a/celune/backends/dotstts.py b/celune/backends/dotstts.py new file mode 100644 index 0000000..2ff7d57 --- /dev/null +++ b/celune/backends/dotstts.py @@ -0,0 +1,389 @@ +# SPDX-License-Identifier: MIT +"""dots.tts MeanFlow backend implementation for Celune.""" + +import os +import contextlib +from collections.abc import Iterator +from typing import Callable, Optional, Mapping, Generator + +import torch +import numpy as np +import numpy.typing as npt +from dots_tts.runtime import DotsTtsRuntime + +try: + import loguru +except ModuleNotFoundError: + loguru_logger = None +else: + loguru_logger = loguru.logger + +from ..utils import custom_assert +from ..exceptions import BackendError +from ..cevoice import default_loader, CEVoiceLoader +from .base import CeluneBackend, cached_hf_snapshot_path, local_hf_offline_mode + + +class DotsTtsMF(CeluneBackend[DotsTtsRuntime]): + """Celune dots.tts MeanFlow backend.""" + + name: str = "dotstts" + uses_voice_bundles: bool = True + chunk_rate: float = 6.25 + max_new_tokens: int = 512 + supported_languages: tuple[str, ...] = ( + "ar", + "my", + "zh-cn", + "da", + "nl", + "en", + "fi", + "fr", + "de", + "el", + "he", + "hi", + "id", + "it", + "ja", + "km", + "ko", + "lo", + "ms", + "no", + "pl", + "pt", + "ru", + "es", + "sw", + "sv", + "tl", + "th", + "tr", + "vi", + ) + + voice_models: Optional[Mapping[str, str]] = { + "balanced": "rednote-hilab/dots.tts-mf", + "calm": "rednote-hilab/dots.tts-mf", + "bold": "rednote-hilab/dots.tts-mf", + "upbeat": "rednote-hilab/dots.tts-mf", + } + default_voice: Optional[str] = "balanced" + + def __init__(self, log: Callable[[str, str], None]) -> None: + super().__init__(log=log) + self._validate_refs() + + @staticmethod + def _require_compatible_bundle() -> tuple[CEVoiceLoader, tuple[str, ...]]: + """Return the active CEVOICE/CECHAR loader and its usable voice names.""" + loader = default_loader() + custom_assert( + loader is not None, + BackendError( + "backend 'dotstts' requires a compatible CEVOICE/CECHAR package " + "with at least one valid voice identifier" + ), + ) + assert loader is not None + + voice_names = tuple( + voice + for voice in loader.bundle.voice_order + if ( + isinstance(voice, str) + and voice.strip() + and voice in loader.bundle.voices + and isinstance(loader.bundle.voices[voice].get("reference_text"), str) + and bool(str(loader.bundle.voices[voice]["reference_text"]).strip()) + ) + ) + custom_assert( + bool(voice_names), + BackendError( + "backend 'dotstts' requires a compatible CEVOICE/CECHAR package " + "with at least one valid voice identifier" + ), + ) + assert bool(voice_names) + + return loader, voice_names + + def _validate_refs(self) -> None: + """Validate dots.tts reference audio files from the active CEVOICE/CECHAR pack.""" + loader, voice_names = self._require_compatible_bundle() + for name in voice_names: + loader.materialize(name, "wav") + + @property + def voices(self) -> list[str]: + """Return the voice names exposed by the active CEVOICE/CECHAR pack. + + Returns: + list[str]: The list of available voices to use from current CEVOICE/CECHAR pack. + """ + _, voice_names = self._require_compatible_bundle() + return list(voice_names) + + def model_id_for_voice(self, voice: str) -> str: + """Resolve a voice from the active pack to the shared dots.tts model. + + Args: + voice: The voice name to resolve. + + Returns: + str: A resolved model name for this voice. + """ + _, voice_names = self._require_compatible_bundle() + custom_assert( + voice in voice_names, + ValueError(f"{self.name} cannot resolve a model for voice '{voice}'"), + ) + assert voice in voice_names + return self.default_model_id + + def resolve_generation_language(self, lang: Optional[str]) -> Optional[str]: + """Normalize generation language tags to dots.tts-friendly values. + + Args: + lang: The requested language identifier, if any. + + Returns: + Optional[str]: The normalized backend-facing language identifier. + """ + if lang is None: + return None + + normalized = lang.strip().lower() + if not normalized or normalized == "auto": + return None + if normalized.startswith("zh"): + return "zh" + return normalized + + @staticmethod + @contextlib.contextmanager + def _suppress_backend_output() -> Generator[None, None, None]: + """Suppress unnecessary backend output.""" + with open(os.devnull, "w", encoding="utf-8") as devnull: + disabled_loguru = False + if loguru_logger is not None: + with contextlib.suppress(Exception): + loguru_logger.disable("dots_tts") + disabled_loguru = True + + try: + with contextlib.redirect_stdout(devnull): + with contextlib.redirect_stderr(devnull): + yield + finally: + if disabled_loguru and loguru_logger is not None: + with contextlib.suppress(Exception): + loguru_logger.enable("dots_tts") + + def model_is_available_locally( + self, model: str, lang: Optional[str] = None + ) -> tuple[bool, Optional[str]]: + """Check if a model is already available in the Hugging Face cache. + + Args: + model: The Hugging Face repository ID to inspect. + lang: The language identifier for differentiating models by language. + + Returns: + tuple[bool, Optional[str]]: A cache availability flag and the resolved snapshot path when present. + """ + del lang + return cached_hf_snapshot_path( + model, + [ + "config.json", + "*.safetensors", + "tokenizer_config.json", + ], + ) + + def load_model(self, model_id: str, **kwargs) -> DotsTtsRuntime: + """Load the given dots.tts model. + + Args: + model_id: The dots.tts model repository ID to load. + kwargs: Additional keyword arguments to use while loading dots.tts. + + Returns: + DotsTtsRuntime: The loaded dots.tts runtime instance. + """ + available, path = self.model_is_available_locally(model_id) + precision = kwargs.get("precision", "bfloat16") + optimize = bool(kwargs.get("optimize", False)) + max_generate_length = int( + kwargs.get("max_generate_length", self.max_new_tokens) + ) + + target = path if available and path is not None else model_id + if target == model_id: + self.log("Downloading TTS model...", "info") + + with local_hf_offline_mode(available and path is not None): + with self._suppress_backend_output(): + self.model = DotsTtsRuntime.from_pretrained( + target, + precision=precision, + optimize=optimize, + max_generate_length=max_generate_length, + ) + + return self.model + + @staticmethod + def _to_numpy_audio(chunk: torch.Tensor) -> npt.NDArray[np.float32]: + """Convert one streamed torch chunk to a Celune-compatible audio array.""" + audio = chunk.detach().float().cpu().numpy() + audio = np.asarray(audio, dtype=np.float32).reshape(-1) + return audio + + def generate_stream( + self, model: DotsTtsRuntime, **kwargs + ) -> Iterator[tuple[npt.NDArray[np.float32], int, Optional[dict]]]: + """Generate Celune-compatible audio chunks. + + Args: + model: The loaded dots.tts runtime instance. + kwargs: Streaming generation keyword arguments to use. + + Returns: + Iterator[tuple[npt.NDArray[np.float32], int, Optional[dict]]]: An iterator of dots.tts streaming audio + chunks. + + Raises: + ValueError: The requested voice is unsupported, or input text is empty. + """ + voice = kwargs.pop("voice", self.default_voice) + instruct = kwargs.pop("instruct", None) + language = self.resolve_generation_language(kwargs.pop("language", None)) + chunk_size = max(1, int(kwargs.pop("chunk_size", 1))) + text = kwargs.pop("text", None) + + kwargs.pop("temperature", None) + kwargs.pop("top_k", None) + kwargs.pop("top_p", None) + kwargs.pop("repetition_penalty", None) + + if not text: + raise ValueError("expected text to say") + + if instruct: + text = f"({instruct}) {text}" + + try: + loader, _ = self._require_compatible_bundle() + if voice not in loader.bundle.voices: + voice = next(iter(loader.bundle.voices), None) + if voice is None: + raise ValueError( + f"backend '{self.name}' requires at least one voice in the active pack" + ) + ref_wav = loader.materialize(voice, "wav") + configured_ref_text = loader.bundle.voices[voice].get("reference_text") + ref_text = ( + configured_ref_text if isinstance(configured_ref_text, str) else "" + ) + except KeyError as e: + raise ValueError( + f"unknown voice '{voice}' for backend '{self.name}'" + ) from e + + self._apply_seed() + + stream = None + try: + with self._suppress_backend_output(): + stream = model.generate_stream( + text=text, + prompt_audio_path=str(ref_wav), + prompt_text=ref_text, + language=language, + speaker_scale=float(kwargs.pop("speaker_scale", 1.5)), + ode_method="euler", + num_steps=4, + normalize_text=False, + **kwargs, + ) + + batch: list[npt.NDArray[np.float32]] = [] + chunk_index = 0 + total_steps = 0 + pending_audio: Optional[npt.NDArray[np.float32]] = None + pending_steps = 0 + + for chunk in stream: + batch.append(self._to_numpy_audio(chunk)) + if len(batch) < chunk_size: + continue + + if pending_audio is not None: + total_steps += pending_steps + yield ( + pending_audio, + int(getattr(model, "sample_rate", 48000)), + { + "backend": self.name, + "chunk_index": chunk_index, + "chunk_steps": pending_steps, + "total_steps_so_far": total_steps, + "is_final": False, + }, + ) + chunk_index += 1 + + pending_audio = np.concatenate(batch) + pending_steps = len(batch) + batch.clear() + + if batch: + if pending_audio is not None: + total_steps += pending_steps + yield ( + pending_audio, + int(getattr(model, "sample_rate", 48000)), + { + "backend": self.name, + "chunk_index": chunk_index, + "chunk_steps": pending_steps, + "total_steps_so_far": total_steps, + "is_final": False, + }, + ) + chunk_index += 1 + + total_steps += len(batch) + yield ( + np.concatenate(batch), + int(getattr(model, "sample_rate", 48000)), + { + "backend": self.name, + "chunk_index": chunk_index, + "chunk_steps": len(batch), + "total_steps_so_far": total_steps, + "is_final": True, + }, + ) + elif pending_audio is not None: + total_steps += pending_steps + yield ( + pending_audio, + int(getattr(model, "sample_rate", 48000)), + { + "backend": self.name, + "chunk_index": chunk_index, + "chunk_steps": pending_steps, + "total_steps_so_far": total_steps, + "is_final": True, + }, + ) + finally: + if stream is not None and hasattr(stream, "close"): + with contextlib.suppress(Exception): + stream.close() diff --git a/celune/backends/mini.py b/celune/backends/mini.py index a0e81ed..6d02f90 100644 --- a/celune/backends/mini.py +++ b/celune/backends/mini.py @@ -1,59 +1,24 @@ # SPDX-License-Identifier: MIT """Pocket TTS backend implementation for Celune.""" -import contextlib import tempfile +import contextlib from pathlib import Path from collections.abc import Iterator, Mapping -from typing import Callable, Optional, Protocol, cast +from typing import Callable, Optional, cast import yaml -import torch import numpy as np import numpy.typing as npt from pocket_tts import TTSModel from huggingface_hub import snapshot_download -from .base import CeluneBackend, cached_hf_snapshot_path -from ..cevoice import default_loader, CEVoiceLoader -from ..exceptions import BackendError from ..paths import temp_data_dir from ..utils import custom_assert - -type MiniPromptState = dict[str, dict[str, torch.Tensor]] - - -class MiniModel(Protocol): - """Pocket TTS model surface used by Celune's mini backend.""" - - sample_rate: int - - def get_state_for_audio_prompt(self, audio_conditioning: str) -> MiniPromptState: - """Return a reusable prompt state for one reference audio path. - - Args: - audio_conditioning: The audio conditioning string value. - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - def generate_audio_stream( - self, - model_state: MiniPromptState, - text_to_generate: str, - ) -> Iterator[torch.Tensor]: - """Yield streamed audio chunks for one prompt state and text. - - Args: - model_state: The current prompt state. - text_to_generate: The text to be generated. - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") +from ..exceptions import BackendError +from ..cevoice import default_loader, CEVoiceLoader +from ..typing.backends import MiniModel, MiniPromptState +from .base import CeluneBackend, cached_hf_snapshot_path class Mini(CeluneBackend[TTSModel]): @@ -154,7 +119,7 @@ def resolve_generation_language(self, lang: Optional[str]) -> str: lang: The language identifier for differentiating models by language. Returns: - A language-specific model identifier or "en" if no match was found. + str: A language-specific model identifier, or ``"en"`` if no match was found. """ alias_to_code: Mapping[str, str] = { "english": "en", @@ -335,7 +300,7 @@ def should_reload_for_language(self, lang: Optional[str]) -> bool: lang: The language identifier for differentiating models by language. Returns: - Whether Celune should reload a new Pocket TTS language model. + bool: Whether Celune should reload a new Pocket TTS language model. """ return self.resolve_generation_language(lang) != self._loaded_language diff --git a/celune/backends/qwen3.py b/celune/backends/qwen3.py index b9c7c45..ccae445 100644 --- a/celune/backends/qwen3.py +++ b/celune/backends/qwen3.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: MIT """Qwen3 backend implementation for Celune.""" -import os import contextlib from collections.abc import Iterator from typing import Callable, Optional @@ -10,10 +9,10 @@ import numpy.typing as npt from faster_qwen3_tts import FasterQwen3TTS, __version__ as qwen3_ver -from ..cevoice import default_loader, CEVoiceLoader -from ..exceptions import BackendError from ..utils import custom_assert -from .base import CeluneBackend, cached_hf_snapshot_path +from ..exceptions import BackendError +from ..cevoice import default_loader, CEVoiceLoader +from .base import CeluneBackend, cached_hf_snapshot_path, local_hf_offline_mode class Qwen3(CeluneBackend[FasterQwen3TTS]): @@ -145,17 +144,6 @@ def model_id_for_voice(self, voice: str) -> str: return self.clone_model_id - def generation_progress_total(self, text: Optional[str] = None) -> int: - """Return the Qwen3 streaming generation context length. - - Args: - text: The text to check context usage of with this value. - - Returns: - int: The max context length. - """ - return self.max_new_tokens - def model_is_available_locally( self, model: str, lang: Optional[str] = None ) -> tuple[bool, Optional[str]]: @@ -191,15 +179,8 @@ def load_model(self, model_id: str, **kwargs) -> FasterQwen3TTS: available, path = self.model_is_available_locally(model_id) if available and path is not None: - previous_offline = os.environ.get("HF_HUB_OFFLINE") - try: - os.environ["HF_HUB_OFFLINE"] = "1" + with local_hf_offline_mode(): self.model = FasterQwen3TTS.from_pretrained(path) - finally: - if previous_offline is None: - os.environ.pop("HF_HUB_OFFLINE", None) - else: - os.environ["HF_HUB_OFFLINE"] = previous_offline return self.model self.log("Downloading TTS model...", "info") @@ -255,7 +236,7 @@ def generate_stream( **kwargs, ) - for chunk in stream: # pylint: disable=R1737 + for chunk in stream: audio_chunk, sample_rate, timing = chunk if timing is not None: timing = dict(timing) diff --git a/celune/backends/voxcpm2.py b/celune/backends/voxcpm2.py index bf7f1e6..111c1ee 100644 --- a/celune/backends/voxcpm2.py +++ b/celune/backends/voxcpm2.py @@ -11,12 +11,12 @@ import numpy.typing as npt from voxcpm import VoxCPM -from ..constants import BASE_SR from . import get_version -from ..cevoice import default_loader, CEVoiceLoader -from ..exceptions import BackendError +from ..constants import BASE_SR from ..utils import custom_assert -from .base import CeluneBackend, cached_hf_snapshot_path +from ..exceptions import BackendError +from ..cevoice import default_loader, CEVoiceLoader +from .base import CeluneBackend, cached_hf_snapshot_path, local_hf_offline_mode class VoxCPM2(CeluneBackend[VoxCPM]): @@ -197,21 +197,13 @@ def load_model(self, model_id: str, **kwargs) -> VoxCPM: torch.use_deterministic_algorithms(True) if available and path is not None: - os.environ["HF_HUB_OFFLINE"] = "1" - with self._suppress_backend_output(): - previous_offline = os.environ.get("HF_HUB_OFFLINE") - try: - os.environ["HF_HUB_OFFLINE"] = "1" + with local_hf_offline_mode(): + with self._suppress_backend_output(): self.model = VoxCPM.from_pretrained( path, load_denoiser=kwargs.get("load_denoiser", False), optimize=kwargs.get("optimize", False), ) - finally: - if previous_offline is None: - os.environ.pop("HF_HUB_OFFLINE", None) - else: - os.environ["HF_HUB_OFFLINE"] = previous_offline return self.model @@ -246,6 +238,11 @@ def generate_stream( kwargs.pop("language", None) chunk_size = kwargs.pop("chunk_size", 1) + kwargs.pop("temperature", None) + kwargs.pop("top_k", None) + kwargs.pop("top_p", None) + kwargs.pop("repetition_penalty", None) + try: loader, _ = self._require_compatible_bundle() ref_wav = loader.materialize(voice, "wav") @@ -286,9 +283,10 @@ def generate_stream( stream = model.generate_streaming( text, reference_wav_path=ref_wav, - inference_timesteps=6, + inference_timesteps=4, cfg_value=cfg, max_len=self.max_new_tokens, + **kwargs, ) batch: list[npt.NDArray[np.float32]] = [] diff --git a/celune/celune.py b/celune/celune.py index 5f726a1..a48030b 100644 --- a/celune/celune.py +++ b/celune/celune.py @@ -7,31 +7,54 @@ import threading import contextlib from pathlib import Path -from collections.abc import Iterator -from typing import Optional, Callable, Protocol, Union, Any, cast +from typing import Optional, Callable, Union, cast import torch import numpy as np import numpy.typing as npt -import sounddevice as sd from transformers.modeling_utils import PreTrainedModel from transformers.utils.logging import disable_progress_bar from transformers.utils import logging as hf_logging -from transformers.tokenization_utils_base import BatchEncoding, PreTrainedTokenizerBase +from transformers.tokenization_utils_base import PreTrainedTokenizerBase from huggingface_hub.utils import disable_progress_bars -from .backends.qwen3 import Qwen3 from . import __version__ +from .dataclasses.celune import ( + CELUNE_CONSTANT_PROPERTIES, + CELUNE_FORWARDED_PROPERTIES, + CeluneAudioState, + CeluneBackendState, + CeluneCallbackState, + CeluneModelState, + CelunePipelineState, + CeluneRuntimeState, + CeluneVoiceState, +) +from .dataclasses.properties import ( + bind_constant_properties, + bind_forwarded_properties, +) from .chroma import AudioRGBGlow +from .backends.qwen3 import Qwen3 from .extensions.base import CeluneContext from .extensions.manager import CeluneExtensionManager -from .dsp import StreamingPedalboardReverb from .config import Config, config_bool, config_value +from .paths import project_root from .runtime import log_runtime_banner, validate_runtime -from .backends import BackendModel, CeluneBackend, resolve_backend +from .backends import CeluneBackend, resolve_backend from .exceptions import NotAvailableError, WarmupError, BackendError from .modeling import normalizer_device, load_normalizer_components -from .constants import APP_NAME, JSONSerializable, NORMALIZER_MODEL_ID, PipelineStates +from .constants import APP_NAME, JSONSerializable, NORMALIZER_MODEL_ID +from .typing.celune import ( + CeluneStateAccessors, + Generative, + InputStateCallback, + MessageCallback, + NormalizerTokenizer, + ProgressCallback, + ReleasableObject, + VoiceLockStateCallback, +) from .utils import format_number, format_error, discard, is_port_usable, custom_assert from .vram import ( QWEN3_0_6B_MODEL, @@ -50,7 +73,6 @@ persona_quantization, ) from .cevoice import ( - CEVoicePersona, announce_default_bundle, bundle_character_name, default_bundle_path, @@ -59,9 +81,6 @@ select_voice_bundle, ) from .pipeline import ( - AudioQueueItem, - SpeechStreamQueue, - TextQueueItem, acquire_pipeline, clear_queue, close as close_pipeline, @@ -76,8 +95,9 @@ say as say_pipeline, think as think_pipeline, split_text, - play_readiness_signal, + play_signal, ) +from .typing.pipeline import SpeechStreamQueue def _config_str(value: JSONSerializable) -> Optional[str]: @@ -96,61 +116,7 @@ def _config_int(value: JSONSerializable, default: int) -> int: raise TypeError("config value cannot be converted to int") -class _SupportsClose(Protocol): - def close(self) -> None: - """Fake return value of close(). - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - -class _SupportsUnload(Protocol): - def unload(self) -> None: - """Fake return value of unload(). - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - -class _Generative(Protocol): - def generate(self, **kwargs: Any) -> torch.Tensor: - """Fake return value of generate(). - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - def device(self) -> Union[torch.device, str]: - """Fake return value of device(). - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - def parameters(self) -> Iterator[torch.nn.Parameter]: - """Fake return value of device(). - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - -type _ReleasableObject = Union[ - _SupportsClose, - _SupportsUnload, - PreTrainedModel, - PreTrainedTokenizerBase, -] - - -def _release_loaded_object(value: _ReleasableObject) -> None: +def _release_loaded_object(value: ReleasableObject) -> None: """Best-effort release hook for one loaded runtime object.""" close = getattr(value, "close", None) if callable(close): @@ -164,85 +130,7 @@ def _release_loaded_object(value: _ReleasableObject) -> None: unload() -class NormalizerTokenizer(Protocol): - """Tokenizer behavior CeluneNorm uses during normalization.""" - - unk_token_id: Optional[int] - pad_token_id: Optional[int] - eos_token_id: Optional[int] - - def convert_tokens_to_ids(self, tokens: str) -> Optional[int]: - """Convert one token to its integer ID. - - Args: - tokens: A token to convert to ID. - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - def __call__( - self, - text: str, - *, - return_tensors: str, - add_special_tokens: bool, - ) -> BatchEncoding: - """Tokenize text for model input.""" - raise NotImplementedError("protocol not defined") - - def decode( - self, - token_ids: torch.Tensor, - *, - skip_special_tokens: bool, - ) -> Union[str, list[str]]: - """Decode generated token IDs. - - Args: - token_ids: Token IDs to decode. - skip_special_tokens: Whether special tokens should be skipped while decoding. - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - -class MessageCallback(Protocol): - """Callback accepting a message and optional severity.""" - - def __call__(self, msg: str, severity: str = "info") -> None: - """Handle a message emitted by Celune.""" - raise NotImplementedError("protocol not defined") - - -class InputStateCallback(Protocol): - """Callback accepting either positional or named lock state.""" - - def __call__(self, locked: bool) -> None: - """Handle input lock-state changes.""" - raise NotImplementedError("protocol not defined") - - -class VoiceLockStateCallback(Protocol): - """Callback accepting either positional or named lock state.""" - - def __call__(self, locked: bool) -> None: - """Handle voice lock-state changes.""" - raise NotImplementedError("protocol not defined") - - -class ProgressCallback(Protocol): - """Callback accepting progress and total values.""" - - def __call__(self, progress: Optional[float], total: Optional[float]) -> None: - """Handle a progress update emitted by Celune.""" - raise NotImplementedError("protocol not defined") - - -class Celune: +class Celune(CeluneStateAccessors): """The character engine for Celune.""" _instance: Optional["Celune"] = None @@ -268,27 +156,34 @@ def __init__( if Celune._instance is not None: raise RuntimeError(f"can only instantiate {self.__class__.__name__} once") - self.log_callback: MessageCallback = log_callback or self._noop_message - self.status_callback: MessageCallback = status_callback or self._noop_message - self.error_callback = error_callback or (lambda error: None) - self.idle_callback = idle_callback or (lambda: None) - self.queue_avail_callback = queue_avail_callback or (lambda: None) - self.voice_changed_callback = voice_changed_callback or (lambda name: None) - self.change_input_state_callback: InputStateCallback = ( - change_input_state_callback or self._noop_input_state - ) - self.change_voice_lock_state_callback: VoiceLockStateCallback = ( - change_voice_lock_state_callback or self._noop_voice_lock_state - ) - self.progress_callback: ProgressCallback = ( - progress_callback or self._noop_progress + self._callbacks = CeluneCallbackState( + log_callback=log_callback or self._noop_message, + status_callback=status_callback or self._noop_message, + error_callback=error_callback or (lambda error: None), + idle_callback=idle_callback or (lambda: None), + queue_avail_callback=queue_avail_callback or (lambda: None), + voice_changed_callback=voice_changed_callback or (lambda name: None), + change_input_state_callback=( + change_input_state_callback or self._noop_input_state + ), + change_voice_lock_state_callback=( + change_voice_lock_state_callback or self._noop_voice_lock_state + ), + progress_callback=(progress_callback or self._noop_progress), ) + self._backend_state = CeluneBackendState(config=config) + self._model_state = CeluneModelState() + self._voice_state = CeluneVoiceState() + self._pipeline_state = CelunePipelineState() + self._audio_state = CeluneAudioState() + self._runtime_state = CeluneRuntimeState() + self._pipeline_state.model_ready.set() + self._pipeline_state.playback_done.set() + self.config = config select_voice_bundle(_config_str(config_value(config, "voice_bundle"))) preset = resolve_vram_preset(config) - self._backend_spec: Optional[Union[str, type[CeluneBackend]]] = None - self._backend_kwargs: dict[str, JSONSerializable] = {} if tts_backend is None: tts_backend = preset.default_backend @@ -373,68 +268,11 @@ def __init__( ) self.language = language - - self.model: Optional[BackendModel] = None - self.model_name = "" - self.llm: Optional[PreTrainedModel] = None - self.tokenizer: Optional[PreTrainedTokenizerBase] = None - self._last_warmup_error: Optional[Exception] = None - self._normalizer_load_epoch = 0 - - self.current_voice: Optional[str] = None - self.current_character: Optional[str] = None - self.current_character_persona: Optional[CEVoicePersona] = None - self.voice_bundle_is_default = True - self.persona_history: list[dict[str, str]] = [] - self.persona_attachments: list[dict[str, str]] = [] - self.voices: tuple[str, ...] = () - self.voice_prompt: Optional[str] = None - - self.text_queue: queue.Queue[TextQueueItem] = queue.Queue() - self.audio_queue: queue.Queue[AudioQueueItem] = queue.Queue() - - self._playback_thread: Optional[threading.Thread] = None - self._generation_thread: Optional[threading.Thread] = None - self._api_thread: Optional[threading.Thread] = None - self._persona_thread: Optional[threading.Thread] = None - self._queue_lock = threading.Lock() - self._utterance_force_stop = threading.Event() - self.regenerate = False - - self._stream: Optional[sd.OutputStream] = None - self._current_sr: Optional[int] = None - self._audio_unavailable = False - self.can_use_rubberband = True - self.speed: float = 1.0 - self.reverb = StreamingPedalboardReverb() - - self.locked = True - self.loaded = False - self.sleeping = False - self.recently_saved: Optional[str] = None - self.kept_sfx_audio: Optional[npt.NDArray[np.float32]] = None - - self._last_flavor: Optional[str] = None - self._ready_announced = False - self._model_ready = threading.Event() - self._model_ready.set() - self._exit_requested = False - self._playback_done = threading.Event() - self._playback_done.set() - self._say_lock = threading.Lock() - self._wake_lock = threading.Lock() - self._model_lock = threading.RLock() - - self.cur_state = "init" - self.is_in_tutorial = False - self.dev = dev self.use_normalization = config_bool( config, "CELUNE_NORMALIZE", "use_normalizer" ) - self.extension_manager: Optional[CeluneExtensionManager] = None - glow_color = "#cebaff" loader = default_loader() if loader is not None: @@ -451,11 +289,13 @@ def __init__( self.glow = AudioRGBGlow(celune=self, color=glow_color) self.glow.start() - self.vision: Optional[PersonaClient] self.vision = self._persona_conn() Celune._instance = self + bind_forwarded_properties(locals(), CELUNE_FORWARDED_PROPERTIES) + bind_constant_properties(locals(), CELUNE_CONSTANT_PROPERTIES) + @staticmethod def _noop_message(msg: str, severity: str = "info") -> None: """Discard a message callback.""" @@ -651,6 +491,9 @@ def enter_sleep_mode(self) -> bool: self.cur_state = "sleeping" self.glow.sleep() + if not self._try_play_signal("sleeping"): + self.log_dev("Could not play the sleeping signal.", "warning") + self._ready_announced = False self.model_ready.clear() self.progress_callback(0, 1) @@ -738,6 +581,8 @@ def wake_from_sleep(self) -> bool: self.loaded = False self.log(f"[WAKE ERROR] {format_error(e, self.dev)}", "error") self.glow.fatal() + if not self._try_play_signal("error"): + self.log_dev("Could not play the error signal.", "warning") self.cur_state = "error" self.status_callback(f"{APP_NAME} could not wake", "error") self.progress_callback(0, 1) @@ -833,7 +678,7 @@ def set_voice(self, name: str) -> bool: bool: ``True`` when the reload thread was started, otherwise ``False``. """ if name not in self.voices: - # this voice was not found in the current CEVOICE pack + # this voice was not found in the current CEVOICE/CECHAR pack self.log(f"Unknown voice: {name}", "warning") return False @@ -918,7 +763,7 @@ def setup_extensions(self) -> None: log_dev=self.log_dev, ) self.extension_manager = CeluneExtensionManager(ctx) - self.extension_manager.autoload("extensions") + self.extension_manager.autoload(str(project_root() / "extensions")) self.log_dev( f"[Core] Loaded extensions: {', '.join(self.extension_manager.list_extensions())}" @@ -943,6 +788,17 @@ def log_dev(self, msg: str, severity: str = "info") -> None: if self.dev: self.log_callback(msg, severity) + def _try_play_signal(self, signal_type: str) -> bool: + """Play a runtime signal only when the playback pipeline can currently accept it.""" + playback_thread = self.playback_thread + if playback_thread is None or not playback_thread.is_alive(): + return False + + if self.locked and self._playback_done.is_set(): + self._release_pipeline() + + return play_signal(self, signal_type) + def voice_prompt_supported(self) -> bool: """Return whether the active TTS configuration supports voice prompts. @@ -986,6 +842,8 @@ def change_voice(self, voice: str) -> None: # VoxCPM2 uses the same model for all voices, so we don't have to reload every time if new_model_name != self.model_name: + if not self._try_play_signal("working"): + self.log_dev("Could not play the working signal.", "warning") self.log_dev(f"[RELOAD] Unloading model: {self.model_name}") self.unload_runtime_state(include_normalizer=False) self.log_dev(f"[RELOAD] Loading model: {new_model_name}") @@ -995,7 +853,7 @@ def change_voice(self, voice: str) -> None: if not self._warmup(): self._raise_warmup_error("warmup failed after reload") - if not play_readiness_signal(self): + if not self._try_play_signal("readiness"): self.log_dev("Could not play the readiness signal.", "warning") self.log_dev( @@ -1008,11 +866,14 @@ def change_voice(self, voice: str) -> None: self.voice_changed_callback(voice) self.log(f"Voice {voice} loaded.") self.progress_callback(1, 1) + self.cur_state = "idle" self.status_callback("Idle") except Exception as e: self.loaded = False self.log(f"[RELOAD ERROR] {format_error(e, self.dev)}", "error") self.glow.fatal() + if not self._try_play_signal("error"): + self.log_dev("Could not play the error signal.", "warning") self.status_callback(f"{APP_NAME} could not reload", "error") self.progress_callback(0, 1) self.error_callback(f"{APP_NAME} could not reload") @@ -1043,6 +904,8 @@ def load(self) -> bool: if not self.load_available_voices(): self.log("No voices were loaded.", "error") self.glow.fatal() + if not self._try_play_signal("error"): + self.log_dev("Could not play the error signal.", "warning") self.progress_callback(0, 1) self.error_callback("No voices loaded") return False @@ -1082,6 +945,8 @@ def load(self) -> bool: self.log(f"{APP_NAME} could not load the default model.", "error") self.log(format_error(e, self.dev), "error") self.glow.fatal() + if not self._try_play_signal("error"): + self.log_dev("Could not play the error signal.", "warning") self.progress_callback(0, 1) self.error_callback("Default model failed to load") return False @@ -1123,6 +988,8 @@ def load(self) -> bool: backend_name=self.backend.name, ): self.glow.fatal() + if not self._try_play_signal("error"): + self.log_dev("Could not play the error signal.", "warning") return False if self._warmup(): @@ -1150,7 +1017,7 @@ def load(self) -> bool: ) # notify readiness - if not play_readiness_signal(self): + if not self._try_play_signal("readiness"): self.log_dev("Could not play the readiness signal.", "warning") return True @@ -1340,6 +1207,9 @@ def _warmup(self) -> bool: self.log(f"[WARMUP ERROR] {format_error(e, self.dev)}", "error") self.cur_state = "error" self.glow.fatal() + + if not self._try_play_signal("error"): + self.log_dev("Could not play the error signal.", "warning") self.progress_callback(0, 1) self.error_callback(f"{APP_NAME} could not warm up") return False @@ -1365,7 +1235,7 @@ def normalize(self, text: str) -> Optional[str]: if self.llm is None or self.tokenizer is None: return None - llm = cast(_Generative, self.llm) + llm = cast(Generative, self.llm) tokenizer = cast(NormalizerTokenizer, self.tokenizer) def _run_inference() -> Optional[str]: @@ -1549,17 +1419,18 @@ def say_stream(self, text: str, save: bool = True) -> Optional[SpeechStreamQueue return None return stream_queue - def play(self, sound_path: str, keep: bool = False) -> bool: + def play(self, sound_path: str, keep: bool = False, volume: float = 1.0) -> bool: """Play a sound via Celune's pipeline. Args: sound_path: The path to the audio file to play. keep: Whether to prepend this SFX to the next saved utterance. + volume: How loud should the SFX be played at. Returns: bool: ``True`` when playback was queued successfully, otherwise ``False``. """ - return play_pipeline(self, sound_path, keep=keep) + return play_pipeline(self, sound_path, keep=keep, volume=volume) def play_audio( self, @@ -1582,7 +1453,7 @@ def play_audio( return queue_sfx_audio(self, audio, sample_rate, label, keep=keep) def close(self) -> None: - """Shut off Celune and exit.""" + """Shut off Celune and release loaded runtime state.""" try: close_pipeline(self) self._unload_persona_state() @@ -1602,156 +1473,3 @@ def _generation_worker(self) -> None: def _playback_worker(self) -> None: """Receive audio chunks and play them.""" playback_worker(self) - - @property - def stream(self) -> Optional[sd.OutputStream]: - """Get the current audio output stream. - - Returns: - Optional[sounddevice.OutputStream]: The active audio stream, if any. - """ - return self._stream - - @stream.setter - def stream(self, value: Optional[sd.OutputStream]) -> None: - """Set the current audio output stream. - - Args: - value: The new output stream object. - """ - self._stream = value - - @property - def say_lock(self): - """Get the speech pipeline lock. - - Returns: - threading.Lock: The lock guarding speech and playback state changes. - """ - return self._say_lock - - @property - def utterance_force_stop(self): - """Get the force-stop event for the current utterance. - - Returns: - threading.Event: The event used to interrupt active speech. - """ - return self._utterance_force_stop - - @property - def queue_lock(self): - """Get the queue coordination lock. - - Returns: - threading.Lock: The lock guarding queue mutations. - """ - return self._queue_lock - - @property - def force_stop_marker(self): - """Get the queue marker used to stop playback immediately. - - Returns: - PipelineStates: The sentinel inserted into the audio queue. - """ - return PipelineStates.UTTERANCE_FORCE_END - - @property - def playback_done(self): - """Get the playback completion event. - - Returns: - threading.Event: The event set when playback is idle. - """ - return self._playback_done - - @property - def model_ready(self): - """Get the model readiness event. - - Returns: - threading.Event: The event set when the speech model is ready to use. - """ - return self._model_ready - - @property - def utterance_done(self): - """Get the marker that signals utterance completion. - - Returns: - PipelineStates: The sentinel inserted when generation finishes. - """ - return PipelineStates.UTTERANCE_END - - @property - def sentinel(self): - """Get the global shutdown sentinel. - - Returns: - PipelineStates: The sentinel used to stop worker threads. - """ - return PipelineStates.TERMINATE - - @property - def generation_thread(self) -> Optional[threading.Thread]: - """Get the generation worker thread. - - Returns: - Optional[threading.Thread]: The active generation thread, if started. - """ - return self._generation_thread - - @property - def playback_thread(self) -> Optional[threading.Thread]: - """Get the playback worker thread. - - Returns: - Optional[threading.Thread]: The active playback thread, if started. - """ - return self._playback_thread - - @property - def exit_requested(self): - """Get the exit flag. - - Returns: - bool: ``True`` when Celune is shutting down, otherwise ``False``. - """ - return self._exit_requested - - @property - def model_lock(self): - """Get the model access lock. - - Returns: - threading.RLock: The lock guarding model access and reloads. - """ - return self._model_lock - - @property - def audio_unavailable(self): - """Get the audio availability flag. - - Returns: - bool: ``True`` when audio output initialization has failed. - """ - return self._audio_unavailable - - @property - def current_sr(self) -> Optional[int]: - """Get the active stream sample rate. - - Returns: - Optional[int]: The current playback sample rate, if a stream exists. - """ - return self._current_sr - - @current_sr.setter - def current_sr(self, value: Optional[int]) -> None: - """Set the active stream sample rate. - - Args: - value: The new playback sample rate. - """ - self._current_sr = value diff --git a/celune/cevoice.py b/celune/cevoice.py index 0f29b94..5fb4709 100644 --- a/celune/cevoice.py +++ b/celune/cevoice.py @@ -14,25 +14,27 @@ from typing import BinaryIO, Callable, Final, Mapping, Optional, Union, cast from .exceptions import CEVoiceError -from .constants import JSONSerializable -from .paths import temp_data_dir +from .paths import project_root, temp_data_dir +from .typing.cevoice import Manifest, ManifestValue, VoiceManifest +# Celune supports both of these specifications +# CECHAR v2 spec (Celune v4 format) MAGIC: Final[bytes] = b"CECHAR\0\0" VERSION: Final[int] = 2 +FORMAT_NAME: Final[str] = "CECHAR" + +# CEVOICE v1 spec (Celune v3.5 format) LEGACY_MAGIC: Final[bytes] = b"CEVOICE\0" LEGACY_VERSION: Final[int] = 1 -FORMAT_NAME: Final[str] = "CECHAR" LEGACY_FORMAT_NAME: Final[str] = "CEVOICE" + HEADER = struct.Struct("<8sHI") ALLOWED_ASSET_KINDS = {"wav", "pt"} -type ManifestValue = Union[JSONSerializable, "Manifest"] -type Manifest = dict[str, ManifestValue] -type VoiceManifest = dict[str, Manifest] @dataclass(frozen=True) class CEVoiceAsset: - """One binary asset stored inside a CEVOICE package.""" + """One binary asset stored inside a CEVOICE/CECHAR package.""" offset: int length: int @@ -41,7 +43,7 @@ class CEVoiceAsset: @dataclass(frozen=True) class CEVoice: - """Parsed CEVOICE package metadata and payload access.""" + """Parsed CEVOICE/CECHAR package metadata and payload access.""" path: Path metadata: Manifest @@ -49,16 +51,16 @@ class CEVoice: @classmethod def open(cls, path: Union[str, Path]) -> CEVoice: - """Parse and validate a CEVOICE package. + """Parse and validate a CEVOICE/CECHAR package. Args: - path: The CEVOICE package to load. + path: The CEVOICE/CECHAR package to load. Returns: CEVoice: The CEVoice object. Raises: - CEVoiceError: The CEVOICE package is malformed and could not be loaded. + CEVoiceError: The CEVOICE/CECHAR package is malformed and could not be loaded. """ bundle_path = Path(path) with bundle_path.open("rb") as stream: @@ -90,10 +92,10 @@ def voices(self) -> VoiceManifest: """Return the voice manifest. Returns: - VoiceManifest: The voice manifest of this CEVOICE package. + VoiceManifest: The voice manifest of this CEVOICE/CECHAR package. Raises: - CEVoiceError: The CEVOICE package does not contain a valid voice manifest. + CEVoiceError: The CEVOICE/CECHAR package does not contain a valid voice manifest. """ voices = self.metadata.get("voices") if not isinstance(voices, dict): @@ -166,7 +168,7 @@ def read_asset(self, voice: str, kind: str) -> bytes: @dataclass(frozen=True, slots=True) class PersonaIdentity: - """Identity details supplied by a CEVOICE pack.""" + """Identity details supplied by a CEVOICE/CECHAR pack.""" name: str = "" age: str = "" @@ -176,7 +178,7 @@ class PersonaIdentity: @dataclass(frozen=True, slots=True) class PersonaStyleValues: - """Baseline speaking-style values supplied by a CEVOICE pack.""" + """Baseline speaking-style values supplied by a CEVOICE/CECHAR pack.""" warmth: str = "" directness: str = "" @@ -188,7 +190,7 @@ class PersonaStyleValues: @dataclass(frozen=True, slots=True) class CEVoicePersona: - """Persona metadata supplied by a CEVOICE pack.""" + """Persona metadata supplied by a CEVOICE/CECHAR pack.""" identity: PersonaIdentity = field(default_factory=PersonaIdentity) speaking_style: str = "" @@ -224,7 +226,7 @@ def materialize(self, voice: str, kind: str, suffix: Optional[str] = None) -> Pa Path: The path to the extracted voice asset. Raises: - CEVoiceError: The CEVOICE package contains path delimiters. + CEVoiceError: The CEVOICE/CECHAR package contains path delimiters. """ key = (voice, kind) if key not in self._paths: @@ -250,19 +252,19 @@ def write_cevoice( metadata: Optional[Mapping[str, ManifestValue]] = None, voice_metadata: Optional[Mapping[str, Mapping[str, ManifestValue]]] = None, ) -> Path: - """Write a CEVOICE package from per-voice binary assets. + """Write a CEVOICE/CECHAR package from per-voice binary assets. Args: - path: The CEVOICE package to save as. - voices: The voice files to bundle into this CEVOICE package. - metadata: The metadata to bundle into this CEVOICE package. + path: The CEVOICE/CECHAR package to save as. + voices: The voice files to bundle into this CEVOICE/CECHAR package. + metadata: The metadata to bundle into this CEVOICE/CECHAR package. voice_metadata: Extra metadata stored beside each voice's assets. Returns: - Path: The path to the created CEVOICE package. + Path: The path to the created CEVOICE/CECHAR package. Raises: - CEVoiceError: The CEVOICE package contains path delimiters. + CEVoiceError: The CEVOICE/CECHAR package contains path delimiters. """ payload = bytearray() manifest_voices: VoiceManifest = {} @@ -536,10 +538,10 @@ def persona_metadata_from_manifest( """Return typed persona metadata from a CEVOICE manifest when present. Args: - metadata: The CEVOICE package manifest. + metadata: The CEVOICE/CECHAR package manifest. Returns: - Optional[CEVoicePersona]: The Persona metadata from the current CEVOICE package. + Optional[CEVoicePersona]: The Persona metadata from the current CEVOICE/CECHAR package. """ raw_persona = metadata.get("persona") if not isinstance(raw_persona, dict): @@ -575,13 +577,13 @@ def persona_metadata_from_manifest( def bundle_character_name(bundle: CEVoice) -> Optional[str]: - """Return the active character name implied by one CEVOICE package. + """Return the active character name implied by one CEVOICE/CECHAR package. Args: - bundle: The CEVOICE package to use. + bundle: The CEVOICE/CECHAR package to use. Returns: - Optional[str]: The character name from the current CEVOICE package. + Optional[str]: The character name from the current CEVOICE/CECHAR package. """ persona = persona_metadata_from_manifest(bundle.metadata) if persona is not None and persona.identity.name.strip(): @@ -613,7 +615,7 @@ def default_bundle_path() -> Path: Returns: Path: The absolute path to Celune's default voice bundle. """ - return Path(__file__).resolve().parent.parent / "voices" / "default.cevoice" + return project_root() / "voices" / "default.cevoice" def bundled_voices_dir() -> Path: @@ -622,17 +624,17 @@ def bundled_voices_dir() -> Path: Returns: Path: The absolute path to the bundled CEVOICE directory. """ - return Path(__file__).resolve().parent.parent / "voices" + return project_root() / "voices" def resolve_bundle_path(bundle: Optional[Union[str, Path]] = None) -> Path: - """Resolve a configured CEVOICE package name or path. + """Resolve a configured CEVOICE/CECHAR package name or path. Args: bundle: Either a built-in bundle name, an explicit bundle path, or ``None`` to select Celune's default bundle. Returns: - Path: The resolved CEVOICE package path. + Path: The resolved CEVOICE/CECHAR package path. """ if bundle is None: return default_bundle_path() @@ -647,13 +649,13 @@ def resolve_bundle_path(bundle: Optional[Union[str, Path]] = None) -> Path: def select_voice_bundle(bundle: Optional[Union[str, Path]] = None) -> Path: - """Select the CEVOICE package used by Celune's shared loader. + """Select the CEVOICE/CECHAR package used by Celune's shared loader. Args: bundle: Either a built-in bundle name, an explicit bundle path, or ``None`` to restore Celune's default bundle. Returns: - Path: The selected CEVOICE package path. + Path: The selected CEVOICE/CECHAR package path. """ global _DEFAULT_LOADER, _DEFAULT_LOADER_INITIALIZED global _DEFAULT_LOADER_ANNOUNCED, _DEFAULT_LOADER_FAILED, _SELECTED_BUNDLE @@ -683,7 +685,7 @@ def select_voice_bundle(bundle: Optional[Union[str, Path]] = None) -> Path: def active_bundle_path() -> Path: - """Return the currently selected CEVOICE package path. + """Return the currently selected CEVOICE/CECHAR package path. Returns: Path: The selected bundle path, or Celune's default bundle path. @@ -692,10 +694,10 @@ def active_bundle_path() -> Path: def default_loader() -> Optional[CEVoiceLoader]: - """Check if a default CEVOICE package can be loaded and return the loader. + """Check if a default CEVOICE/CECHAR package can be loaded and return the loader. Returns: - Optional[CEVoiceLoader]: The default CEVOICE package loader. + Optional[CEVoiceLoader]: The default CEVOICE/CECHAR package loader. """ global _DEFAULT_LOADER, _DEFAULT_LOADER_INITIALIZED, _DEFAULT_LOADER_FAILED global _DEFAULT_LOADER_FELL_BACK_FROM diff --git a/celune/chroma.py b/celune/chroma.py index 87554f7..856743b 100644 --- a/celune/chroma.py +++ b/celune/chroma.py @@ -13,11 +13,11 @@ import numpy as np import numpy.typing as npt -from openrgb.utils import RGBColor from openrgb import OpenRGBClient +from openrgb.utils import RGBColor -from .colors import RGB from .dsp import _split +from .colors import RGB, ERROR from .constants import BASE_SR from .utils import to_rgb, lunar_info, range_interpolated, is_celune_day @@ -39,7 +39,7 @@ def __init__( self._fix_color_rendering(to_rgb(color)), dtype=np.float32 ) self.fatal_color = np.array( - self._fix_color_rendering(to_rgb("#ce2006")), dtype=np.float32 + self._fix_color_rendering(to_rgb(ERROR)), dtype=np.float32 ) self._current_color = self.base_color.copy() self._target_color = self.base_color.copy() @@ -53,17 +53,7 @@ def __init__( self.client = None self.devices = [] - self.speech_threshold = 0.06 - self._level_history = np.zeros(3, dtype=np.float32) self._scheduled_chunks = deque() - - self.hold_duration = 1.8 - - # different pipeline configurations had different glow effects - # it currently performs a heartbeat-like pulse effect - self.pulse = True - self.fade_in_rate = 0.03 - self.fade_out_rate = 0.02 self.fps = 60 self.transition_rate = 0.02 @@ -96,8 +86,8 @@ def __init__( self.max_brightness = 1.0 self.input_gain = 4.0 - self.gamma = 1.4 - self.pulse_rate = 1.1 + self.gamma = 1.8 + self.smoothing_factor = 0.8 self.fast = True self._lock = threading.Lock() @@ -107,7 +97,7 @@ def __init__( self._current_brightness = 0.0 self._target_brightness = self.idle_brightness self._sleep_restore_brightness = self.idle_brightness - self._last_speech_time = 0.0 + self._smoothed_level = 0.0 self._state = "none" @@ -130,7 +120,7 @@ def connect(self) -> bool: with contextlib.suppress(Exception): device.set_custom_mode() return True - except TimeoutError: + except (TimeoutError, OSError): self.client = None self.connect_failed = True self.devices = [] @@ -236,7 +226,8 @@ def schedule(self, audio: npt.NDArray[np.float32]) -> None: if not self.start(): return - chunks = _split(audio, BASE_SR, 8) + chunk_seconds = 1.0 / float(self.fps) + chunks = _split(audio, BASE_SR, chunk_seconds) now = time.monotonic() offset = 0.0 @@ -257,19 +248,38 @@ def glow(self, audio: npt.NDArray[np.float32]) -> None: self._process_glow_chunk(audio, time.monotonic()) + def reset_audio_reactivity(self) -> None: + """Clear queued audio and fade the glow back to its idle brightness.""" + if self._worker is None or not self._worker.is_alive(): + return + + with self._lock: + self._scheduled_chunks.clear() + self._smoothed_level = 0.0 + if self._state not in {"fatal", "sleeping", "waking", "leaving", "none"}: + self._state = "normal" + self._target_brightness = self.idle_brightness + def _process_glow_chunk(self, audio: npt.NDArray[np.float32], now: float) -> None: - """Process one audio chunk and update recent speech timing.""" + """Process one audio chunk and update audio-reactive brightness.""" + del now level = self._speech_level(audio) - - self._level_history[:-1] = self._level_history[1:] - self._level_history[-1] = level - smoothed_level = float(np.mean(self._level_history)) + smoothing = float(np.clip(self.smoothing_factor, 0.0, 0.98)) + self._smoothed_level = (self._smoothed_level * smoothing) + ( + level * (1.0 - smoothing) + ) + smoothed_level = float(np.clip(self._smoothed_level, 0.0, 1.0)) with self._lock: - if smoothed_level > self.speech_threshold: - self._state = "normal" - self._last_speech_time = now - self._target_brightness = self.max_brightness + self._state = "normal" + self._target_brightness = float( + np.clip( + self.idle_brightness + + (self.max_brightness - self.idle_brightness) * smoothed_level, + self.idle_brightness, + self.max_brightness, + ) + ) @staticmethod def _to_mono(audio: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]: @@ -298,13 +308,14 @@ def _fix_color_rendering(rgb: RGB) -> RGB: return int(np.clip(r, 0, 255)), int(np.clip(g, 0, 255)), int(np.clip(b, 0, 255)) def _speech_level(self, audio: npt.NDArray[np.float32]) -> float: - """Calculate normalized speech activity level.""" + """Calculate normalized audio activity level from RMS energy.""" audio = self._to_mono(audio) if audio.size == 0: return 0.0 - amp = float(np.mean(np.abs(audio), dtype=np.float64)) - level = np.clip(amp * self.input_gain, 0.0, 1.0) + rms = float(np.sqrt(np.mean(np.square(audio), dtype=np.float64))) + level = np.clip(rms * self.input_gain, 0.0, 1.0) + level = float(np.log1p(6.0 * level) / np.log1p(6.0)) level = level ** (1.0 / self.gamma) return float(np.clip(level, 0.0, 1.0)) @@ -336,7 +347,6 @@ def _run(self) -> None: with self._lock: state = self._state target = self._target_brightness - last_speech = self._last_speech_time if state == "entering": target = self.idle_brightness @@ -389,23 +399,8 @@ def _run(self) -> None: self._state = "normal" else: - speaking_for = now - last_speech - target = self.idle_brightness - - if speaking_for > self.hold_duration: - target = self.idle_brightness - elif self.pulse: - pulse_phase = 2.0 * np.pi * self.pulse_rate * speaking_for - pulse_wave = 0.5 * (1.0 + np.sin(pulse_phase)) - target = self.idle_brightness + ( - (self.max_brightness - self.idle_brightness) * pulse_wave - ) - - alpha = ( - self.fade_in_rate - if target > self._current_brightness - else self.fade_out_rate - ) + target = max(target, self.idle_brightness) + alpha = max(self.transition_rate, min(0.25, frame_sleep * 6.0)) self._current_brightness += (target - self._current_brightness) * alpha self._current_brightness = float( np.clip( diff --git a/celune/colors.py b/celune/colors.py index 261d295..35a1c24 100644 --- a/celune/colors.py +++ b/celune/colors.py @@ -7,12 +7,13 @@ from textual.theme import Theme +from .typing.common import RGB from .utils import to_rgb DEFAULT_BACKGROUND: Final[str] = "#1d1826" DEFAULT_ACCENT: Final[str] = "#cebaff" FADED_ACCENT: Final[str] = "#9c88ce" -RGB = tuple[int, int, int] +ERROR: Final[str] = "#ce2006" def random_hex() -> str: @@ -205,9 +206,9 @@ def configure_theme( """Rebuild Celune's theme family from three bundle-provided seed colors. Args: - background: The background color provided by a CEVOICE pack. - accent: The accent color provided by a CEVOICE pack. - faded_accent: The sleep-state accent color provided by a CEVOICE pack. + background: The background color provided by a CEVOICE/CECHAR pack. + accent: The accent color provided by a CEVOICE/CECHAR pack. + faded_accent: The sleep-state accent color provided by a CEVOICE/CECHAR pack. """ global THEME, THEME_LIGHT, SEVERITY_COLORS dark_sleeping = FADED_ACCENT if faded_accent is None else faded_accent diff --git a/celune/config.py b/celune/config.py index 0b1f1fa..6f1b16f 100644 --- a/celune/config.py +++ b/celune/config.py @@ -2,14 +2,13 @@ """Configuration helpers for Celune.""" import os -from collections.abc import Mapping from copy import deepcopy from typing import Optional +from collections.abc import Mapping -from .constants import JSONSerializable +from .typing.common import Config, JSONSerializable ENABLED_ENV_VALUES = {"1", "true", "on", "yes", "enabled"} -type Config = dict[str, JSONSerializable] def env_bool(name: str, fallback: bool = False) -> bool: diff --git a/celune/constants.py b/celune/constants.py index 470f1f8..356a758 100644 --- a/celune/constants.py +++ b/celune/constants.py @@ -3,12 +3,17 @@ import signal import datetime -from enum import auto -from enum import IntEnum, Enum -from typing import Union +from enum import auto, IntEnum, Enum + +from .typing.common import JSON as _JSON +from .typing.common import JSONSerializable as _JSONSerializable + +JSONSerializable = _JSONSerializable +JSON = _JSON # main app name # why would you rename her? she doesn't approve of it +# don't blame her when you fork Celune and rename her to something else APP_NAME = "Celune" APP_SLUG = "".join(char if char.isalnum() else "_" for char in APP_NAME.lower()) @@ -21,7 +26,7 @@ NORMALIZER_MODEL_ID = "lunahr/CeluneNorm-0.6B-v2.0-ctx2048" # this embedding model is used to extract a voice embedding vector out of the target utterance, -# and analyze the voice automatically based on any given embeddings from your CEVOICE pack +# and analyze the voice automatically based on any given embeddings from your CEVOICE/CECHAR pack VOICE_EMBEDDING_MODEL = "marksverdhei/Qwen3-Voice-Embedding-12Hz-1.7B" # this embedding model is used to retrieve long-term Persona memories semantically when available PERSONA_MEMORY_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2" @@ -82,12 +87,13 @@ class ExitCodes(Enum): # we can't properly docstring enum values, so the comments below serve as docstrings EXIT_SUCCESS = 0 # Celune exited successfully. - EXIT_PENDING_UPDATE = 0 # Celune has a pending update. + EXIT_PENDING_UPDATE = 7 # Celune has a pending update. EXIT_FAILURE = 1 # Celune experienced a general failure. EXIT_NO_ANSI = 2 # Celune did not find an ANSI capable terminal. EXIT_ALREADY_RUNNING = 3 # Celune is already running. EXIT_MISSING_DEPENDENCIES = 4 # Celune is missing required dependencies. EXIT_UNKNOWN_ARGS = 5 # Celune CLI command is unknown. + EXIT_BAD_PYTHON = 6 # Celune is trying to run on an unsupported Python interpreter. # the following exit codes may be disabled by the end user EXIT_CELINE_DAY_SIX_SEVEN = 67 # Celune refuses to run on Celine Day. @@ -97,11 +103,6 @@ class ExitCodes(Enum): # SIGTSTP is not defined on Windows systems SIGTSTP = getattr(signal, "SIGTSTP", None) -type JSONSerializable = Union[ - None, bool, int, float, str, list["JSONSerializable"], dict[str, "JSONSerializable"] -] -type JSON = dict[str, JSONSerializable] - # pipeline state objects class PipelineStates(Enum): diff --git a/celune/dataclasses/__init__.py b/celune/dataclasses/__init__.py new file mode 100644 index 0000000..4359d32 --- /dev/null +++ b/celune/dataclasses/__init__.py @@ -0,0 +1,106 @@ +"""Unified Celune dataclass package with lazy re-exports.""" + +from importlib import import_module +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .celune import ( + CELUNE_CONSTANT_PROPERTIES, + CELUNE_FORWARDED_PROPERTIES, + CeluneAudioState, + CeluneBackendState, + CeluneCallbackState, + CeluneModelState, + CelunePipelineState, + CeluneRuntimeState, + CeluneVoiceState, + ) + from .extensions import CeluneContext + from .persona import ( + ChatMessage, + GenerateRequest, + GenerateResponse, + ) + from .pipeline import ( + PlaybackChunk, + PlaybackSourceDone, + SpeechDone, + SpeechRequest, + SpeechTiming, + ) + from .properties import ( + ConstantPropertySpec, + ForwardedPropertySpec, + bind_constant_properties, + bind_forwarded_properties, + constant_property, + forward_property, + ) + +_MODULE_EXPORTS = { + "CELUNE_CONSTANT_PROPERTIES": "celune", + "CELUNE_FORWARDED_PROPERTIES": "celune", + "CeluneAudioState": "celune", + "CeluneBackendState": "celune", + "CeluneCallbackState": "celune", + "CeluneContext": "extensions", + "CeluneModelState": "celune", + "CelunePipelineState": "celune", + "CeluneRuntimeState": "celune", + "CeluneVoiceState": "celune", + "ChatMessage": "persona", + "ConstantPropertySpec": "properties", + "ForwardedPropertySpec": "properties", + "GenerateRequest": "persona", + "GenerateResponse": "persona", + "PlaybackChunk": "pipeline", + "PlaybackSourceDone": "pipeline", + "SpeechDone": "pipeline", + "SpeechRequest": "pipeline", + "SpeechTiming": "pipeline", + "bind_constant_properties": "properties", + "bind_forwarded_properties": "properties", + "constant_property": "properties", + "forward_property": "properties", +} + +__all__ = [ + "CELUNE_CONSTANT_PROPERTIES", + "CELUNE_FORWARDED_PROPERTIES", + "CeluneAudioState", + "CeluneBackendState", + "CeluneCallbackState", + "CeluneContext", + "CeluneModelState", + "CelunePipelineState", + "CeluneRuntimeState", + "CeluneVoiceState", + "ChatMessage", + "ConstantPropertySpec", + "ForwardedPropertySpec", + "GenerateRequest", + "GenerateResponse", + "PlaybackChunk", + "PlaybackSourceDone", + "SpeechDone", + "SpeechRequest", + "SpeechTiming", + "bind_constant_properties", + "bind_forwarded_properties", + "constant_property", + "forward_property", +] + + +def __getattr__(name: str): + """Resolve dataclass exports lazily to avoid package import cycles.""" + module_name = _MODULE_EXPORTS.get(name) + if module_name is None: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + module = import_module(f"{__name__}.{module_name}") + return getattr(module, name) + + +def __dir__() -> list[str]: + """Return the lazily exported package surface.""" + return __all__ diff --git a/celune/dataclasses/celune.py b/celune/dataclasses/celune.py new file mode 100644 index 0000000..b73cd4c --- /dev/null +++ b/celune/dataclasses/celune.py @@ -0,0 +1,322 @@ +"""Grouped Celune runtime state containers and property specs.""" + +import queue +import threading +from dataclasses import dataclass, field +from typing import Optional, Union + +import numpy as np +import numpy.typing as npt +import sounddevice as sd +from transformers.modeling_utils import PreTrainedModel +from transformers.tokenization_utils_base import PreTrainedTokenizerBase + +from ..backends import CeluneBackend +from ..cevoice import CEVoicePersona +from ..chroma import AudioRGBGlow +from ..config import Config +from ..constants import JSONSerializable, PipelineStates +from ..dsp import StreamingPedalboardReverb +from ..extensions.manager import CeluneExtensionManager +from ..persona.impl import PersonaClient +from ..typing.backends import BackendModel +from ..typing.celune import ( + ErrorCallback, + IdleCallback, + InputStateCallback, + MessageCallback, + ProgressCallback, + QueueAvailableCallback, + VoiceChangedCallback, + VoiceLockStateCallback, +) +from .properties import ConstantPropertySpec, ForwardedPropertySpec + + +@dataclass +class CeluneCallbackState: + """Callbacks Celune uses to report state outward.""" + + log_callback: MessageCallback + status_callback: MessageCallback + error_callback: ErrorCallback + idle_callback: IdleCallback + queue_avail_callback: QueueAvailableCallback + voice_changed_callback: VoiceChangedCallback + change_input_state_callback: InputStateCallback + change_voice_lock_state_callback: VoiceLockStateCallback + progress_callback: ProgressCallback + + +@dataclass +class CeluneBackendState: + """Backend selection and configuration state.""" + + config: Config + backend_spec: Optional[Union[str, type[CeluneBackend]]] = None + backend_kwargs: dict[str, JSONSerializable] = field(default_factory=dict) + backend: Optional[CeluneBackend] = None + tts_backend: str = "" + chunk_size: int = 0 + language: str = "Auto" + dev: bool = False + use_normalization: bool = False + + +@dataclass +class CeluneModelState: + """Loaded TTS and normalizer model state.""" + + model: Optional[BackendModel] = None + model_name: str = "" + llm: Optional[PreTrainedModel] = None + tokenizer: Optional[PreTrainedTokenizerBase] = None + last_warmup_error: Optional[Exception] = None + normalizer_load_epoch: int = 0 + + +@dataclass +class CeluneVoiceState: + """Voice and character-related state.""" + + current_voice: Optional[str] = None + current_character: Optional[str] = None + current_character_persona: Optional[CEVoicePersona] = None + voice_bundle_is_default: bool = True + persona_history: list[dict[str, str]] = field(default_factory=list) + persona_attachments: list[dict[str, str]] = field(default_factory=list) + voices: tuple[str, ...] = () + voice_prompt: Optional[str] = None + + +@dataclass +class CelunePipelineState: + """Queues, worker threads, locks, and playback coordination.""" + + text_queue: queue.Queue = field(default_factory=queue.Queue) + audio_queue: queue.Queue = field(default_factory=queue.Queue) + playback_thread: Optional[threading.Thread] = None + generation_thread: Optional[threading.Thread] = None + api_thread: Optional[threading.Thread] = None + persona_thread: Optional[threading.Thread] = None + queue_lock: threading.Lock = field(default_factory=threading.Lock) + utterance_force_stop: threading.Event = field(default_factory=threading.Event) + next_playback_source_id: int = 0 + playback_source_statuses: dict[int, str] = field(default_factory=dict) + playback_source_meta: dict[int, dict[str, Union[str, float]]] = field( + default_factory=dict + ) + playback_progress_last_emit_at: float = 0.0 + playback_progress_last_source_id: int = 0 + model_ready: threading.Event = field(default_factory=threading.Event) + playback_done: threading.Event = field(default_factory=threading.Event) + say_lock: threading.Lock = field(default_factory=threading.Lock) + wake_lock: threading.Lock = field(default_factory=threading.Lock) + model_lock: threading.RLock = field(default_factory=threading.RLock) + exit_requested: bool = False + + +@dataclass +class CeluneAudioState: + """Audio output and effect-related state.""" + + stream: Optional[sd.OutputStream] = None + current_sr: Optional[int] = None + audio_unavailable: bool = False + can_use_rubberband: bool = True + speed: float = 1.0 + reverb: StreamingPedalboardReverb = field(default_factory=StreamingPedalboardReverb) + recently_saved: Optional[str] = None + kept_sfx_audio: Optional[npt.NDArray[np.float32]] = None + + +@dataclass +class CeluneRuntimeState: + """Top-level lifecycle and runtime integration state.""" + + regenerate: bool = False + locked: bool = True + loaded: bool = False + sleeping: bool = False + last_flavor: Optional[str] = None + ready_announced: bool = False + cur_state: str = "init" + is_in_tutorial: bool = False + extension_manager: Optional[CeluneExtensionManager] = None + glow: Optional[AudioRGBGlow] = None + vision: Optional[PersonaClient] = None + + +CELUNE_FORWARDED_PROPERTIES = ( + ForwardedPropertySpec("log_callback", "_callbacks", "log_callback"), + ForwardedPropertySpec("status_callback", "_callbacks", "status_callback"), + ForwardedPropertySpec("error_callback", "_callbacks", "error_callback"), + ForwardedPropertySpec("idle_callback", "_callbacks", "idle_callback"), + ForwardedPropertySpec("queue_avail_callback", "_callbacks", "queue_avail_callback"), + ForwardedPropertySpec( + "voice_changed_callback", "_callbacks", "voice_changed_callback" + ), + ForwardedPropertySpec( + "change_input_state_callback", + "_callbacks", + "change_input_state_callback", + ), + ForwardedPropertySpec( + "change_voice_lock_state_callback", + "_callbacks", + "change_voice_lock_state_callback", + ), + ForwardedPropertySpec("progress_callback", "_callbacks", "progress_callback"), + ForwardedPropertySpec("config", "_backend_state", "config"), + ForwardedPropertySpec("_backend_spec", "_backend_state", "backend_spec"), + ForwardedPropertySpec("_backend_kwargs", "_backend_state", "backend_kwargs"), + ForwardedPropertySpec("backend", "_backend_state", "backend"), + ForwardedPropertySpec("tts_backend", "_backend_state", "tts_backend"), + ForwardedPropertySpec("chunk_size", "_backend_state", "chunk_size"), + ForwardedPropertySpec("language", "_backend_state", "language"), + ForwardedPropertySpec("dev", "_backend_state", "dev"), + ForwardedPropertySpec("use_normalization", "_backend_state", "use_normalization"), + ForwardedPropertySpec("model", "_model_state", "model"), + ForwardedPropertySpec("model_name", "_model_state", "model_name"), + ForwardedPropertySpec("llm", "_model_state", "llm"), + ForwardedPropertySpec("tokenizer", "_model_state", "tokenizer"), + ForwardedPropertySpec("_last_warmup_error", "_model_state", "last_warmup_error"), + ForwardedPropertySpec( + "_normalizer_load_epoch", + "_model_state", + "normalizer_load_epoch", + ), + ForwardedPropertySpec("current_voice", "_voice_state", "current_voice"), + ForwardedPropertySpec("current_character", "_voice_state", "current_character"), + ForwardedPropertySpec( + "current_character_persona", + "_voice_state", + "current_character_persona", + ), + ForwardedPropertySpec( + "voice_bundle_is_default", + "_voice_state", + "voice_bundle_is_default", + ), + ForwardedPropertySpec("persona_history", "_voice_state", "persona_history"), + ForwardedPropertySpec("persona_attachments", "_voice_state", "persona_attachments"), + ForwardedPropertySpec("voices", "_voice_state", "voices"), + ForwardedPropertySpec("voice_prompt", "_voice_state", "voice_prompt"), + ForwardedPropertySpec("text_queue", "_pipeline_state", "text_queue"), + ForwardedPropertySpec("audio_queue", "_pipeline_state", "audio_queue"), + ForwardedPropertySpec("_playback_thread", "_pipeline_state", "playback_thread"), + ForwardedPropertySpec("_generation_thread", "_pipeline_state", "generation_thread"), + ForwardedPropertySpec("_api_thread", "_pipeline_state", "api_thread"), + ForwardedPropertySpec("_persona_thread", "_pipeline_state", "persona_thread"), + ForwardedPropertySpec("_queue_lock", "_pipeline_state", "queue_lock"), + ForwardedPropertySpec( + "_utterance_force_stop", + "_pipeline_state", + "utterance_force_stop", + ), + ForwardedPropertySpec( + "_next_playback_source_id", + "_pipeline_state", + "next_playback_source_id", + ), + ForwardedPropertySpec( + "_playback_source_statuses", + "_pipeline_state", + "playback_source_statuses", + ), + ForwardedPropertySpec( + "_playback_source_meta", "_pipeline_state", "playback_source_meta" + ), + ForwardedPropertySpec( + "_playback_progress_last_emit_at", + "_pipeline_state", + "playback_progress_last_emit_at", + ), + ForwardedPropertySpec( + "_playback_progress_last_source_id", + "_pipeline_state", + "playback_progress_last_source_id", + ), + ForwardedPropertySpec("_model_ready", "_pipeline_state", "model_ready"), + ForwardedPropertySpec("_playback_done", "_pipeline_state", "playback_done"), + ForwardedPropertySpec("_say_lock", "_pipeline_state", "say_lock"), + ForwardedPropertySpec("_wake_lock", "_pipeline_state", "wake_lock"), + ForwardedPropertySpec("_model_lock", "_pipeline_state", "model_lock"), + ForwardedPropertySpec("_exit_requested", "_pipeline_state", "exit_requested"), + ForwardedPropertySpec("_stream", "_audio_state", "stream"), + ForwardedPropertySpec("_current_sr", "_audio_state", "current_sr"), + ForwardedPropertySpec("_audio_unavailable", "_audio_state", "audio_unavailable"), + ForwardedPropertySpec("can_use_rubberband", "_audio_state", "can_use_rubberband"), + ForwardedPropertySpec("speed", "_audio_state", "speed"), + ForwardedPropertySpec("reverb", "_audio_state", "reverb"), + ForwardedPropertySpec("recently_saved", "_audio_state", "recently_saved"), + ForwardedPropertySpec("kept_sfx_audio", "_audio_state", "kept_sfx_audio"), + ForwardedPropertySpec("regenerate", "_runtime_state", "regenerate"), + ForwardedPropertySpec("locked", "_runtime_state", "locked"), + ForwardedPropertySpec("loaded", "_runtime_state", "loaded"), + ForwardedPropertySpec("sleeping", "_runtime_state", "sleeping"), + ForwardedPropertySpec("_last_flavor", "_runtime_state", "last_flavor"), + ForwardedPropertySpec("_ready_announced", "_runtime_state", "ready_announced"), + ForwardedPropertySpec("cur_state", "_runtime_state", "cur_state"), + ForwardedPropertySpec("is_in_tutorial", "_runtime_state", "is_in_tutorial"), + ForwardedPropertySpec("extension_manager", "_runtime_state", "extension_manager"), + ForwardedPropertySpec("glow", "_runtime_state", "glow"), + ForwardedPropertySpec("vision", "_runtime_state", "vision"), + ForwardedPropertySpec("stream", "_audio_state", "stream"), + ForwardedPropertySpec("say_lock", "_pipeline_state", "say_lock", read_only=True), + ForwardedPropertySpec( + "utterance_force_stop", + "_pipeline_state", + "utterance_force_stop", + read_only=True, + ), + ForwardedPropertySpec( + "queue_lock", "_pipeline_state", "queue_lock", read_only=True + ), + ForwardedPropertySpec( + "playback_done", "_pipeline_state", "playback_done", read_only=True + ), + ForwardedPropertySpec( + "model_ready", "_pipeline_state", "model_ready", read_only=True + ), + ForwardedPropertySpec( + "generation_thread", + "_pipeline_state", + "generation_thread", + read_only=True, + ), + ForwardedPropertySpec( + "playback_thread", + "_pipeline_state", + "playback_thread", + read_only=True, + ), + ForwardedPropertySpec( + "exit_requested", "_pipeline_state", "exit_requested", read_only=True + ), + ForwardedPropertySpec( + "model_lock", "_pipeline_state", "model_lock", read_only=True + ), + ForwardedPropertySpec( + "audio_unavailable", + "_audio_state", + "audio_unavailable", + read_only=True, + ), + ForwardedPropertySpec("current_sr", "_audio_state", "current_sr"), +) + +CELUNE_CONSTANT_PROPERTIES = ( + ConstantPropertySpec( + "force_stop_marker", + PipelineStates.UTTERANCE_FORCE_END, + ), + ConstantPropertySpec( + "utterance_done", + PipelineStates.UTTERANCE_END, + ), + ConstantPropertySpec( + "sentinel", + PipelineStates.TERMINATE, + ), +) diff --git a/celune/dataclasses/extensions.py b/celune/dataclasses/extensions.py new file mode 100644 index 0000000..07afcd3 --- /dev/null +++ b/celune/dataclasses/extensions.py @@ -0,0 +1,63 @@ +"""Extension-facing dataclasses.""" + +from dataclasses import dataclass, field + +from .. import __version__ +from ..typing.common import JSONSerializable +from ..typing.extensions import ( + DevLogCallable, + GetStateCallable, + LogCallable, + PlayCallable, + SayCallable, + SetVoiceCallable, + StatusCallable, + ThinkCallable, + WaitUntilReadyCallable, +) + +CELUNE_VERSION = __version__ + + +@dataclass(slots=True) +class CeluneContext: + """Celune's extension context.""" + + log: LogCallable + log_dev: DevLogCallable + say: SayCallable + think: ThinkCallable + play: PlayCallable + status: StatusCallable + set_voice: SetVoiceCallable + get_state: GetStateCallable + wait_until_ready: WaitUntilReadyCallable + name: str = "Celune" + version: str = CELUNE_VERSION + shared: dict[str, JSONSerializable] = field(default_factory=dict) + dev: bool = False + + def expose(self, key: str, value: JSONSerializable) -> None: + """Expose a shared object. + + Args: + key: Shared-object name to publish. + value: JSON-serializable value exposed to extensions. + """ + self.shared[key] = value + + def get( + self, + key: str, + default: JSONSerializable = None, + ) -> JSONSerializable: + """Get a shared object. + + Args: + key: Shared-object name to read. + default: Fallback returned when the key is missing. + + Returns: + JSONSerializable: The stored value, or ``default`` when absent. + """ + return self.shared.get(key, default) diff --git a/celune/dataclasses/persona.py b/celune/dataclasses/persona.py new file mode 100644 index 0000000..dea038b --- /dev/null +++ b/celune/dataclasses/persona.py @@ -0,0 +1,40 @@ +"""Persona runtime dataclasses.""" + +from dataclasses import dataclass, field +from typing import Optional + +from ..typing.persona import MessageContent, Role + + +@dataclass(slots=True) +class ChatMessage: + """One OpenAI-style chat message.""" + + role: Role + content: MessageContent + + +@dataclass(slots=True) +class GenerateRequest: + """Celune-to-Persona generation request.""" + + model: Optional[str] = None + quantization: Optional[str] = None + quantized: bool = True + system: Optional[str] = None + user: Optional[str] = None + messages: list[ChatMessage] = field(default_factory=list) + max_new_tokens: int = 220 + temperature: float = 0.75 + top_p: float = 0.9 + repetition_penalty: float = 1.05 + + +@dataclass(slots=True) +class GenerateResponse: + """Persona generation response.""" + + text: str + response: str + model: str + quantization: str diff --git a/celune/dataclasses/pipeline.py b/celune/dataclasses/pipeline.py new file mode 100644 index 0000000..193a5b4 --- /dev/null +++ b/celune/dataclasses/pipeline.py @@ -0,0 +1,94 @@ +"""Speech pipeline dataclasses.""" + +from __future__ import annotations + +import queue +import time +from dataclasses import dataclass +from typing import Optional, Union + +import numpy as np +import numpy.typing as npt + +from ..constants import N_A_NUMERIC + + +@dataclass(frozen=True) +class SpeechRequest: + """Queued speech input and output persistence preference.""" + + text: str + display_text: str + language: str = "Auto" + save: bool = True + stream_queue: Optional[ + "queue.Queue[Optional[Union[npt.NDArray[np.float32], Exception]]]" + ] = None + normalize: bool = False + + +@dataclass(frozen=True) +class SpeechDone: + """Playback completion marker for one generated utterance.""" + + saved_path: Optional[str] = None + analysis_audio: Optional[npt.NDArray[np.float32]] = None + + +@dataclass(frozen=True) +class PlaybackChunk: + """One playback-source chunk routed through the shared DSP mixer.""" + + source_id: int + audio: npt.NDArray[np.float32] + sample_rate: int + timing: Optional["SpeechTiming"] = None + + +@dataclass(frozen=True) +class PlaybackSourceDone: + """Completion marker for one playback source in the shared DSP mixer.""" + + source_id: int + release_pipeline: bool = False + saved_path: Optional[str] = None + analysis_audio: Optional[npt.NDArray[np.float32]] = None + + +@dataclass +class SpeechTiming: + """Timing data for a generated speech utterance.""" + + start_time: float + first_chunk_time: Optional[float] = None + first_playback_time: Optional[float] = None + + def mark_first_chunk(self) -> None: + """Record when the backend yields its first audio chunk.""" + if self.first_chunk_time is None: + self.first_chunk_time = time.monotonic() + + def mark_first_playback(self) -> None: + """Record when the first audio chunk is sent to the output stream.""" + if self.first_playback_time is None: + self.first_playback_time = time.monotonic() + + def ttfc_ms(self) -> float: + """Return time to first generated chunk in milliseconds. + + Returns: + float: Elapsed milliseconds until the first generated chunk. + """ + if self.first_chunk_time is None: + return N_A_NUMERIC + return (self.first_chunk_time - self.start_time) * 1000 + + def ttfp_seconds(self) -> float: + """Return time to first playback in seconds. + + Returns: + float: Elapsed seconds until the first audible playback chunk. + """ + if self.first_playback_time is None: + return N_A_NUMERIC + return self.first_playback_time - self.start_time diff --git a/celune/dataclasses/properties.py b/celune/dataclasses/properties.py new file mode 100644 index 0000000..2da2e03 --- /dev/null +++ b/celune/dataclasses/properties.py @@ -0,0 +1,105 @@ +"""Property helpers for grouped Celune runtime state.""" + +from dataclasses import dataclass +from typing import Optional + + +@dataclass(frozen=True, slots=True) +class ForwardedPropertySpec: + """Describe one property forwarded into a state container.""" + + name: str + container_name: str + field_name: str + doc: Optional[str] = None + read_only: bool = False + + +@dataclass(frozen=True, slots=True) +class ConstantPropertySpec: + """Describe one constant-backed property.""" + + name: str + value: object + doc: Optional[str] = None + + +def forward_property( + container_name: str, + field_name: str, + *, + doc: Optional[str] = None, + read_only: bool = False, +) -> property: + """Create a property that forwards storage to a grouped state container. + + Args: + container_name: Attribute holding the grouped state object. + field_name: Field name inside that grouped state object. + doc: Optional property docstring to attach. + read_only: Whether the generated property should omit a setter. + + Returns: + property: A descriptor that reads from the grouped state container. + """ + + def getter(instance): + return getattr(getattr(instance, container_name), field_name) + + if read_only: + return property(getter, doc=doc) + + def setter(instance, value) -> None: + setattr(getattr(instance, container_name), field_name, value) + + return property(getter, setter, doc=doc) + + +def constant_property(value: object, *, doc: Optional[str] = None) -> property: + """Create a read-only property that always returns one constant value. + + Args: + value: Constant value returned by the property. + doc: Optional property docstring to attach. + + Returns: + property: A descriptor that always returns ``value``. + """ + + def getter(_instance): + return value + + return property(getter, doc=doc) + + +def bind_forwarded_properties( + namespace: dict[str, object], + specs: tuple[ForwardedPropertySpec, ...], +) -> None: + """Populate a class namespace with forwarded properties. + + Args: + namespace: Class-body namespace being assembled. + specs: Forwarding definitions to install into the namespace. + """ + for spec in specs: + namespace[spec.name] = forward_property( + spec.container_name, + spec.field_name, + doc=spec.doc, + read_only=spec.read_only, + ) + + +def bind_constant_properties( + namespace: dict[str, object], + specs: tuple[ConstantPropertySpec, ...], +) -> None: + """Populate a class namespace with constant-backed properties. + + Args: + namespace: Class-body namespace being assembled. + specs: Constant-property definitions to install into the namespace. + """ + for spec in specs: + namespace[spec.name] = constant_property(spec.value, doc=spec.doc) diff --git a/celune/dsp.py b/celune/dsp.py index 5435dc1..1d163ac 100644 --- a/celune/dsp.py +++ b/celune/dsp.py @@ -2,19 +2,22 @@ """Celune audio processing functions.""" import math -from typing import Iterable +from typing import Iterable, Callable from importlib.resources import as_file, files import numpy as np import numpy.typing as npt import soundfile as sf from scipy.signal import resample_poly -from pedalboard import Pedalboard, Reverb +from pedalboard import Pedalboard, PitchShift, Reverb from .constants import UtteranceLoudnessTier, BASE_SR from .exceptions import AudioMismatchError, BadAudioError +_SIGNAL_CACHE: dict[str, npt.NDArray[np.float32]] = {} + + def _resample_audio( audio: npt.NDArray[np.float32], source_sr: int, target_sr: int = BASE_SR ) -> npt.NDArray[np.float32]: @@ -68,13 +71,34 @@ def _to_48khz( return _resample_audio(audio, source_sr, BASE_SR) -def readiness_signal() -> npt.NDArray[np.float32]: - """Load Celune's startup readiness sound. +def _pitch_shift_ui_signal( + audio: npt.NDArray[np.float32], n_steps: float +) -> npt.NDArray[np.float32]: + """Shift pitch while preserving tempo for short deterministic UI signals.""" + shifted = Pedalboard([PitchShift(semitones=n_steps)])(audio, BASE_SR) + return np.ascontiguousarray(shifted, dtype=np.float32) - Returns: - npt.NDArray[np.float32]: The readiness sound formatted as a NumPy array, or silent array if not found. - """ - readiness_wav = files("celune").joinpath("assets", "readiness.wav") + +def _freeze_signal(audio: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]: + """Return one shared read-only buffer for a cached UI signal.""" + frozen = np.ascontiguousarray(audio, dtype=np.float32) + frozen.setflags(write=False) + return frozen + + +def _cached_signal( + name: str, factory: Callable[[], npt.NDArray[np.float32]] +) -> npt.NDArray[np.float32]: + """Return a cached signal waveform.""" + if name not in _SIGNAL_CACHE: + _SIGNAL_CACHE[name] = _freeze_signal(factory()) + + return _SIGNAL_CACHE[name] + + +def _load_readiness_signal() -> npt.NDArray[np.float32]: + """Load Celune's startup readiness sound.""" + readiness_wav = files("celune").joinpath("assets", "chord.wav") # we did not find the Celune chord, return silence instead if not readiness_wav.is_file(): @@ -86,6 +110,73 @@ def readiness_signal() -> npt.NDArray[np.float32]: return _to_48khz(np.asarray(audio, dtype=np.float32), sr) +def readiness_signal() -> npt.NDArray[np.float32]: + """Dynamically generate Celune's readiness sound. + + Returns: + npt.NDArray[np.float32]: The readiness sound formatted as a NumPy array, or silent array if not found. + """ + + return _cached_signal("readiness", _load_readiness_signal) + + +def sleeping_signal() -> npt.NDArray[np.float32]: + """Dynamically generate Celune's sleeping sound. + + Returns: + npt.NDArray[np.float32]: The sleeping sound formatted as a NumPy array, or a silent array if the readiness + sound wasn't found. + """ + + return _cached_signal( + "sleeping", + lambda: _pitch_shift_ui_signal( + readiness_signal(), + n_steps=-1, + ), + ) + + +def working_signal() -> npt.NDArray[np.float32]: + """Dynamically generate Celune's working sound. + + Returns: + npt.NDArray[np.float32]: The working sound formatted as a NumPy array, or a silent array if the readiness + sound wasn't found. + """ + + return _cached_signal( + "working", + lambda: _pitch_shift_ui_signal( + readiness_signal(), + n_steps=4, + ), + ) + + +def error_signal() -> npt.NDArray[np.float32]: + """Dynamically generate Celune's error sound. + + Returns: + npt.NDArray[np.float32]: The error sound formatted as a NumPy array, or a silent array if the readiness + sound wasn't found. + """ + + def factory() -> npt.NDArray[np.float32]: + base = readiness_signal() + high = _pitch_shift_ui_signal(base, n_steps=6) + tritone = base + high + + base_peak = np.max(np.abs(base)) + peak = np.max(np.abs(tritone)) + if peak > 0 and base_peak > 0: + tritone = tritone * (base_peak / peak) + + return tritone + + return _cached_signal("error", factory) + + def _soften( audio: npt.NDArray[np.float32], sr: int, diff --git a/celune/entrypoint.py b/celune/entrypoint.py index c957fe0..2ef5e4d 100644 --- a/celune/entrypoint.py +++ b/celune/entrypoint.py @@ -20,6 +20,8 @@ from celune import __version__, REVISION, __tagline__ from celune.constants import APP_NAME, APP_SLUG, ExitCodes +from celune.paths import project_root, running_compiled +from celune.updater import apply_update_and_restart def _env_flag(name: str) -> bool: @@ -35,7 +37,7 @@ def _env_flag(name: str) -> bool: # these parameters are used by the app CLI and its commands, e.g. 'celune doctor' LAUNCHED_VIA_LAUNCHER = _env_flag("CELUNE_LAUNCHER") SCRIPT_PATH = Path(__file__).resolve() -PROJECT_ROOT = SCRIPT_PATH.parent.parent +PROJECT_ROOT = project_root() SETUP_PATH = PROJECT_ROOT / "setup.py" DEFAULT_CONFIG_PATH = PROJECT_ROOT / "default_config.yaml" SCRIPT_NAME = "main.py" @@ -280,6 +282,14 @@ def _doctor_running_python() -> Path: return Path(sys.executable).resolve() +def _doctor_subprocess_python() -> Path: + """Return the Python executable doctor fixups should invoke.""" + if running_compiled(): + return _doctor_venv_python() + + return _doctor_running_python() + + def _doctor_same_path(left: Path, right: Path) -> bool: """Return whether two paths refer to the same normalized location.""" return os.path.normcase(str(left.resolve())) == os.path.normcase( @@ -713,6 +723,7 @@ def _doctor_checks() -> list[DoctorCheck]: optional_imports = [ ("torchvision", "torchvision", "Persona vision support uses torchvision."), ("pocket_tts", "pocket-tts", f"{APP_NAME} Mini needs pocket-tts."), + ("dots_tts", "dots.tts", "The dots.tts MeanFlow backend needs dots.tts."), ("voxcpm", "voxcpm", "The VoxCPM2 backend needs voxcpm."), ("openrgb", "openrgb-python", "Presence lighting needs openrgb-python."), ("matplotlib", "matplotlib", "Developer visualizations use matplotlib."), @@ -873,7 +884,7 @@ def run_doctor(argv: list[str]) -> int: print("Attempting to fix fixable problems...") try: result = subprocess.run( - [sys.executable, str(SETUP_PATH)], + [str(_doctor_subprocess_python()), str(SETUP_PATH)], cwd=PROJECT_ROOT, check=False, ) @@ -948,8 +959,8 @@ def start(verbose: bool = False) -> None: verbose: Whether the app should be started in verbose (developer) mode. Raises: - No: If `No` needs to be raised. - Exception: If `Exception` needs to be raised. + No: Raised on Celune's name day unless explicitly overridden. + Exception: Re-raised after printing a traceback in developer mode. """ runtime = _load_runtime() try: @@ -1025,6 +1036,13 @@ def start(verbose: bool = False) -> None: ).start() if choice: + if running_compiled(): + print( + f"{APP_NAME} will close so the launcher can apply the latest artifact." + ) + time.sleep(2) + sys.exit(runtime.ExitCodes.EXIT_PENDING_UPDATE.value) + print(f"Updating {APP_NAME}...") try: runtime.update_to_latest() @@ -1041,6 +1059,11 @@ def start(verbose: bool = False) -> None: sys.exit(runtime.ExitCodes.EXIT_PENDING_UPDATE.value) elif runtime.check_for_update() and not runtime.supports_ansi(): print("This terminal does not support ANSI.") + if running_compiled(): + print("Requesting the launcher to refresh the packaged binaries...") + time.sleep(2) + sys.exit(runtime.ExitCodes.EXIT_PENDING_UPDATE.value) + print("Attempting to apply update non-interactively...") try: runtime.update_to_latest() @@ -1179,6 +1202,24 @@ def main(argv: Optional[list[str]] = None) -> None: resolved_argv = normalize_argv0(argv) args = resolved_argv[1:] + if args and args[0] == "__apply_update": + if len(args) < 3: + print("Usage: celune __apply_update [args...]") + sys.exit(EXIT_CODES.EXIT_UNKNOWN_ARGS.value) + + try: + parent_pid = int(args[1]) + except ValueError: + print("Invalid launcher PID.") + sys.exit(EXIT_CODES.EXIT_UNKNOWN_ARGS.value) + + launcher_path = Path(args[2]).resolve() + try: + sys.exit(apply_update_and_restart(parent_pid, launcher_path, args[3:])) + except Exception as exc: + print(f"{APP_NAME} could not apply the launcher update: {exc}") + sys.exit(EXIT_CODES.EXIT_FAILURE.value) + if not args: start() elif args[0] in {"start", "run"}: diff --git a/celune/extensions/base.py b/celune/extensions/base.py index 01fa3e6..3d53dba 100644 --- a/celune/extensions/base.py +++ b/celune/extensions/base.py @@ -2,146 +2,11 @@ """Celune's extension annotations and classes.""" from abc import ABC -from dataclasses import dataclass, field -from typing import Protocol, Optional, runtime_checkable +from typing import Optional -from .. import __version__ -from ..constants import JSONSerializable +from ..dataclasses.extensions import CeluneContext from ..exceptions import IncompleteExtensionError -CELUNE_VERSION = __version__ - - -@runtime_checkable -class LogCallable(Protocol): - """Extension callable logging annotation.""" - - def __call__(self, msg: str, severity: str = "info") -> None: - """Emit a log message.""" - raise IncompleteExtensionError("protocol not defined") - - -@runtime_checkable -class DevLogCallable(Protocol): - """Extension callable developer logging annotation.""" - - def __call__(self, msg: str, severity: str = "info") -> None: - """Emit a developer log message.""" - raise IncompleteExtensionError("protocol not defined") - - -@runtime_checkable -class SayCallable(Protocol): - """Extension callable speech request annotation.""" - - def __call__( - self, - text: str, - save: bool = True, - display_text: Optional[str] = None, - ) -> bool: - """Queue text for speech.""" - raise IncompleteExtensionError("protocol not defined") - - -@runtime_checkable -class ThinkCallable(Protocol): - """Extension callable think request annotation.""" - - def __call__(self, text: str) -> bool: - """Start a think request.""" - raise IncompleteExtensionError("protocol not defined") - - -@runtime_checkable -class PlayCallable(Protocol): - """Extension callable play request annotation.""" - - def __call__(self, sound_path: str, keep: bool = False) -> bool: - """Queue an audio file for playback.""" - raise IncompleteExtensionError("protocol not defined") - - -@runtime_checkable -class StatusCallable(Protocol): - """Extension callable status update annotation.""" - - def __call__(self, msg: str, severity: str = "info") -> None: - """Emit a status update.""" - raise IncompleteExtensionError("protocol not defined") - - -@runtime_checkable -class SetVoiceCallable(Protocol): - """Extension callable voice setting request annotation.""" - - def __call__(self, name: str) -> bool: - """Request a voice change.""" - raise IncompleteExtensionError("protocol not defined") - - -@runtime_checkable -class GetStateCallable(Protocol): - """Extension callable state read annotation.""" - - def __call__(self) -> str: - """Read the current runtime state.""" - raise IncompleteExtensionError("protocol not defined") - - -@runtime_checkable -class WaitUntilReadyCallable(Protocol): - """Extension callable wait until ready annotation.""" - - def __call__(self, timeout: float = 30.0) -> bool: - """Wait for Celune to become ready.""" - raise IncompleteExtensionError("protocol not defined") - - -@dataclass(slots=True) -class CeluneContext: - """Celune's extension context.""" - - log: LogCallable - log_dev: DevLogCallable - say: SayCallable - think: ThinkCallable - play: PlayCallable - status: StatusCallable - set_voice: SetVoiceCallable - get_state: GetStateCallable - wait_until_ready: WaitUntilReadyCallable - - name: str = "Celune" - version: str = CELUNE_VERSION - shared: dict[str, JSONSerializable] = field(default_factory=dict) - dev: bool = False - - def expose(self, key: str, value: JSONSerializable) -> None: - """Expose a shared object. - - Args: - key: The name used to store the shared value. - value: The object to expose to other extensions. - """ - self.shared[key] = value - - def get( - self, - key: str, - default: JSONSerializable = None, - ) -> JSONSerializable: - """Get a shared object. - - Args: - key: The name of the shared value to fetch. - default: The fallback value returned when the key is missing. - - Returns: - JSONSerializable: The stored shared value, or ``default`` when absent. - """ - return self.shared.get(key, default) - class CeluneExtension(ABC): """Celune extension abstract base class.""" @@ -232,12 +97,18 @@ def think(self, text: str) -> bool: return self.ctx.think(text) - def play(self, sound_path: str, keep: bool = False) -> bool: + def play( + self, + sound_path: str, + keep: bool = False, + volume: float = 1.0, + ) -> bool: """Play arbitrary sound through Celune. Args: sound_path: The path to the audio file to play. keep: Whether to prepend this SFX to the next saved utterance. + volume: How loud should the SFX be played at. Returns: bool: ``True`` when playback was queued, otherwise ``False``. @@ -245,7 +116,7 @@ def play(self, sound_path: str, keep: bool = False) -> bool: if not self.ctx.wait_until_ready(): return False - return self.ctx.play(sound_path, keep=keep) + return self.ctx.play(sound_path, keep=keep, volume=volume) def status(self, msg: str, severity: str = "info") -> None: """Update status display. diff --git a/celune/extensions/manager.py b/celune/extensions/manager.py index 718e913..7f0ef63 100644 --- a/celune/extensions/manager.py +++ b/celune/extensions/manager.py @@ -3,11 +3,10 @@ import sys import inspect -import importlib.util import threading import traceback +import importlib.util from pathlib import Path -from typing import Type from ..utils import format_error from .base import CeluneContext, CeluneExtension @@ -22,7 +21,7 @@ def __init__(self, context: CeluneContext) -> None: self.extensions: dict[str, CeluneExtension] = {} self.auto_started = False - def register(self, extension_cls: Type[CeluneExtension]) -> CeluneExtension: + def register(self, extension_cls: type[CeluneExtension]) -> CeluneExtension: """Register a Celune extension class. Args: diff --git a/celune/modeling.py b/celune/modeling.py index 1f21ed9..e0be96f 100644 --- a/celune/modeling.py +++ b/celune/modeling.py @@ -6,8 +6,8 @@ import torch from transformers.modeling_utils import PreTrainedModel -from transformers.tokenization_utils_base import PreTrainedTokenizerBase from transformers import AutoModelForCausalLM, AutoTokenizer +from transformers.tokenization_utils_base import PreTrainedTokenizerBase from .backends import CeluneBackend from .vram import resolve_vram_preset diff --git a/celune/namedays.py b/celune/namedays.py index fb510bf..55ea004 100644 --- a/celune/namedays.py +++ b/celune/namedays.py @@ -2,8 +2,8 @@ """Name day list derived from Polish name days, translated to English, and with Polish-only names removed.""" from typing import Union -from collections.abc import Iterator from datetime import date, datetime +from collections.abc import Iterator # some of these entries are empty because no suitable corresponding English name exists for certain Polish names NAME_DAYS: dict[str, list[str]] = { diff --git a/celune/paths.py b/celune/paths.py index ec270b7..881f130 100644 --- a/celune/paths.py +++ b/celune/paths.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: MIT """Runtime filesystem paths for Celune user data.""" +import sys import shutil from pathlib import Path from typing import Optional @@ -9,6 +10,34 @@ from .constants import APP_SLUG +_REPO_MARKERS = ("celune", "default_config.yaml", "pyproject.toml") + + +def running_compiled() -> bool: + """Return whether Celune is running from a compiled entrypoint. + + Returns: + bool: ``True`` when the active ``__main__`` module was marked as compiled. + """ + main_module = sys.modules.get("__main__") + return bool(getattr(main_module, "__compiled__", False)) + + +def _looks_like_repo_root(path: Path) -> bool: + """Return whether a path looks like the Celune repository root.""" + return all((path / marker).exists() for marker in _REPO_MARKERS) + + +def _compiled_project_root() -> Path: + """Resolve the repository root for compiled launches started from bin/.""" + executable_dir = Path(sys.argv[0]).resolve().parent + + for candidate in (executable_dir, executable_dir.parent): + if _looks_like_repo_root(candidate): + return candidate + + return executable_dir + def app_data_dir(create: bool = False) -> Path: """Return Celune's user data directory. @@ -17,7 +46,7 @@ def app_data_dir(create: bool = False) -> Path: create: Whether this directory should be created before being returned. Returns: - Celune's user data directory. + Path: Celune's user data directory. """ path = Path(user_data_dir(APP_SLUG, appauthor=False)) if create: @@ -32,7 +61,7 @@ def memory_data_dir(create: bool = False) -> Path: create: Whether this directory should be created before being returned. Returns: - Celune's persistent memory directory. + Path: Celune's persistent memory directory. """ path = app_data_dir(create=create) / "memory" if create: @@ -47,7 +76,7 @@ def temp_data_dir(create: bool = False) -> Path: create: Whether this directory should be created before being returned. Returns: - Celune's temporary data directory. + Path: Celune's temporary data directory. """ path = app_data_dir(create=create) / "temp" if create: @@ -62,7 +91,7 @@ def config_path(create_parent: bool = False) -> Path: create_parent: Whether this directory's parents should be created before the path being returned. Returns: - Celune's user configuration file path. + Path: Celune's user configuration file path. """ path = app_data_dir(create=create_parent) / "config.yaml" if create_parent: @@ -77,7 +106,7 @@ def traceback_path(create_parent: bool = False) -> Path: create_parent: Whether this directory's parents should be created before the path being returned. Returns: - Celune's traceback capture file path. + Path: Celune's traceback capture file path. """ path = app_data_dir(create=create_parent) / f"{APP_SLUG}_traceback.txt" if create_parent: @@ -92,7 +121,7 @@ def main_window_log_path(create_parent: bool = False) -> Path: create_parent: Whether this directory's parents should be created before the path being returned. Returns: - Celune's main window log file path. + Path: Celune's main window log file path. """ path = app_data_dir(create=create_parent) / f"{APP_SLUG}.log" if create_parent: @@ -104,8 +133,11 @@ def project_root() -> Path: """Return the repository root containing Celune's bundled defaults. Returns: - Celune's repository root directory. + Path: Celune's repository root directory. """ + if running_compiled(): + return _compiled_project_root() + return Path(__file__).resolve().parent.parent @@ -113,7 +145,7 @@ def default_config_path() -> Path: """Return the bundled default configuration file path. Returns: - Celune's default configuration file path. + Path: Celune's default configuration file path. """ return project_root() / "default_config.yaml" @@ -122,7 +154,7 @@ def legacy_config_path() -> Path: """Return the historical repo-root config file path. Returns: - Celune's legacy configuration file path. + Path: Celune's legacy configuration file path. """ return project_root() / "config.yaml" @@ -140,7 +172,7 @@ def ensure_config_path( legacy_path: Optional explicit legacy repo-root config file path. Returns: - The resolved active config path and whether the file had to be created. + tuple[Path, bool]: The resolved active config path and whether the file had to be created. """ resolved_active = active_path or config_path(create_parent=True) resolved_default = default_path or default_config_path() diff --git a/celune/persona/impl.py b/celune/persona/impl.py index 74a5800..2b244ff 100644 --- a/celune/persona/impl.py +++ b/celune/persona/impl.py @@ -10,6 +10,7 @@ from ..config import Config from ..cevoice import CEVoicePersona from ..vram import resolve_vram_preset +from .runtime import PersonaRuntime, request_from_json, response_to_json from ..constants import ( DEFAULT_PERSONA_CONTEXT, DEFAULT_PERSONA_DESCRIPTION, @@ -18,7 +19,6 @@ PERSONA_HISTORY_MESSAGES, PERSONA_MODEL_ID, ) -from .runtime import PersonaRuntime, request_from_json, response_to_json PERSONA_QUANTIZATION = "4bit" DevLogCallback = Callable[[str, str], None] @@ -140,13 +140,28 @@ def _config_text( def pack_persona(engine: Any) -> Optional[CEVoicePersona]: - """Return typed CEVOICE persona metadata attached to the current engine.""" + """Return typed CEVOICE persona metadata attached to the current engine. + + Args: + engine: Celune-like runtime object that may expose persona metadata. + + Returns: + Optional[CEVoicePersona]: The active persona metadata when present and typed. + """ persona = getattr(engine, "current_character_persona", None) return persona if isinstance(persona, CEVoicePersona) else None def pack_identity_text(engine: Any, field_name: str) -> str: - """Read one CEVOICE persona identity field when present.""" + """Read one CEVOICE persona identity field when present. + + Args: + engine: Celune-like runtime object that may expose persona metadata. + field_name: Identity-field attribute name to read from the persona. + + Returns: + str: Trimmed field value, or an empty string when the field is unavailable. + """ persona = pack_persona(engine) if persona is None: return "" @@ -156,7 +171,15 @@ def pack_identity_text(engine: Any, field_name: str) -> str: def pack_persona_text(engine: Any, field_name: str) -> str: - """Read one top-level CEVOICE persona text field when present.""" + """Read one top-level CEVOICE persona text field when present. + + Args: + engine: Celune-like runtime object that may expose persona metadata. + field_name: Top-level persona attribute name to read. + + Returns: + str: Trimmed field value, or an empty string when the field is unavailable. + """ persona = pack_persona(engine) if persona is None: return "" @@ -165,7 +188,15 @@ def pack_persona_text(engine: Any, field_name: str) -> str: def pack_persona_lines(engine: Any, field_name: str) -> tuple[str, ...]: - """Read one CEVOICE persona text-list field when present.""" + """Read one CEVOICE persona text-list field when present. + + Args: + engine: Celune-like runtime object that may expose persona metadata. + field_name: Persona tuple field to read and normalize into non-empty lines. + + Returns: + tuple[str, ...]: Trimmed non-empty lines from the requested persona field. + """ persona = pack_persona(engine) if persona is None: return () @@ -177,7 +208,14 @@ def pack_persona_lines(engine: Any, field_name: str) -> tuple[str, ...]: def persona_active_character_name(engine: Any) -> str: - """Return the active character name used for Persona memory isolation.""" + """Return the active character name used for Persona memory isolation. + + Args: + engine: Celune-like runtime object holding the current character selection. + + Returns: + str: Active character name, falling back to config defaults when needed. + """ current_character = getattr(engine, "current_character", None) if isinstance(current_character, str) and current_character.strip(): return current_character.strip() @@ -190,38 +228,74 @@ def persona_active_character_name(engine: Any) -> str: def uses_default_celune_identity(engine: Any) -> bool: - """Return whether Persona defaults should use Celune's canonical identity.""" + """Return whether Persona defaults should use Celune's canonical identity. + + Args: + engine: Celune-like runtime object holding the active voice bundle state. + + Returns: + bool: ``True`` when the default Celune voice bundle is active for Celune. + """ if not bool(getattr(engine, "voice_bundle_is_default", False)): return False return persona_active_character_name(engine).strip().lower() == "celune" def default_persona_persona() -> str: - """Return the default persona instructions for the active character.""" + """Return the default persona instructions for the active character. + + Returns: + str: Built-in fallback system prompt used for Persona conversations. + """ return DEFAULT_PERSONA_DESCRIPTION def default_persona_age(engine: Any) -> str: - """Return the default age for the active character source.""" + """Return the default age for the active character source. + + Args: + engine: Celune-like runtime object used to choose default identity values. + + Returns: + str: Default age string for the active persona source. + """ if uses_default_celune_identity(engine): return "28" return "unknown" def default_persona_gender(engine: Any) -> str: - """Return a conservative gender default for the active character source.""" + """Return a conservative gender default for the active character source. + + Args: + engine: Celune-like runtime object used to choose default identity values. + + Returns: + str: Default gender string for the active persona source. + """ if uses_default_celune_identity(engine): return "female" return "unknown" def default_persona_context() -> str: - """Return the default interaction context for the active character source.""" + """Return the default interaction context for the active character source. + + Returns: + str: Built-in fallback environment and relationship context. + """ return DEFAULT_PERSONA_CONTEXT def persona_style_traits(engine: Any) -> dict[str, str]: - """Return the configured speaking-style traits for a Persona request.""" + """Return the configured speaking-style traits for a Persona request. + + Args: + engine: Celune-like runtime object that may expose persona style metadata. + + Returns: + dict[str, str]: Style-trait values merged with Celune's default trait set. + """ traits = { "warmth": "mid", "directness": "mid", @@ -250,7 +324,14 @@ def persona_style_traits(engine: Any) -> dict[str, str]: def persona_short_term_history_limit(engine: Any) -> int: - """Return the configured short-term memory length for Persona.""" + """Return the configured short-term memory length for Persona. + + Args: + engine: Celune-like runtime object whose config may override the default. + + Returns: + int: Maximum number of recent chat messages to keep in short-term memory. + """ config = getattr(engine, "config", {}) memory = ( persona_config(config).get("memory") if isinstance(config, Mapping) else None @@ -272,12 +353,23 @@ def persona_short_term_history_limit(engine: Any) -> int: def persona_history_limit() -> int: - """Return the default short-term memory length for Persona.""" + """Return the default short-term memory length for Persona. + + Returns: + int: Built-in fallback message-window length for Persona history. + """ return PERSONA_HISTORY_MESSAGES def persona_history_messages(engine: Any) -> list[JSON]: - """Return prior Persona chat messages in OpenAI chat format.""" + """Return prior Persona chat messages in OpenAI chat format. + + Args: + engine: Celune-like runtime object that stores prior Persona messages. + + Returns: + list[JSON]: Sanitized chat-history entries ready for the Persona API. + """ history = getattr(engine, "persona_history", []) if not isinstance(history, list): return [] @@ -302,7 +394,14 @@ def persona_history_messages(engine: Any) -> list[JSON]: def persona_attachment_source(path: str) -> str: - """Return a qwen-vl-utils-safe attachment path or URI.""" + """Return a qwen-vl-utils-safe attachment path or URI. + + Args: + path: Attachment path or file URI captured by the Persona UI. + + Returns: + str: Normalized path or URI suitable for Qwen vision attachments. + """ source = path.strip() if os.name == "nt" and source.startswith("file:///"): without_scheme = source.removeprefix("file:///") @@ -312,7 +411,14 @@ def persona_attachment_source(path: str) -> str: def persona_pending_attachments(engine: Any) -> list[JSON]: - """Return pending Persona attachments in Qwen chat content format.""" + """Return pending Persona attachments in Qwen chat content format. + + Args: + engine: Celune-like runtime object that stores staged Persona attachments. + + Returns: + list[JSON]: Attachment content blocks formatted for the Persona request. + """ attachments = getattr(engine, "persona_attachments", []) if not isinstance(attachments, list): return [] diff --git a/celune/persona/memory.py b/celune/persona/memory.py index 3daf8b8..528f9ec 100644 --- a/celune/persona/memory.py +++ b/celune/persona/memory.py @@ -14,12 +14,12 @@ import torch import torch.nn.functional as f -from transformers import AutoModel, AutoTokenizer import numpy as np import numpy.typing as npt +from transformers import AutoModel, AutoTokenizer -from ..constants import JSONSerializable, PERSONA_MEMORY_EMBEDDING_MODEL from ..paths import memory_data_dir +from ..constants import JSONSerializable, PERSONA_MEMORY_EMBEDDING_MODEL if TYPE_CHECKING: from transformers.modeling_utils import PreTrainedModel diff --git a/celune/persona/prompts.py b/celune/persona/prompts.py index e4d6ca4..1d99682 100644 --- a/celune/persona/prompts.py +++ b/celune/persona/prompts.py @@ -156,8 +156,13 @@ def render(self, message: str) -> str: # HACK: don't repeat this # the characters loved to repeat themselves for no apparent reason # thanks Qwen for me having to prompt engineer around this issue with both ChatGPT and Claude + # + # Qwen3-VL is also prone to this, the prompt probably sucks if last_assistant: - lines.append(f'[Do not reuse or rephrase: "{last_assistant[:120]}"]') + lines.append( + "[The assistant has already acknowledged the complaint about repetition. " + "Do not acknowledge it again. Move the conversation forward.]" + ) lines.append(f"user: {message}") return _render_lines(lines) diff --git a/celune/persona/runtime.py b/celune/persona/runtime.py index 285b249..34b96ad 100644 --- a/celune/persona/runtime.py +++ b/celune/persona/runtime.py @@ -5,198 +5,39 @@ import gc import threading import contextlib -from dataclasses import dataclass, field from collections.abc import Mapping, Sequence -from typing import Literal, Optional, Protocol, TypedDict, Union, cast +from typing import Optional, Union, cast import torch from transformers.tokenization_utils_base import BatchEncoding from transformers import ( - Qwen2_5_VLForConditionalGeneration, + Qwen3VLForConditionalGeneration, AutoProcessor, AutoTokenizer, AutoConfig, BitsAndBytesConfig, ) -from ..utils import discard +from ..utils import discard, normalize_special_characters from ..vram import resolve_vram_preset from ..constants import JSONSerializable, PERSONA_MODEL_ID, N_A_STR - -Role = Literal["system", "user", "assistant"] -type VideoMetadataScalar = Optional[Union[bool, int, float, str]] -type VisionInput = Union[JSONSerializable, torch.Tensor, bytes, memoryview] -type ProcessorKwargValue = Union[VideoMetadataScalar, Sequence[VideoMetadataScalar]] -type ModelGenerateKwargValue = Union[torch.Tensor, int, float, bool] - - -class TextContentItem(TypedDict): - """Text content block accepted by Persona chat messages.""" - - type: Literal["text"] - text: str - - -class ImageContentItem(TypedDict): - """Image content block accepted by Persona chat messages.""" - - type: Literal["image"] - image: str - - -class VideoContentItem(TypedDict): - """Video content block accepted by Persona chat messages.""" - - type: Literal["video"] - video: str - - -type ContentItem = Union[TextContentItem, ImageContentItem, VideoContentItem] -type VideoMetadata = dict[str, VideoMetadataScalar] -type VideoInputWithMetadata = tuple[VisionInput, VideoMetadata] -type VisionProcessorOutput = tuple[ - Optional[list[VisionInput]], - Optional[list[VideoInputWithMetadata]], - dict[str, ProcessorKwargValue], -] -type MessageContent = Union[str, list[ContentItem]] - - -class ChatMessagePayload(TypedDict): - """Serialized chat message structure used by the Persona runtime.""" - - role: Role - content: MessageContent - - -type JSONDict = ChatMessagePayload - - -class ChatTemplateRenderer(Protocol): - """Renderer supporting Hugging Face-style chat templates.""" - - def apply_chat_template( - self, - conversation: Sequence[ChatMessagePayload], - *, - tokenize: bool = False, - add_generation_prompt: bool = True, - return_dict: bool = True, - return_tensors: str = "pt", - ) -> Union[str, BatchEncoding]: - """Render or tokenize a chat conversation. - - Args: - conversation: The current Persona conversation. - tokenize: Whether the conversation should be tokenized. - add_generation_prompt: Whether the generation prompt should be appended. - return_dict: Whether a Python dict should be returned. - return_tensors: Whether PyTorch tensors should be returned. - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - -class PersonaTokenizer(Protocol): - """Tokenizer protocol used by the Persona runtime.""" - - eos_token_id: Optional[int] - - def __call__(self, *, text: str, return_tensors: str) -> BatchEncoding: - """Tokenize text into a batch encoding.""" - raise NotImplementedError("protocol not defined") - - def decode(self, token_ids: torch.Tensor, *, skip_special_tokens: bool) -> str: - """Decode generated token IDs into text. - - Args: - token_ids: The token IDs to decode. - skip_special_tokens: Whether special token IDs should be skipped while decoding. - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - -class PersonaProcessor(ChatTemplateRenderer, Protocol): - """Processor protocol used by the Persona runtime.""" - - tokenizer: Optional[PersonaTokenizer] - - def __call__( - self, - *, - text: str, - images: Optional[Sequence[VisionInput]] = None, - videos: Optional[Sequence[VisionInput]] = None, - video_metadata: Optional[Sequence[VideoMetadata]] = None, - return_tensors: str, - **kwargs: ProcessorKwargValue, - ) -> BatchEncoding: - """Build multimodal model inputs.""" - raise NotImplementedError("protocol not defined") - - -class PersonaModel(Protocol): - """Model protocol used by the Persona runtime.""" - - device: Union[torch.device, str] - - def generate(self, **kwargs: ModelGenerateKwargValue) -> torch.Tensor: - """Generate token IDs from prepared inputs. - - Args: - kwargs: Generation-specific keyword arguments to use. - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - def eval(self) -> None: - """Switch the model into eval mode. - - Raises: - NotImplementedError: The protocol was called directly. - """ - raise NotImplementedError("protocol not defined") - - -@dataclass(slots=True) -class ChatMessage: - """One OpenAI-style chat message.""" - - role: Role - content: MessageContent - - -@dataclass(slots=True) -class GenerateRequest: - """Celune-to-Persona generation request.""" - - model: Optional[str] = None - quantization: Optional[str] = None - quantized: bool = True - system: Optional[str] = None - user: Optional[str] = None - messages: list[ChatMessage] = field(default_factory=list) - max_new_tokens: int = 220 - temperature: float = 0.75 - top_p: float = 0.9 - repetition_penalty: float = 1.05 - - -@dataclass(slots=True) -class GenerateResponse: - """Persona generation response.""" - - text: str - response: str - model: str - quantization: str +from ..dataclasses.persona import ChatMessage, GenerateRequest, GenerateResponse +from ..typing.persona import ( + ChatMessagePayload, + ChatTemplateRenderer, + ContentItem, + ImageContentItem, + MessageContent, + PersonaModel, + PersonaProcessor, + PersonaTokenizer, + Role, + TextContentItem, + VideoContentItem, + VideoMetadata, + VisionInput, + VisionProcessorOutput, +) def _render_chat_prompt( @@ -265,6 +106,8 @@ def load(self, model_id: str, quantization: str) -> None: quantization: The requested quantization mode to use. Raises: + TypeError: Raised when the loaded processor or model has an unsupported type. + RuntimeError: Raised when the requested Persona model cannot be loaded. ValueError: Quantization was requested when CUDA wasn't available. """ if ( @@ -281,21 +124,20 @@ def load(self, model_id: str, quantization: str) -> None: trust_remote_code=True, ) model_type = getattr(config, "model_type", N_A_STR) + wanted_type = "qwen3_vl" - if model_type != "qwen2_5_vl": - raise ValueError( - f"unsupported model type {config.model_type}, expected qwen2_5_vl" + if model_type != wanted_type: + raise TypeError( + f"unsupported model type {config.model_type}, expected {wanted_type}" ) normalized = quantization.casefold() if normalized in {"4bit", "nf4", "bnb4", "bitsandbytes-4bit"}: if not torch.cuda.is_available(): - raise ValueError( - "Persona quantized loading requires a CUDA-enabled Torch build" - ) + raise RuntimeError("CUDA support required to quantize Persona") model = cast( PersonaModel, - Qwen2_5_VLForConditionalGeneration.from_pretrained( + Qwen3VLForConditionalGeneration.from_pretrained( model_id, trust_remote_code=True, device_map="auto", @@ -309,12 +151,10 @@ def load(self, model_id: str, quantization: str) -> None: ) elif normalized in {"8bit", "bnb8", "bitsandbytes-8bit"}: if not torch.cuda.is_available(): - raise ValueError( - "Persona quantized loading requires a CUDA-enabled Torch build" - ) + raise RuntimeError("CUDA support required to quantize Persona") model = cast( PersonaModel, - Qwen2_5_VLForConditionalGeneration.from_pretrained( + Qwen3VLForConditionalGeneration.from_pretrained( model_id, trust_remote_code=True, device_map="auto", @@ -324,7 +164,7 @@ def load(self, model_id: str, quantization: str) -> None: elif normalized in {"none", "false", "off", "disabled"}: model = cast( PersonaModel, - Qwen2_5_VLForConditionalGeneration.from_pretrained( + Qwen3VLForConditionalGeneration.from_pretrained( model_id, trust_remote_code=True, device_map="auto", @@ -432,7 +272,9 @@ def generate(self, request: GenerateRequest) -> GenerateResponse: input_ids = cast(torch.Tensor, inputs["input_ids"]) new_ids = output_ids[0, input_ids.shape[1] :] - text = tokenizer.decode(new_ids, skip_special_tokens=True).strip() + text = normalize_special_characters( + tokenizer.decode(new_ids, skip_special_tokens=True).strip() + ) return GenerateResponse( text=text, response=text, @@ -440,8 +282,8 @@ def generate(self, request: GenerateRequest) -> GenerateResponse: quantization=self.quantization, ) finally: - # Vision requests can allocate substantial transient GPU memory for - # decoded image/video inputs; drop those tensors as soon as the turn ends. + # vision requests can allocate a large amount of memory for image/video tensors + # drop them after the vision-related turn is complete, and let Celune know it from context discard(new_ids) discard(output_ids) discard(model_inputs) diff --git a/celune/pipeline.py b/celune/pipeline.py index 7eaa46f..3c2f341 100644 --- a/celune/pipeline.py +++ b/celune/pipeline.py @@ -6,14 +6,19 @@ import os import re import json +import sys import time import queue import random import pathlib import datetime import contextlib -from dataclasses import dataclass +import subprocess +from importlib import util as importlib_util +from collections import deque from typing import TYPE_CHECKING, Optional, Mapping, Union, cast +from urllib.parse import urlparse, urlencode +from urllib.request import urlopen import torch import numpy as np @@ -25,9 +30,16 @@ from iso639.exceptions import InvalidLanguageValue, DeprecatedLanguageValue from . import __version__ +from .dataclasses.pipeline import ( + PlaybackChunk, + PlaybackSourceDone, + SpeechRequest, + SpeechTiming, +) from .exceptions import NotAvailableError from .persona.memory import PersonaMemoryStore from .analysis import analyze_voice_audio +from .paths import app_data_dir, project_root, running_compiled from .persona.impl import ( default_persona_age, default_persona_context, @@ -52,6 +64,9 @@ _to_48khz, is_silent_utterance, readiness_signal, + sleeping_signal, + working_signal, + error_signal, ) from .utils import ( format_number, @@ -76,10 +91,9 @@ BASE_SR, JSON, JSONSerializable, - N_A_NUMERIC, PERSONA_MEMORY_EMBEDDING_MODEL, - PipelineStates, ) +from .typing.pipeline import SpeechStreamQueue if TYPE_CHECKING: from .celune import Celune @@ -88,74 +102,8 @@ _FLAC_STREAMINFO_BLOCK = 0 _FLAC_VORBIS_COMMENT_BLOCK = 4 _MAX_FLAC_METADATA_BLOCK_SIZE = 0xFFFFFF - - -@dataclass(frozen=True) -class SpeechRequest: - """Queued speech input and output persistence preference.""" - - text: str - display_text: str - language: str = "Auto" - save: bool = True - stream_queue: Optional["SpeechStreamQueue"] = None - normalize: bool = False - - -@dataclass(frozen=True) -class SpeechDone: - """Playback completion marker for one generated utterance.""" - - saved_path: Optional[str] = None - analysis_audio: Optional[npt.NDArray[np.float32]] = None - - -@dataclass -class SpeechTiming: - """Timing data for a generated speech utterance.""" - - start_time: float - first_chunk_time: Optional[float] = None - first_playback_time: Optional[float] = None - - def mark_first_chunk(self) -> None: - """Record when the backend yields its first audio chunk.""" - if self.first_chunk_time is None: - self.first_chunk_time = time.monotonic() - - def mark_first_playback(self) -> None: - """Record when the first audio chunk is sent to the output stream.""" - if self.first_playback_time is None: - self.first_playback_time = time.monotonic() - - def ttfc_ms(self) -> float: - """Return time to first generated chunk in milliseconds. - - Returns: - float: How much time it took to generate the first chunk. - """ - if self.first_chunk_time is None: - return N_A_NUMERIC - - return (self.first_chunk_time - self.start_time) * 1000 - - def ttfp_seconds(self) -> float: - """Return time to first playback in seconds. - - Returns: - float: How much time it took to play any part of the current utterance. - """ - if self.first_playback_time is None: - return N_A_NUMERIC - - return self.first_playback_time - self.start_time - - -type SpeechStreamItem = Optional[Union[npt.NDArray[np.float32], Exception]] -type SpeechStreamQueue = queue.Queue[SpeechStreamItem] -type TextQueueItem = Union[SpeechRequest, PipelineStates] -type AudioChunk = tuple[npt.NDArray[np.float32], int, Optional[SpeechTiming]] -type AudioQueueItem = Union[AudioChunk, SpeechDone, PipelineStates] +_SFX_DUCK_GAIN = 0.25 +_SFX_DUCK_FADE_SECONDS = 0.15 def _json_value(value: JSONSerializable) -> JSONSerializable: @@ -476,6 +424,13 @@ def close_stream(engine: Celune, abort: bool = False) -> None: engine._current_sr = None +def _reset_glow_audio_reactivity(engine: Celune) -> None: + """Clear any pending audio-reactive glow state after abrupt playback stops.""" + reset_audio_reactivity = getattr(engine.glow, "reset_audio_reactivity", None) + if callable(reset_audio_reactivity): + reset_audio_reactivity() + + def force_stop_speech(engine: Celune) -> bool: """Forcefully stop Celune from speaking. @@ -529,19 +484,324 @@ def acquire_pipeline(engine: Celune, action: str) -> bool: return True -def release_pipeline(engine: Celune) -> None: +def release_pipeline(engine: Celune, playback_idle: bool = True) -> None: """Release Celune's shared playback pipeline. Args: engine: The Celune engine that owns the playback pipeline. + playback_idle: Whether playback should be marked fully idle now. """ with engine.say_lock: engine.locked = False - engine.playback_done.set() - engine.cur_state = "idle" + if playback_idle: + engine.playback_done.set() + engine.cur_state = "idle" engine.log_dev("[LOCK] released") +def _next_playback_source_id(engine: Celune) -> int: + """Return the next monotonically increasing playback source id.""" + source_id = getattr(engine, "_next_playback_source_id", 0) + 1 + engine._next_playback_source_id = source_id + return source_id + + +def _register_overlay_playback(engine: Celune) -> None: + """Mark the mixer busy for a newly queued non-speech playback source.""" + with engine.say_lock: + if not engine.locked: + engine.cur_state = "speaking" + engine.playback_done.clear() + engine._ready_announced = False + + +def _playback_source_statuses(engine: Celune) -> dict[int, str]: + """Return the mutable per-source playback status map.""" + statuses = getattr(engine, "_playback_source_statuses", None) + if isinstance(statuses, dict): + return statuses + + statuses = {} + engine._playback_source_statuses = statuses + return statuses + + +def _playback_source_meta( + engine: Celune, +) -> dict[int, dict[str, Union[str, float]]]: + """Return per-source mixer metadata such as kind, gain state, and progress.""" + meta = getattr(engine, "_playback_source_meta", None) + if isinstance(meta, dict): + return meta + + meta = {} + engine._playback_source_meta = meta + return meta + + +def _register_playback_source( + engine: Celune, + source_id: int, + *, + kind: str, + base_gain: float = 1.0, +) -> None: + """Register one playback source for status and gain management.""" + clipped = float(np.clip(base_gain, 0.0, 1.0)) + _playback_source_meta(engine)[source_id] = { + "kind": kind, + "base_gain": clipped, + "current_gain": clipped, + "total_frames": 0.0, + "played_frames": 0.0, + } + + +def _set_playback_source_status(engine: Celune, source_id: int, status: str) -> None: + """Record and surface the current status for one active playback source.""" + statuses = _playback_source_statuses(engine) + statuses[source_id] = status + engine.status_callback(status) + + +def _clear_playback_source_status(engine: Celune, source_id: int) -> None: + """Forget one playback-source status and restore the next active status.""" + statuses = _playback_source_statuses(engine) + statuses.pop(source_id, None) + if statuses: + engine.status_callback(next(reversed(statuses.values()))) + _playback_source_meta(engine).pop(source_id, None) + + +def _queue_playback_chunk( + engine: Celune, + source_id: int, + audio: npt.NDArray[np.float32], + sample_rate: int, + timing: Optional[SpeechTiming] = None, +) -> None: + """Queue one chunk for the shared DSP playback mixer.""" + meta = _playback_source_meta(engine).get(source_id) + if isinstance(meta, dict): + meta["total_frames"] = float(meta.get("total_frames", 0.0)) + float(len(audio)) + + engine.audio_queue.put( + PlaybackChunk( + source_id=source_id, + audio=np.asarray(audio, dtype=np.float32), + sample_rate=sample_rate, + timing=timing, + ) + ) + + +def _update_playback_progress( + engine: Celune, + source_buffers: dict[ + int, deque[tuple[npt.NDArray[np.float32], Optional[SpeechTiming]]] + ], +) -> None: + """Reflect the active playback source position in the shared progress bar.""" + if not source_buffers: + return + + meta = _playback_source_meta(engine) + active_ids = [source_id for source_id in source_buffers if source_id in meta] + if not active_ids: + return + + source_id = max(active_ids) + source_meta = meta.get(source_id) + if not isinstance(source_meta, dict): + return + + total_frames = float(source_meta.get("total_frames", 0.0)) + played_frames = float(source_meta.get("played_frames", 0.0)) + if total_frames <= 0.0: + return + + now = time.monotonic() + last_emit_at = float(getattr(engine, "_playback_progress_last_emit_at", 0.0)) + last_source_id = getattr(engine, "_playback_progress_last_source_id", None) + emit_interval = 0.08 + if last_source_id == source_id and (now - last_emit_at) < emit_interval: + return + + engine._playback_progress_last_emit_at = now + engine._playback_progress_last_source_id = source_id + engine.progress_callback(min(played_frames, total_frames), total_frames) + + +def _active_speech_source_ids( + source_buffers: dict[ + int, deque[tuple[npt.NDArray[np.float32], Optional[SpeechTiming]]] + ], + engine: Celune, +) -> set[int]: + """Return active speech-source ids that should trigger SFX ducking.""" + meta = _playback_source_meta(engine) + return { + source_id + for source_id in source_buffers + if meta.get(source_id, {}).get("kind") == "speech" + } + + +def _apply_source_gain( + audio: npt.NDArray[np.float32], + source_id: int, + *, + speech_active: bool, + block_seconds: float, + engine: Celune, +) -> npt.NDArray[np.float32]: + """Apply ducking and smooth gain ramps for one mixer source block.""" + meta = _playback_source_meta(engine).get(source_id) + if not isinstance(meta, dict): + return audio + + kind = str(meta.get("kind", "sfx")) + base_gain = float(meta.get("base_gain", 1.0)) + current_gain = float(meta.get("current_gain", base_gain)) + if kind == "sfx": + target_gain = base_gain * (_SFX_DUCK_GAIN if speech_active else 1.0) + else: + target_gain = base_gain + + if abs(target_gain - current_gain) < 1e-6: + meta["current_gain"] = target_gain + return np.asarray(audio * target_gain, dtype=np.float32) + + fade_ratio = min(1.0, block_seconds / _SFX_DUCK_FADE_SECONDS) + next_gain = current_gain + (target_gain - current_gain) * fade_ratio + ramp = np.linspace(current_gain, next_gain, len(audio), dtype=np.float32) + meta["current_gain"] = next_gain + return np.asarray(audio * ramp[:, None], dtype=np.float32) + + +def _queue_playback_done( + engine: Celune, + source_id: int, + *, + release_pipeline_when_finished: bool = False, + saved_path: Optional[str] = None, + analysis_audio: Optional[npt.NDArray[np.float32]] = None, +) -> None: + """Queue a completion marker for one playback source.""" + engine.audio_queue.put( + PlaybackSourceDone( + source_id=source_id, + release_pipeline=release_pipeline_when_finished, + saved_path=saved_path, + analysis_audio=analysis_audio, + ) + ) + + +def _youtube_sfx_temp_path() -> pathlib.Path: + """Return the fixed temporary WAV path used for URL-backed SFX playback.""" + return app_data_dir(create=True) / "temp" / "temporary_audio.wav" + + +def _is_youtube_sfx_url(value: str) -> bool: + """Return whether ``value`` looks like a supported YouTube URL.""" + parsed = urlparse(value.strip()) + if parsed.scheme not in {"http", "https"}: + return False + host = (parsed.netloc or "").lower() + if host.startswith("www."): + host = host[4:] + return host in {"youtube.com", "youtu.be", "music.youtube.com"} + + +def _youtube_sfx_title(url: str) -> str: + """Return a friendly title for one YouTube URL when available.""" + query = urlencode({"url": url, "format": "json"}) + endpoint = f"https://www.youtube.com/oembed?{query}" + try: + with urlopen(endpoint, timeout=5) as response: + payload = json.loads(response.read().decode("utf-8")) + except Exception: + return "YouTube audio" + + title = payload.get("title") + if isinstance(title, str) and title.strip(): + return title.strip() + return "YouTube audio" + + +def _download_youtube_sfx( + engine: Celune, url: str +) -> Optional[tuple[pathlib.Path, str]]: + """Download one YouTube URL as a temporary WAV file for SFX playback.""" + yt_dlp_module = "yt_dlp" + if importlib_util.find_spec(yt_dlp_module) is None: + engine.log("yt-dlp is not installed, cannot play YouTube audio.", "warning") + engine.error_callback("yt-dlp is required for YouTube playback") + return None + + output_path = _youtube_sfx_temp_path() + output_path.parent.mkdir(parents=True, exist_ok=True) + with contextlib.suppress(OSError): + output_path.unlink(missing_ok=True) + + title = _youtube_sfx_title(url) + out_tmpl = str(output_path.with_suffix(".%(ext)s")) + engine.status_callback("Downloading audio") + engine.log(f"[SFX] Downloading audio from {url}...") + python_executable = sys.executable + if running_compiled(): + if os.name == "nt": + python_executable = str(project_root() / ".venv" / "Scripts" / "python.exe") + else: + python_executable = str(project_root() / ".venv" / "bin" / "python") + try: + completed = subprocess.run( + [ + python_executable, + "-m", + yt_dlp_module, + "--extract-audio", + "--audio-format", + "wav", + "--audio-quality", + "0", + "--no-playlist", + "--no-progress", + "--force-overwrites", + "--output", + out_tmpl, + url, + ], + check=False, + capture_output=True, + text=True, + timeout=30, + ) + + if completed.returncode != 0: + stderr = ( + completed.stderr.strip() or completed.stdout.strip() or "unknown error" + ) + engine.log("Could not download audio.", "warning") + engine.log(stderr, "warning") + engine.error_callback("Could not download YouTube audio") + return None + except subprocess.TimeoutExpired: + engine.log("Timed out downloading audio.", "warning") + engine.error_callback("Could not download YouTube audio") + return None + + if not output_path.exists(): + stderr = completed.stderr.strip() or completed.stdout.strip() or "unknown error" + engine.log("Downloader returned no file.", "warning") + engine.log(stderr, "warning") + engine.error_callback("Could not download YouTube audio") + return None + + return output_path, title + + def _config_text(engine: Celune, key: str, default: str) -> str: """Read a string configuration value with a fallback.""" value = engine.config.get(key) @@ -1067,12 +1327,17 @@ def queue_speech( language_meta = detect_language(text, list(engine.backend.supported_languages)) requested_language = engine.language + backend_name = str(getattr(engine.backend, "name", "")).strip().lower() if ( not isinstance(requested_language, str) or not requested_language.strip() or requested_language.strip().lower() == "auto" ): - requested_language = language_meta["language"] + # Qwen3 handles automatic language selection internally, so keep the + # backend-facing value as "Auto" instead of passing a langdetect code. + requested_language = ( + "Auto" if backend_name == "qwen3" else language_meta["language"] + ) if not language_meta["supported"]: # "zh-cn" has to be clipped to just "zh" to be a valid language code @@ -1134,6 +1399,7 @@ def queue_sfx_audio( sample_rate: int, label: str, keep: bool = False, + volume: float = 1.0, ) -> bool: """Queue decoded SFX audio through Celune's playback pipeline. @@ -1143,6 +1409,7 @@ def queue_sfx_audio( sample_rate: Source sample rate for the decoded audio. label: Human-readable label for logs and status. keep: Whether to prepend this SFX to the next saved utterance. + volume: Gain multiplier applied before the clip is queued for playback. Returns: bool: ``True`` when playback was queued successfully, otherwise ``False``. @@ -1150,9 +1417,6 @@ def queue_sfx_audio( Raises: Exception: Re-raised after releasing the pipeline if SFX playback setup fails. """ - if not acquire_pipeline(engine, "play"): - return False - try: audio = np.asarray(audio, dtype=np.float32) audio_len = len(audio) / sample_rate @@ -1164,26 +1428,32 @@ def queue_sfx_audio( if keep: engine.kept_sfx_audio = audio.copy() + source_id = _next_playback_source_id(engine) + _register_overlay_playback(engine) + _register_playback_source(engine, source_id, kind="sfx", base_gain=volume) engine.cur_state = "speaking" # push the smallest possible chunks for responsive stopping for chunk in _split(audio, BASE_SR, 1): - engine.audio_queue.put((chunk, BASE_SR, None)) - engine.audio_queue.put(engine.utterance_done) + _queue_playback_chunk(engine, source_id, chunk, BASE_SR) + _queue_playback_done(engine, source_id) - engine.status_callback(f"Playing {label}") + _set_playback_source_status(engine, source_id, f"Playing {label}") return True except Exception: - release_pipeline(engine) + engine.playback_done.set() raise -def play(engine: Celune, sound_path: str, keep: bool = False) -> bool: +def play( + engine: Celune, sound_path: str, keep: bool = False, volume: float = 1.0 +) -> bool: """Play a sound via Celune's pipeline. Args: engine: The Celune engine that should play the sound. sound_path: The path to the audio file to play. keep: Whether to prepend this SFX to the next saved utterance. + volume: How loud should the SFX be played at. Returns: bool: ``True`` when playback was queued successfully, otherwise ``False``. @@ -1191,6 +1461,15 @@ def play(engine: Celune, sound_path: str, keep: bool = False) -> bool: Raises: Exception: Re-raised after releasing the pipeline if SFX playback setup fails. """ + if _is_youtube_sfx_url(sound_path): + downloaded_info = _download_youtube_sfx(engine, sound_path) + if downloaded_info is None: + return False + downloaded, playback_label = downloaded_info + sound_path = str(downloaded) + else: + playback_label = sound_path + if not os.path.exists(sound_path): engine.log(f"{APP_NAME} cannot find {sound_path}.", "warning") return False @@ -1203,8 +1482,14 @@ def play(engine: Celune, sound_path: str, keep: bool = False) -> bool: return False audio, sr = sf.read(sound_path, dtype="float32") + return queue_sfx_audio( - engine, np.asarray(audio, dtype=np.float32), sr, sound_path, keep + engine, + np.asarray(audio, dtype=np.float32), + sr, + playback_label, + keep, + volume=volume, ) @@ -1367,19 +1652,37 @@ def split_units(value: str) -> list[str]: return chunks -def play_readiness_signal(engine: Celune) -> bool: +def play_signal(engine: Celune, signal_type: str) -> bool: """Queue a readiness signal to be played. Args: engine: The instance of Celune to do this with. + signal_type: The signal type to be played. Returns: bool: Whether the readiness signal was processed successfully. """ - if acquire_pipeline(engine, "play readiness signal"): + if signal_type == "readiness": + signal = readiness_signal() + elif signal_type == "working": + signal = working_signal() + elif signal_type == "sleeping": + signal = sleeping_signal() + elif signal_type == "error": + signal = error_signal() + else: + raise ValueError("no such signal") + + if acquire_pipeline(engine, f"play {signal_type} signal"): engine.cur_state = "speaking" - engine.audio_queue.put((readiness_signal(), BASE_SR, None)) - engine.audio_queue.put(engine.utterance_done) + source_id = _next_playback_source_id(engine) + _register_playback_source(engine, source_id, kind="sfx") + _queue_playback_chunk(engine, source_id, signal, BASE_SR) + _queue_playback_done( + engine, + source_id, + release_pipeline_when_finished=True, + ) return True return False @@ -1460,6 +1763,8 @@ def generation_worker(engine: Celune) -> None: buffer: list[npt.NDArray[np.float32]] = [] full_audio: list[npt.NDArray[np.float32]] = [] generated_text_parts: list[str] = [] + source_id = _next_playback_source_id(engine) + _register_playback_source(engine, source_id, kind="speech") for chunk_index, chunk_text in enumerate(chunks): if engine.exit_requested: @@ -1482,12 +1787,7 @@ def generation_worker(engine: Celune) -> None: generated_text_parts.append(chunk_text) is_first_chunk = chunk_index == 0 - progress_total = engine.backend.generation_progress_total( - chunk_text - ) - generated_steps = 0 last_timing: Optional[dict] = None - engine.progress_callback(0, progress_total or 1) with engine.model_lock: if engine.model is None: @@ -1558,15 +1858,6 @@ def generation_worker(engine: Celune) -> None: if engine.utterance_force_stop.is_set(): break - if progress_total is not None: - generated_steps += ( - engine.backend.generation_progress_steps(timing) - ) - engine.progress_callback( - min(generated_steps, progress_total), - progress_total, - ) - speech_timing.mark_first_chunk() if isinstance(audio_chunk, torch.Tensor): @@ -1614,12 +1905,12 @@ def generation_worker(engine: Celune) -> None: # buffering helps Celune speak smoothly when performance is bad if buffered_speech_len >= 10.0: queued_audio = np.concatenate(buffer) - engine.audio_queue.put( - ( - queued_audio, - BASE_SR, - speech_timing if not pushed_audio else None, - ) + _queue_playback_chunk( + engine, + source_id, + queued_audio, + BASE_SR, + speech_timing if not pushed_audio else None, ) if stream_queue is not None: stream_queue.put(queued_audio.copy()) @@ -1628,13 +1919,12 @@ def generation_worker(engine: Celune) -> None: if not pushed_audio: pushed_audio = True - engine.status_callback("Speaking") + _set_playback_source_status( + engine, source_id, "Speaking" + ) engine.cur_state = "speaking" engine.queue_avail_callback() - if progress_total is None: - engine.progress_callback(1, 1) - if ( not engine.exit_requested and not engine.utterance_force_stop.is_set() @@ -1674,18 +1964,18 @@ def generation_worker(engine: Celune) -> None: if buffer: queued_audio = np.concatenate(buffer) - engine.audio_queue.put( - ( - queued_audio, - BASE_SR, - speech_timing if not pushed_audio else None, - ) + _queue_playback_chunk( + engine, + source_id, + queued_audio, + BASE_SR, + speech_timing if not pushed_audio else None, ) if stream_queue is not None: stream_queue.put(queued_audio.copy()) if not pushed_audio: pushed_audio = True - engine.status_callback("Speaking") + _set_playback_source_status(engine, source_id, "Speaking") engine.cur_state = "speaking" engine.queue_avail_callback() @@ -1697,7 +1987,7 @@ def generation_worker(engine: Celune) -> None: if engine.reverb.strength > 0.0: tail = engine.reverb.flush() if len(tail) > 0: - engine.audio_queue.put((tail, BASE_SR, None)) + _queue_playback_chunk(engine, source_id, tail, BASE_SR) if stream_queue is not None: stream_queue.put(tail.copy()) buffer.append(tail) @@ -1775,11 +2065,12 @@ def generation_worker(engine: Celune) -> None: saved_path = None engine.recently_saved = saved_path - engine.audio_queue.put( - SpeechDone( - saved_path=saved_path, - analysis_audio=analysis_audio, - ) + _queue_playback_done( + engine, + source_id, + release_pipeline_when_finished=True, + saved_path=saved_path, + analysis_audio=analysis_audio, ) if stream_queue is not None: stream_queue.put(None) @@ -1801,16 +2092,157 @@ def generation_worker(engine: Celune) -> None: break +def _playback_blocks( + chunk: PlaybackChunk, + block_seconds: float = 0.05, +) -> deque[tuple[npt.NDArray[np.float32], Optional[SpeechTiming]]]: + """Split one queued source chunk into short blocks for the mixer.""" + blocks = deque[tuple[npt.NDArray[np.float32], Optional[SpeechTiming]]]() + pieces = _split(chunk.audio, chunk.sample_rate, block_seconds) + if not pieces: + pieces = [np.asarray(chunk.audio, dtype=np.float32)] + for index, piece in enumerate(pieces): + blocks.append( + (np.asarray(piece, dtype=np.float32), chunk.timing if index == 0 else None) + ) + return blocks + + +def _ensure_playback_stream(engine: Celune, sample_rate: int) -> bool: + """Ensure the shared playback stream exists for the requested sample rate.""" + if engine.stream is not None and getattr(engine, "current_sr", None) == sample_rate: + return True + + if engine.stream is not None and getattr(engine, "current_sr", None) != sample_rate: + close_stream(engine, abort=True) + + try: + engine.current_sr = sample_rate + engine.stream = sd.OutputStream( + samplerate=sample_rate, + channels=2, + dtype="float32", + blocksize=0, + ) + if engine.stream is None: + raise NotAvailableError("audio stream is not available") + engine.stream.start() + engine.log_dev(f"[PLAY] started stream at {sample_rate} Hz") + return True + except sd.PortAudioError: + if not getattr(engine, "audio_unavailable", False): + engine.log(f"{APP_NAME} could not initialize the audio stream.", "error") + engine.log("No suitable audio device is available.", "error") + engine.error_callback("No suitable audio devices") + engine._audio_unavailable = True + return False + + +def _finalize_playback_idle( + engine: Celune, + saved_path: Optional[str] = None, + analysis_audio: Optional[npt.NDArray[np.float32]] = None, +) -> None: + """Handle post-playback reactions when the mixer becomes fully idle.""" + _reset_glow_audio_reactivity(engine) + engine.progress_callback(1, 1) + engine.playback_done.set() + if not getattr(engine, "locked", False): + engine.cur_state = "idle" + engine.idle_callback() + + if random.random() < 0.01: + flavor_texts = [ + "I will speak.", + "I'll answer.", + "I'm always listening.", + "I'm all ears.", + "You shall hear.", + ] + + choice = random.choice(flavor_texts) + if choice == getattr(engine, "_last_flavor", None): + choice = random.choice(flavor_texts) + engine._last_flavor = choice + engine.log(f"Just type. {choice}") + else: + if engine.dev and saved_path is not None and analysis_audio is not None: + engine.log_dev("Analyzing...") + saved = pathlib.Path(saved_path) + run_async( + analyze_voice_audio, + analysis_audio, + BASE_SR, + saved.name, + saved.parent, + saved.stem, + engine.current_voice, + ) + + if not getattr(engine, "_ready_announced", False): + engine.log("Ready to speak.") + engine._ready_announced = True + + if torch.cuda.is_available(): + avail, total = tuple(v / 1024**3 for v in torch.cuda.mem_get_info(0)) + if avail <= total * 0.1: + engine.log( + f"{APP_NAME} is running out of VRAM. Check the bottom right of {APP_NAME}'s window to learn more.", + "warning", + ) + engine.log( + "Please close any memory-resident applications to improve performance.", + "warning", + ) + + def playback_worker(engine: Celune) -> None: - """Receive audio chunks and play them. + """Receive audio chunks from multiple sources, mix them, and play them. Args: - engine: The Celune engine whose audio queue should be played back. + engine: Celune runtime that owns playback queues, DSP state, and logs. Raises: - NotAvailableError: The audio stream is unavailable during playback. + NotAvailableError: Raised when no usable audio output backend is available. """ - started = False + source_buffers: dict[ + int, deque[tuple[npt.NDArray[np.float32], Optional[SpeechTiming]]] + ] = {} + source_done: dict[int, PlaybackSourceDone] = {} + stop_requested = False + + def drain_pending_items() -> bool: + nonlocal stop_requested + + while True: + try: + pending = engine.audio_queue.get_nowait() + except queue.Empty: + return True + + if pending is engine.sentinel: + stop_requested = True + return True + + if pending is engine.force_stop_marker: + source_buffers.clear() + source_done.clear() + _playback_source_statuses(engine).clear() + _playback_source_meta(engine).clear() + engine.utterance_force_stop.clear() + _reset_glow_audio_reactivity(engine) + close_stream(engine, abort=True) + engine.playback_done.set() + release_pipeline(engine) + engine.idle_callback() + return False + + if isinstance(pending, PlaybackChunk): + source_buffers.setdefault(pending.source_id, deque()).extend( + _playback_blocks(pending) + ) + elif isinstance(pending, PlaybackSourceDone): + source_done[pending.source_id] = pending while True: if engine.exit_requested: @@ -1822,140 +2254,183 @@ def playback_worker(engine: Celune) -> None: engine.idle_callback() return - if not started: - if engine.exit_requested: - continue - - item = engine.audio_queue.get() + try: + timeout = 0.01 if source_buffers else None + item = engine.audio_queue.get(timeout=timeout) + except queue.Empty: + item = None if item is engine.sentinel: break if item is engine.force_stop_marker: + source_buffers.clear() + source_done.clear() + _playback_source_statuses(engine).clear() + _playback_source_meta(engine).clear() engine.utterance_force_stop.clear() + _reset_glow_audio_reactivity(engine) close_stream(engine, abort=True) engine.playback_done.set() release_pipeline(engine) engine.idle_callback() - started = False continue - if engine.exit_requested: + if isinstance(item, PlaybackChunk): + source_buffers.setdefault(item.source_id, deque()).extend( + _playback_blocks(item) + ) + elif isinstance(item, PlaybackSourceDone): + source_done[item.source_id] = item + + if not drain_pending_items(): continue - if isinstance(item, SpeechDone) or item is engine.utterance_done: - saved_path = item.saved_path if isinstance(item, SpeechDone) else None - analysis_audio = ( - item.analysis_audio if isinstance(item, SpeechDone) else None - ) - engine.playback_done.set() + if engine.exit_requested: + continue - more_pending = (not engine.audio_queue.empty()) or ( - not engine.text_queue.empty() - ) + while source_buffers: + if not drain_pending_items(): + break - if more_pending: - silence = np.zeros((BASE_SR, 2), dtype=np.float32) - if engine.stream is not None and not engine.exit_requested: - engine.stream.write(silence) - else: + if not _ensure_playback_stream(engine, BASE_SR): + source_buffers.clear() + source_done.clear() + _playback_source_statuses(engine).clear() + _playback_source_meta(engine).clear() release_pipeline(engine) engine.idle_callback() + break - if random.random() < 0.01: - flavor_texts = [ - "I will speak.", - "I'll answer.", - "I'm always listening.", - "I'm all ears.", - "You shall hear.", - ] - - choice = random.choice(flavor_texts) + ready_ids = [ + source_id for source_id, blocks in source_buffers.items() if blocks + ] + if not ready_ids: + break - if choice == getattr(engine, "_last_flavor", None): - choice = random.choice(flavor_texts) + speech_active = bool(_active_speech_source_ids(source_buffers, engine)) + block_len = min( + len(source_buffers[source_id][0][0]) for source_id in ready_ids + ) + mixed = np.zeros((block_len, 2), dtype=np.float32) + timing_to_log: Optional[SpeechTiming] = None + completed_now: list[int] = [] + + for source_id in ready_ids: + block, timing = source_buffers[source_id][0] + block_audio = _apply_source_gain( + np.asarray(block[:block_len], dtype=np.float32), + source_id, + speech_active=speech_active, + block_seconds=block_len / BASE_SR, + engine=engine, + ) + mixed += block_audio + if timing_to_log is None and timing is not None: + timing_to_log = timing - engine._last_flavor = choice - engine.log(f"Just type. {choice}") + if len(block) == block_len: + source_buffers[source_id].popleft() else: - # queueing new speech during analysis may net you a reduced performance - if ( - engine.dev - and saved_path is not None - and analysis_audio is not None - ): - engine.log_dev("Analyzing...") - saved = pathlib.Path(saved_path) - run_async( - analyze_voice_audio, - analysis_audio, - BASE_SR, - saved.name, - saved.parent, - saved.stem, - engine.current_voice, - ) + source_buffers[source_id][0] = ( + np.asarray(block[block_len:], dtype=np.float32), + None, + ) - if not getattr(engine, "_ready_announced", False): - engine.log("Ready to speak.") - engine._ready_announced = True + source_meta = _playback_source_meta(engine).get(source_id) + if isinstance(source_meta, dict): + source_meta["played_frames"] = float( + source_meta.get("played_frames", 0.0) + ) + float(block_len) - if torch.cuda.is_available(): - avail, total = tuple( - v / 1024**3 for v in torch.cuda.mem_get_info(0) - ) - if avail <= total * 0.1: - engine.log( - f"{APP_NAME} is running out of VRAM. " - f"Check the bottom right of {APP_NAME}'s window to learn more.", - "warning", + if not source_buffers[source_id]: + if source_id in source_done: + completed_now.append(source_id) + del source_buffers[source_id] + + mixed = np.clip(mixed, -1.0, 1.0) + + try: + stream = engine.stream + if stream is None: + raise NotAvailableError("audio stream is not available") + log_first_playback(engine, timing_to_log) + engine.glow.schedule(mixed) + stream.write(mixed) + _update_playback_progress(engine, source_buffers) + except Exception as e: + engine.log(f"[PLAY ERROR] {format_error(e, engine.dev)}", "error") + engine.error_callback("Playback error") + close_stream(engine, abort=True) + engine._stream = None + engine._current_sr = None + source_buffers.clear() + source_done.clear() + _playback_source_statuses(engine).clear() + _playback_source_meta(engine).clear() + break + + while True: + newly_complete = [ + source_id + for source_id, marker in source_done.items() + if source_id not in source_buffers + ] + if not newly_complete: + break + + for source_id in newly_complete: + marker = source_done.pop(source_id) + engine.recently_saved = marker.saved_path + _clear_playback_source_status(engine, source_id) + if marker.release_pipeline: + release_pipeline( + engine, + playback_idle=not source_buffers + and engine.audio_queue.empty() + and engine.text_queue.empty(), ) - engine.log( - "Please close any memory-resident applications to improve performance.", - "warning", + if ( + not source_buffers + and engine.audio_queue.empty() + and engine.text_queue.empty() + ): + _finalize_playback_idle( + engine, + saved_path=marker.saved_path, + analysis_audio=marker.analysis_audio, ) - continue - audio_chunk, sr, timing = item + while True: + orphaned = [ + source_id + for source_id, marker in source_done.items() + if source_id not in source_buffers + ] + if not orphaned: + break - if engine.stream is None: - try: - engine.current_sr = sr - engine.stream = sd.OutputStream( - samplerate=sr, - channels=2, - dtype="float32", - blocksize=0, - ) - if engine.stream is None: - raise NotAvailableError("audio stream is not initialized") - - engine.stream.start() - started = True - engine.log_dev(f"[PLAY] started stream at {sr} Hz") - except sd.PortAudioError: - if not engine.audio_unavailable: - engine.log( - f"{APP_NAME} could not initialize the audio stream.", "error" + for source_id in orphaned: + marker = source_done.pop(source_id) + engine.recently_saved = marker.saved_path + _clear_playback_source_status(engine, source_id) + if marker.release_pipeline: + release_pipeline( + engine, + playback_idle=not source_buffers + and engine.audio_queue.empty() + and engine.text_queue.empty(), + ) + if ( + not source_buffers + and engine.audio_queue.empty() + and engine.text_queue.empty() + ): + _finalize_playback_idle( + engine, + saved_path=marker.saved_path, + analysis_audio=marker.analysis_audio, ) - engine.log("No suitable audio device is available.", "error") - engine.error_callback("No suitable audio devices") - engine._audio_unavailable = True - - if engine.exit_requested: - continue - try: - if engine.stream is None: - raise NotAvailableError("audio stream is not initialized") - log_first_playback(engine, timing) - engine.glow.schedule(audio_chunk) - engine.stream.write(audio_chunk) - except Exception as e: - engine.log(f"[PLAY ERROR] {format_error(e, engine.dev)}", "error") - engine.error_callback("Playback error") - close_stream(engine, abort=True) - engine._stream = None - engine._current_sr = None - continue + if stop_requested and not source_buffers and not source_done: + break diff --git a/celune/runtime.py b/celune/runtime.py index 7fb108a..c7921ee 100644 --- a/celune/runtime.py +++ b/celune/runtime.py @@ -7,9 +7,9 @@ import torch +from .constants import APP_NAME from .utils import cuda_architecture, format_number from . import __codename__, __comment__, __version__ -from .constants import APP_NAME def log_runtime_banner(log: Callable[[str, str], None], backend_name: str) -> None: diff --git a/celune/terminal.py b/celune/terminal.py index 54a3b57..fbbe67a 100644 --- a/celune/terminal.py +++ b/celune/terminal.py @@ -4,15 +4,10 @@ import os import sys import ctypes -from collections.abc import Mapping -from typing import IO, Final, Literal, Optional, Callable, Any, cast +from typing import IO, Final, Optional, Callable, Any, cast from .config import config_value -from .constants import JSONSerializable - -type Config = Mapping[str, JSONSerializable] -type ColorMode = Literal["auto", "truecolor", "terminal-default", "ansi", "none"] -type ResolvedColorMode = Literal["truecolor", "terminal-default", "ansi", "none"] +from .typing.common import ColorMode, Config, JSONSerializable, ResolvedColorMode VALID_COLOR_MODES: Final[frozenset[str]] = frozenset( {"auto", "truecolor", "terminal-default", "ansi", "none"} diff --git a/celune/typing/__init__.py b/celune/typing/__init__.py new file mode 100644 index 0000000..85c0f80 --- /dev/null +++ b/celune/typing/__init__.py @@ -0,0 +1,265 @@ +"""Unified Celune type package with lazy re-exports.""" + +from importlib import import_module +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .analysis import ( + EmbeddingModel, + EmbeddingOutput, + EmbeddingPayload, + EmbeddingProcessor, + TextConfig, + TextConfigValue, + VoiceMatch, + ) + from .backends import ( + BackendModel, + MiniModel, + MiniPromptState, + ModelT, + ) + from .celune import ( + ErrorCallback, + Generative, + IdleCallback, + InputStateCallback, + MessageCallback, + NormalizerTokenizer, + ProgressCallback, + QueueAvailableCallback, + ReleasableObject, + SupportsClose, + SupportsUnload, + VoiceChangedCallback, + VoiceLockStateCallback, + ) + from .cevoice import ( + Manifest, + ManifestValue, + VoiceManifest, + ) + from .common import ( + ColorMode, + Config, + JSON, + JSONSerializable, + RGB, + ResolvedColorMode, + TerminalConfig, + VideoMetadataScalar, + VramTier, + ) + from .extensions import ( + DevLogCallable, + GetStateCallable, + LogCallable, + PlayCallable, + SayCallable, + SetVoiceCallable, + StatusCallable, + ThinkCallable, + WaitUntilReadyCallable, + ) + from .persona import ( + ChatMessagePayload, + ChatTemplateRenderer, + ContentItem, + ImageContentItem, + JSONDict, + MessageContent, + ModelGenerateKwargValue, + PersonaModel, + PersonaProcessor, + PersonaTokenizer, + ProcessorKwargValue, + Role, + TextContentItem, + VideoContentItem, + VideoInputWithMetadata, + VideoMetadata, + VisionInput, + VisionProcessorOutput, + ) + from .pipeline import ( + AudioChunk, + AudioQueueItem, + SpeechStreamItem, + SpeechStreamQueue, + TextQueueItem, + ) + from .ui import ( + CeluneBaseUI, + CeluneHeadlessBaseUI, + CeluneTextualUI, + ) + from .utils import ( + CallerInfo, + LanguageResult, + ) + +_MODULE_EXPORTS = { + "AudioChunk": "pipeline", + "AudioQueueItem": "pipeline", + "BackendModel": "backends", + "CallerInfo": "utils", + "CeluneBaseUI": "ui", + "CeluneHeadlessBaseUI": "ui", + "CeluneTextualUI": "ui", + "ChatMessagePayload": "persona", + "ChatTemplateRenderer": "persona", + "ColorMode": "common", + "Config": "common", + "ContentItem": "persona", + "DevLogCallable": "extensions", + "EmbeddingModel": "analysis", + "EmbeddingOutput": "analysis", + "EmbeddingPayload": "analysis", + "EmbeddingProcessor": "analysis", + "ErrorCallback": "celune", + "Generative": "celune", + "GetStateCallable": "extensions", + "IdleCallback": "celune", + "ImageContentItem": "persona", + "InputStateCallback": "celune", + "JSON": "common", + "JSONDict": "persona", + "JSONSerializable": "common", + "LanguageResult": "utils", + "LogCallable": "extensions", + "Manifest": "cevoice", + "ManifestValue": "cevoice", + "MessageCallback": "celune", + "MessageContent": "persona", + "MiniModel": "backends", + "MiniPromptState": "backends", + "ModelGenerateKwargValue": "persona", + "ModelT": "backends", + "NormalizerTokenizer": "celune", + "PersonaModel": "persona", + "PersonaProcessor": "persona", + "PersonaTokenizer": "persona", + "PlayCallable": "extensions", + "ProcessorKwargValue": "persona", + "ProgressCallback": "celune", + "QueueAvailableCallback": "celune", + "ReleasableObject": "celune", + "ResolvedColorMode": "common", + "RGB": "common", + "Role": "persona", + "SayCallable": "extensions", + "SetVoiceCallable": "extensions", + "SpeechStreamItem": "pipeline", + "SpeechStreamQueue": "pipeline", + "StatusCallable": "extensions", + "SupportsClose": "celune", + "SupportsUnload": "celune", + "TerminalConfig": "common", + "TextConfig": "analysis", + "TextConfigValue": "analysis", + "TextQueueItem": "pipeline", + "TextContentItem": "persona", + "ThinkCallable": "extensions", + "VideoContentItem": "persona", + "VideoInputWithMetadata": "persona", + "VideoMetadata": "persona", + "VideoMetadataScalar": "common", + "VisionInput": "persona", + "VisionProcessorOutput": "persona", + "VoiceChangedCallback": "celune", + "VoiceLockStateCallback": "celune", + "VoiceManifest": "cevoice", + "VoiceMatch": "analysis", + "VramTier": "common", + "WaitUntilReadyCallable": "extensions", +} + +__all__ = [ + "AudioChunk", + "AudioQueueItem", + "BackendModel", + "CallerInfo", + "CeluneBaseUI", + "CeluneHeadlessBaseUI", + "CeluneTextualUI", + "ChatMessagePayload", + "ChatTemplateRenderer", + "ColorMode", + "Config", + "ContentItem", + "DevLogCallable", + "EmbeddingModel", + "EmbeddingOutput", + "EmbeddingPayload", + "EmbeddingProcessor", + "ErrorCallback", + "Generative", + "GetStateCallable", + "IdleCallback", + "ImageContentItem", + "InputStateCallback", + "JSON", + "JSONDict", + "JSONSerializable", + "LanguageResult", + "LogCallable", + "Manifest", + "ManifestValue", + "MessageCallback", + "MessageContent", + "MiniModel", + "MiniPromptState", + "ModelGenerateKwargValue", + "ModelT", + "NormalizerTokenizer", + "PersonaModel", + "PersonaProcessor", + "PersonaTokenizer", + "PlayCallable", + "ProcessorKwargValue", + "ProgressCallback", + "QueueAvailableCallback", + "ReleasableObject", + "ResolvedColorMode", + "RGB", + "Role", + "SayCallable", + "SetVoiceCallable", + "SpeechStreamItem", + "SpeechStreamQueue", + "StatusCallable", + "SupportsClose", + "SupportsUnload", + "TerminalConfig", + "TextConfig", + "TextConfigValue", + "TextQueueItem", + "TextContentItem", + "ThinkCallable", + "VideoContentItem", + "VideoInputWithMetadata", + "VideoMetadata", + "VideoMetadataScalar", + "VisionInput", + "VisionProcessorOutput", + "VoiceChangedCallback", + "VoiceLockStateCallback", + "VoiceManifest", + "VoiceMatch", + "VramTier", + "WaitUntilReadyCallable", +] + + +def __getattr__(name: str): + """Resolve type exports lazily to avoid package import cycles.""" + module_name = _MODULE_EXPORTS.get(name) + if module_name is None: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + module = import_module(f"{__name__}.{module_name}") + return getattr(module, name) + + +def __dir__() -> list[str]: + """Return the lazily exported package surface.""" + return __all__ diff --git a/celune/typing/analysis.py b/celune/typing/analysis.py new file mode 100644 index 0000000..43ee37e --- /dev/null +++ b/celune/typing/analysis.py @@ -0,0 +1,64 @@ +"""Analysis-specific protocols and type aliases.""" + +from collections.abc import Mapping +from typing import Protocol, TypedDict, Union + +import torch +import numpy as np +import numpy.typing as npt + +TextConfigValue = Union[str, dict[str, "TextConfigValue"]] +TextConfig = dict[str, TextConfigValue] +EmbeddingPayload = Union[ + torch.Tensor, + npt.NDArray[np.float32], + list[float], + Mapping[str, "EmbeddingPayload"], +] + + +class EmbeddingOutput(Protocol): + """Speaker embedding model output used by Celune analysis.""" + + last_hidden_state: EmbeddingPayload + + +class EmbeddingProcessor(Protocol): + """Processor callable returned by the embedding model package.""" + + def __call__( + self, + y: npt.NDArray[np.float32], + *, + sampling_rate: int, + ) -> Mapping[str, torch.Tensor]: + """Prepare model inputs from a waveform.""" + raise NotImplementedError("protocol not defined") + + +class EmbeddingModel(Protocol): + """Embedding model behavior used by Celune analysis.""" + + def eval(self) -> None: + """Switch the model into evaluation mode.""" + raise NotImplementedError("protocol not defined") + + def to(self, device: torch.device) -> torch.nn.Module: + """Move the model to a device. + + Args: + device: Destination device for the embedding model. + """ + raise NotImplementedError("protocol not defined") + + def __call__(self, **inputs: torch.Tensor) -> EmbeddingOutput: + """Run embedding inference.""" + raise NotImplementedError("protocol not defined") + + +class VoiceMatch(TypedDict): + """Similarity score for one reference voice.""" + + voice: str + cosine: float + percent: float diff --git a/celune/typing/backends.py b/celune/typing/backends.py new file mode 100644 index 0000000..4f720c1 --- /dev/null +++ b/celune/typing/backends.py @@ -0,0 +1,41 @@ +"""Backend-facing protocols and type aliases.""" + +from collections.abc import Iterator +from typing import Protocol, TypeVar + +import torch + + +class BackendModel(Protocol): + """Opaque backend model protocol for backend-independent storage.""" + + +ModelT = TypeVar("ModelT", bound=BackendModel) +MiniPromptState = dict[str, dict[str, torch.Tensor]] + + +class MiniModel(Protocol): + """Pocket TTS model surface used by Celune's mini backend.""" + + sample_rate: int + + def get_state_for_audio_prompt(self, audio_conditioning: str) -> MiniPromptState: + """Return a reusable prompt state for one reference audio path. + + Args: + audio_conditioning: Backend-specific prompt descriptor for one voice sample. + """ + raise NotImplementedError("protocol not defined") + + def generate_audio_stream( + self, + model_state: MiniPromptState, + text_to_generate: str, + ) -> Iterator[torch.Tensor]: + """Yield streamed audio chunks for one prompt state and text. + + Args: + model_state: Prompt state cached for the active voice. + text_to_generate: Text content to synthesize. + """ + raise NotImplementedError("protocol not defined") diff --git a/celune/typing/celune.py b/celune/typing/celune.py new file mode 100644 index 0000000..6f4a1a2 --- /dev/null +++ b/celune/typing/celune.py @@ -0,0 +1,240 @@ +"""Core Celune protocols and callback types.""" + +from __future__ import annotations + +from collections.abc import Iterator +from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol, Union + +import torch +from transformers.modeling_utils import PreTrainedModel +from transformers.tokenization_utils_base import BatchEncoding, PreTrainedTokenizerBase + +from .common import Config, JSONSerializable + +if TYPE_CHECKING: + import queue + import threading + + import numpy as np + import numpy.typing as npt + import sounddevice as sd + + from ..backends import BackendModel, CeluneBackend + from ..cevoice import CEVoicePersona + from ..chroma import AudioRGBGlow + from ..constants import PipelineStates + from ..dsp import StreamingPedalboardReverb + from ..extensions.manager import CeluneExtensionManager + from ..persona.impl import PersonaClient + + +class SupportsClose(Protocol): + """Protocol for objects that can be closed.""" + + def close(self) -> None: + """Release any resources owned by the object.""" + raise NotImplementedError("protocol not defined") + + +class SupportsUnload(Protocol): + """Protocol for objects that can unload their runtime state.""" + + def unload(self) -> None: + """Unload any optional runtime state owned by the object.""" + raise NotImplementedError("protocol not defined") + + +class Generative(Protocol): + """Protocol for normalization-capable language models.""" + + def generate(self, **kwargs: Any) -> torch.Tensor: + """Generate token IDs from the provided model inputs. + + Args: + kwargs: Backend-specific generation keyword arguments. + """ + raise NotImplementedError("protocol not defined") + + def device(self) -> Union[torch.device, str]: + """Return the device used by the generative model.""" + raise NotImplementedError("protocol not defined") + + def parameters(self) -> Iterator[torch.nn.Parameter]: + """Iterate over the model parameters.""" + raise NotImplementedError("protocol not defined") + + +ReleasableObject = Union[ + SupportsClose, + SupportsUnload, + PreTrainedModel, + PreTrainedTokenizerBase, +] + + +class NormalizerTokenizer(Protocol): + """Tokenizer behavior CeluneNorm uses during normalization.""" + + unk_token_id: Optional[int] + pad_token_id: Optional[int] + eos_token_id: Optional[int] + + def convert_tokens_to_ids(self, tokens: str) -> Optional[int]: + """Convert one token to its integer ID. + + Args: + tokens: Token text to resolve into an integer ID. + """ + raise NotImplementedError("protocol not defined") + + def __call__( + self, + text: str, + *, + return_tensors: str, + add_special_tokens: bool, + ) -> BatchEncoding: + """Tokenize text for model input.""" + raise NotImplementedError("protocol not defined") + + def decode( + self, + token_ids: torch.Tensor, + *, + skip_special_tokens: bool, + ) -> Union[str, list[str]]: + """Decode generated token IDs. + + Args: + token_ids: Generated token IDs to decode. + skip_special_tokens: Whether special tokens should be omitted. + """ + raise NotImplementedError("protocol not defined") + + +class MessageCallback(Protocol): + """Callback accepting a message and optional severity.""" + + def __call__(self, msg: str, severity: str = "info") -> None: + """Handle a message emitted by Celune.""" + raise NotImplementedError("protocol not defined") + + +class InputStateCallback(Protocol): + """Callback accepting an input lock state.""" + + def __call__(self, locked: bool) -> None: + """Handle input lock-state changes.""" + raise NotImplementedError("protocol not defined") + + +class VoiceLockStateCallback(Protocol): + """Callback accepting a voice lock state.""" + + def __call__(self, locked: bool) -> None: + """Handle voice lock-state changes.""" + raise NotImplementedError("protocol not defined") + + +class ProgressCallback(Protocol): + """Callback accepting progress and total values.""" + + def __call__(self, progress: Optional[float], total: Optional[float]) -> None: + """Handle a progress update emitted by Celune.""" + raise NotImplementedError("protocol not defined") + + +ErrorCallback = Callable[[str], None] +IdleCallback = Callable[[], None] +QueueAvailableCallback = Callable[[], None] +VoiceChangedCallback = Callable[[str], None] + + +class CeluneStateAccessors: + """Typed attribute surface exposed by ``Celune`` via forwarded properties.""" + + log_callback: MessageCallback + status_callback: MessageCallback + error_callback: Callable[[str], None] + idle_callback: Callable[[], None] + queue_avail_callback: Callable[[], None] + voice_changed_callback: Callable[[str], None] + change_input_state_callback: InputStateCallback + change_voice_lock_state_callback: VoiceLockStateCallback + progress_callback: ProgressCallback + config: Config + _backend_spec: Optional[Union[str, type["CeluneBackend"]]] + _backend_kwargs: dict[str, JSONSerializable] + backend: "CeluneBackend" + tts_backend: str + chunk_size: int + language: str + dev: bool + use_normalization: bool + model: Optional["BackendModel"] + model_name: str + llm: Optional[PreTrainedModel] + tokenizer: Optional[PreTrainedTokenizerBase] + _last_warmup_error: Optional[Exception] + _normalizer_load_epoch: int + current_voice: Optional[str] + current_character: Optional[str] + current_character_persona: Optional["CEVoicePersona"] + voice_bundle_is_default: bool + persona_history: list[dict[str, str]] + persona_attachments: list[dict[str, str]] + voices: tuple[str, ...] + voice_prompt: Optional[str] + text_queue: "queue.Queue" + audio_queue: "queue.Queue" + _playback_thread: Optional["threading.Thread"] + _generation_thread: Optional["threading.Thread"] + _api_thread: Optional["threading.Thread"] + _persona_thread: Optional["threading.Thread"] + _queue_lock: "threading.Lock" + _utterance_force_stop: "threading.Event" + _next_playback_source_id: int + _playback_source_statuses: dict[int, str] + _playback_source_meta: dict[int, dict[str, Union[str, float]]] + _playback_progress_last_emit_at: float + _playback_progress_last_source_id: int + _model_ready: "threading.Event" + _playback_done: "threading.Event" + _say_lock: "threading.Lock" + _wake_lock: "threading.Lock" + _model_lock: "threading.RLock" + _exit_requested: bool + _stream: Optional["sd.OutputStream"] + _current_sr: Optional[int] + _audio_unavailable: bool + can_use_rubberband: bool + speed: float + reverb: "StreamingPedalboardReverb" + recently_saved: Optional[str] + kept_sfx_audio: Optional["npt.NDArray[np.float32]"] + regenerate: bool + locked: bool + loaded: bool + sleeping: bool + _last_flavor: Optional[str] + _ready_announced: bool + cur_state: str + is_in_tutorial: bool + extension_manager: Optional["CeluneExtensionManager"] + glow: "AudioRGBGlow" + vision: Optional["PersonaClient"] + stream: Optional["sd.OutputStream"] + say_lock: "threading.Lock" + utterance_force_stop: "threading.Event" + queue_lock: "threading.Lock" + force_stop_marker: "PipelineStates" + playback_done: "threading.Event" + model_ready: "threading.Event" + utterance_done: "PipelineStates" + sentinel: "PipelineStates" + generation_thread: Optional["threading.Thread"] + playback_thread: Optional["threading.Thread"] + exit_requested: bool + model_lock: "threading.RLock" + audio_unavailable: bool + current_sr: Optional[int] diff --git a/celune/typing/cevoice.py b/celune/typing/cevoice.py new file mode 100644 index 0000000..999112b --- /dev/null +++ b/celune/typing/cevoice.py @@ -0,0 +1,9 @@ +"""CEVOICE manifest type aliases.""" + +from typing import Union + +from .common import JSONSerializable + +ManifestValue = Union[JSONSerializable, "Manifest"] +Manifest = dict[str, ManifestValue] +VoiceManifest = dict[str, Manifest] diff --git a/celune/typing/common.py b/celune/typing/common.py new file mode 100644 index 0000000..d1117b4 --- /dev/null +++ b/celune/typing/common.py @@ -0,0 +1,22 @@ +"""Shared Celune type aliases.""" + +from collections.abc import Mapping +from typing import Literal, Optional, Union + +JSONSerializable = Union[ + None, + bool, + int, + float, + str, + list["JSONSerializable"], + dict[str, "JSONSerializable"], +] +JSON = dict[str, JSONSerializable] +RGB = tuple[int, int, int] +Config = dict[str, JSONSerializable] +TerminalConfig = Mapping[str, JSONSerializable] +ColorMode = Literal["auto", "truecolor", "terminal-default", "ansi", "none"] +ResolvedColorMode = Literal["truecolor", "terminal-default", "ansi", "none"] +VramTier = Literal["low", "medium", "high", "xhigh"] +VideoMetadataScalar = Optional[Union[bool, int, float, str]] diff --git a/celune/typing/extensions.py b/celune/typing/extensions.py new file mode 100644 index 0000000..7b054fd --- /dev/null +++ b/celune/typing/extensions.py @@ -0,0 +1,96 @@ +"""Extension-facing protocols.""" + +from typing import Optional, Protocol, runtime_checkable + +from ..exceptions import IncompleteExtensionError + + +@runtime_checkable +class LogCallable(Protocol): + """Extension callable logging annotation.""" + + def __call__(self, msg: str, severity: str = "info") -> None: + """Emit a log message.""" + raise IncompleteExtensionError("protocol not defined") + + +@runtime_checkable +class DevLogCallable(Protocol): + """Extension callable developer logging annotation.""" + + def __call__(self, msg: str, severity: str = "info") -> None: + """Emit a developer log message.""" + raise IncompleteExtensionError("protocol not defined") + + +@runtime_checkable +class SayCallable(Protocol): + """Extension callable speech request annotation.""" + + def __call__( + self, + text: str, + save: bool = True, + display_text: Optional[str] = None, + ) -> bool: + """Queue text for speech.""" + raise IncompleteExtensionError("protocol not defined") + + +@runtime_checkable +class ThinkCallable(Protocol): + """Extension callable think request annotation.""" + + def __call__(self, text: str) -> bool: + """Start a think request.""" + raise IncompleteExtensionError("protocol not defined") + + +@runtime_checkable +class PlayCallable(Protocol): + """Extension callable play request annotation.""" + + def __call__( + self, + sound_path: str, + keep: bool = False, + volume: float = 1.0, + ) -> bool: + """Queue an audio file for playback.""" + raise IncompleteExtensionError("protocol not defined") + + +@runtime_checkable +class StatusCallable(Protocol): + """Extension callable status update annotation.""" + + def __call__(self, msg: str, severity: str = "info") -> None: + """Emit a status update.""" + raise IncompleteExtensionError("protocol not defined") + + +@runtime_checkable +class SetVoiceCallable(Protocol): + """Extension callable voice setting request annotation.""" + + def __call__(self, name: str) -> bool: + """Request a voice change.""" + raise IncompleteExtensionError("protocol not defined") + + +@runtime_checkable +class GetStateCallable(Protocol): + """Extension callable state read annotation.""" + + def __call__(self) -> str: + """Read the current runtime state.""" + raise IncompleteExtensionError("protocol not defined") + + +@runtime_checkable +class WaitUntilReadyCallable(Protocol): + """Extension callable wait-until-ready annotation.""" + + def __call__(self, timeout: float = 30.0) -> bool: + """Wait for Celune to become ready.""" + raise IncompleteExtensionError("protocol not defined") diff --git a/celune/typing/persona.py b/celune/typing/persona.py new file mode 100644 index 0000000..dcc49ec --- /dev/null +++ b/celune/typing/persona.py @@ -0,0 +1,136 @@ +"""Persona runtime protocols and type aliases.""" + +from collections.abc import Sequence +from typing import Literal, Optional, Protocol, TypedDict, Union + +import torch +from transformers.tokenization_utils_base import BatchEncoding + +from .common import JSONSerializable, VideoMetadataScalar + +Role = Literal["system", "user", "assistant"] +VisionInput = Union[JSONSerializable, torch.Tensor, bytes, memoryview] +ProcessorKwargValue = Union[VideoMetadataScalar, Sequence[VideoMetadataScalar]] +ModelGenerateKwargValue = Union[torch.Tensor, int, float, bool] + + +class TextContentItem(TypedDict): + """Text content block accepted by Persona chat messages.""" + + type: Literal["text"] + text: str + + +class ImageContentItem(TypedDict): + """Image content block accepted by Persona chat messages.""" + + type: Literal["image"] + image: str + + +class VideoContentItem(TypedDict): + """Video content block accepted by Persona chat messages.""" + + type: Literal["video"] + video: str + + +ContentItem = Union[TextContentItem, ImageContentItem, VideoContentItem] +VideoMetadata = dict[str, VideoMetadataScalar] +VideoInputWithMetadata = tuple[VisionInput, VideoMetadata] +VisionProcessorOutput = tuple[ + Optional[list[VisionInput]], + Optional[list[VideoInputWithMetadata]], + dict[str, ProcessorKwargValue], +] +MessageContent = Union[str, list[ContentItem]] + + +class ChatMessagePayload(TypedDict): + """Serialized chat message structure used by the Persona runtime.""" + + role: Role + content: MessageContent + + +JSONDict = ChatMessagePayload + + +class ChatTemplateRenderer(Protocol): + """Renderer supporting Hugging Face-style chat templates.""" + + def apply_chat_template( + self, + conversation: Sequence[ChatMessagePayload], + *, + tokenize: bool = False, + add_generation_prompt: bool = True, + return_dict: bool = True, + return_tensors: str = "pt", + ) -> Union[str, BatchEncoding]: + """Render or tokenize a chat conversation. + + Args: + conversation: Persona chat history to render. + tokenize: Whether the rendered conversation should be tokenized. + add_generation_prompt: Whether to append an assistant generation turn. + return_dict: Whether structured tensor output should be returned. + return_tensors: Tensor backend requested by the caller. + """ + raise NotImplementedError("protocol not defined") + + +class PersonaTokenizer(Protocol): + """Tokenizer protocol used by the Persona runtime.""" + + eos_token_id: Optional[int] + + def __call__(self, *, text: str, return_tensors: str) -> BatchEncoding: + """Tokenize text into a batch encoding.""" + raise NotImplementedError("protocol not defined") + + def decode(self, token_ids: torch.Tensor, *, skip_special_tokens: bool) -> str: + """Decode generated token IDs into text. + + Args: + token_ids: Generated token IDs to decode. + skip_special_tokens: Whether special tokens should be omitted. + """ + raise NotImplementedError("protocol not defined") + + +class PersonaProcessor(ChatTemplateRenderer, Protocol): + """Processor protocol used by the Persona runtime.""" + + tokenizer: Optional[PersonaTokenizer] + + def __call__( + self, + *, + text: str, + images: Optional[Sequence[VisionInput]] = None, + videos: Optional[Sequence[VisionInput]] = None, + video_metadata: Optional[Sequence[VideoMetadata]] = None, + return_tensors: str, + **kwargs: ProcessorKwargValue, + ) -> BatchEncoding: + """Build multimodal model inputs.""" + raise NotImplementedError("protocol not defined") + + +class PersonaModel(Protocol): + """Model protocol used by the Persona runtime.""" + + device: Union[torch.device, str] + + def generate(self, **kwargs: ModelGenerateKwargValue) -> torch.Tensor: + """Generate token IDs from prepared inputs. + + Args: + kwargs: Prepared model inputs and generation options. + """ + raise NotImplementedError("protocol not defined") + + def eval(self) -> None: + """Switch the model into eval mode.""" + raise NotImplementedError("protocol not defined") diff --git a/celune/typing/pipeline.py b/celune/typing/pipeline.py new file mode 100644 index 0000000..680f2c2 --- /dev/null +++ b/celune/typing/pipeline.py @@ -0,0 +1,16 @@ +"""Speech pipeline aliases.""" + +import queue +from typing import Optional, Union + +import numpy as np +import numpy.typing as npt + +from ..constants import PipelineStates +from ..dataclasses.pipeline import PlaybackChunk, PlaybackSourceDone, SpeechRequest + +SpeechStreamItem = Optional[Union[npt.NDArray[np.float32], Exception]] +SpeechStreamQueue = queue.Queue[SpeechStreamItem] +TextQueueItem = Union[SpeechRequest, PipelineStates] +AudioChunk = PlaybackChunk +AudioQueueItem = Union[PlaybackChunk, PlaybackSourceDone, PipelineStates] diff --git a/celune/typing/ui.py b/celune/typing/ui.py new file mode 100644 index 0000000..0fde3fe --- /dev/null +++ b/celune/typing/ui.py @@ -0,0 +1,98 @@ +"""UI protocol definitions.""" + +from typing import Optional, Protocol, TYPE_CHECKING + +if TYPE_CHECKING: + from ..celune import Celune + + +class CeluneBaseUI(Protocol): + """Celune base UI protocols.""" + + celune: "Celune" + + def run(self) -> None: + """Run the UI's main loop.""" + + +class CeluneTextualUI(CeluneBaseUI, Protocol): + """Protocol for Celune's interactive Textual UI callbacks.""" + + def tts_log(self, msg: str, severity: str = "info") -> None: + """Handle log messages coming from Celune. + + Args: + msg: Message text emitted by Celune. + severity: Message severity label. + """ + + def safe_status(self, msg: str, severity: str = "info") -> None: + """Update current status. + + Args: + msg: Status text to display. + severity: Status severity label. + """ + + def safe_progress( + self, progress: Optional[float], total: Optional[float] = None + ) -> None: + """Update current progress. + + Args: + progress: Current completed progress amount. + total: Optional total progress amount. + """ + + def error(self, error: str) -> None: + """Set the UI status to the error message. + + Args: + error: Error text to surface to the user. + """ + + def tts_idle(self) -> None: + """Reset UI state after Celune stops talking.""" + + def tts_queue_avail(self) -> None: + """Unlock input queueing after Celune completes generation.""" + + def tts_voice_changed(self, name: str) -> None: + """Set UI state after changing Celune's voice. + + Args: + name: Newly selected voice name. + """ + + def change_input_state(self, locked: bool) -> None: + """Lock or unlock Celune's UI layer. + + Args: + locked: Whether input should be locked. + """ + + def change_voice_lock_state(self, locked: bool) -> None: + """Lock or unlock Celune's voice change button. + + Args: + locked: Whether voice selection should be locked. + """ + + +class CeluneHeadlessBaseUI(CeluneBaseUI, Protocol): + """Protocol for Celune's headless UI callbacks.""" + + def headless_log(self, msg: str, severity: str = "info") -> None: + """Log to the headless interface. + + Args: + msg: Message text emitted by Celune. + severity: Message severity label. + """ + + def headless_error(self, error: str) -> None: + """Log an error to the headless interface. + + Args: + error: Error text to surface to the operator. + """ diff --git a/celune/typing/utils.py b/celune/typing/utils.py new file mode 100644 index 0000000..4968e3a --- /dev/null +++ b/celune/typing/utils.py @@ -0,0 +1,20 @@ +"""Utility-layer typed dictionaries.""" + +from typing import TypedDict + + +class CallerInfo(TypedDict): + """Caller information type annotation.""" + + function: str + filename: str + line: int + + +class LanguageResult(TypedDict): + """Language detection metadata type annotation.""" + + language: str + languages: list[str] + probabilities: dict[str, float] + supported: bool diff --git a/celune/ui/app.py b/celune/ui/app.py index 064b805..15e3928 100644 --- a/celune/ui/app.py +++ b/celune/ui/app.py @@ -6,37 +6,39 @@ import time import shlex import logging +import datetime import itertools import threading import contextlib -import datetime +from dataclasses import dataclass, field +from pathlib import Path from collections.abc import Iterator -from typing import cast, Optional, Callable, Union +from typing import Optional, Callable, Union, TextIO import yaml from rich.text import Text from textual.color import Color from textual.timer import Timer +from textual import work, events from textual.widget import Widget from textual.css.types import EdgeStyle -from textual import work, events from textual.app import App, ComposeResult from textual.containers import Horizontal, Vertical from textual.widgets import Label, RichLog, TextArea, Button, ProgressBar -from ..celune import Celune from .. import colors +from ..celune import Celune +from ..constants import APP_NAME from ..cevoice import default_loader -from .terminal import LogRedirect, UILogHandler from . import resources as ui_resources from .theme import CELUNE_CSS, severity_color +from .terminal import LogRedirect, UILogHandler +from ..paths import config_path, main_window_log_path from .commands import process_command as process_ui_command from ..persona.impl import ( persona_talkback_enabled, persona_enabled, ) -from ..constants import APP_NAME -from ..paths import config_path, main_window_log_path from ..utils import ( format_error, indent, @@ -47,6 +49,94 @@ ) +@dataclass +class CeluneUIWidgetState: + """Resolved widget references owned by the UI.""" + + logs: Optional[RichLog] = None + input_box: Optional[TextArea] = None + style_button: Optional[Button] = None + status: Optional[Label] = None + resources: Optional[Label] = None + progress_bar: Optional[ProgressBar] = None + + +@dataclass +class CeluneUIThemeState: + """Theme and status marquee state.""" + + themes: tuple[str, str] + active_theme_name: str + log_history: list[tuple[str, str]] = field(default_factory=list) + status_severity: str = "info" + status_text: str = "" + status_marquee_offset: int = 0 + status_marquee_gap: str = " " + status_marquee_timer: Optional[Timer] = None + + +@dataclass +class CeluneUIBindingState: + """Bindings between the UI and the runtime.""" + + celune: Optional[Celune] = None + celune_ready: bool = False + celune_styles: tuple[str, ...] = () + celune_voices: Optional[Iterator[str]] = None + style_index: int = 0 + cur_state: str = "active" + consume_on_boundary: bool = False + suppress_input_change: bool = False + resource_page: int = 0 + input_locked: bool = True + persona_available: bool = False + persona_probe_running: bool = False + + +@dataclass +class CeluneUILogCaptureState: + """Stdio/log redirection and persisted log state.""" + + old_stdout: TextIO + old_stderr: TextIO + log_stdout: Optional[LogRedirect] = None + log_stderr: Optional[LogRedirect] = None + runtime_log_capture_enabled: bool = False + runtime_redirect_loggers: Optional[dict[str, logging.Logger]] = None + runtime_redirect_handlers: Optional[dict[str, UILogHandler]] = None + runtime_redirect_original_handlers: Optional[dict[str, list[logging.Handler]]] = ( + None + ) + runtime_redirect_original_propagate: Optional[dict[str, bool]] = None + warnings_capture_enabled: bool = False + log_file_path: Path = field(default_factory=Path) + log_file_initialized: bool = False + + +@dataclass +class CeluneUIInteractionState: + """Transient UI effects, sleep scheduling, and tutorial state.""" + + border_pulse_tokens: dict[int, int] = field(default_factory=dict) + border_pulse_widgets: dict[int, Widget] = field(default_factory=dict) + tutorial_timers: list[Timer] = field(default_factory=list) + sleep_timer: Optional[Timer] = None + tutorial_token: int = 0 + tutorial_active: bool = False + + +def _forward_ui_property(container_name: str, field_name: str) -> property: + """Create a property that forwards storage to a grouped UI state container.""" + + def getter(instance): + return getattr(getattr(instance, container_name), field_name) + + def setter(instance, value) -> None: + setattr(getattr(instance, container_name), field_name, value) + + return property(getter, setter) + + class CeluneUI(App): """User interface.""" @@ -60,13 +150,6 @@ def __init__(self) -> None: if CeluneUI._instance is not None: raise RuntimeError(f"can only instantiate {self.__class__.__name__} once") - self.logs = cast(RichLog, None) - self.input_box = cast(TextArea, None) - self.style_button = cast(Button, None) - self.status = cast(Label, None) - self.resources = cast(Label, None) - self.progress_bar = cast(ProgressBar, None) - if is_april_fools() and os.getenv("CELUNE_DISABLE_APRIL_FOOLS") not in { "1", "true", @@ -74,54 +157,100 @@ def __init__(self) -> None: "yes", "enabled", }: - self.themes = ("celune_april_fools", "celune_april_fools") - self.active_theme_name = "celune_april_fools" + themes = ("celune_april_fools", "celune_april_fools") + active_theme_name = "celune_april_fools" else: - self.themes = ("celune", "celune_light") - self.active_theme_name = "celune" - self.log_history: list[tuple[str, str]] = [] - self.status_severity = "info" + themes = ("celune", "celune_light") + active_theme_name = "celune" - self.celune = cast(Celune, None) - self.celune_ready = False - self.celune_styles: tuple[str, ...] = () - self.celune_voices: Iterator[str] = itertools.cycle(self.celune_styles) - - self.style_index = 0 - - self._old_stdout = sys.stdout - self._old_stderr = sys.stderr - - self._log_stdout = cast(LogRedirect, None) - self._log_stderr = cast(LogRedirect, None) - self._runtime_log_capture_enabled = False - self._runtime_redirect_loggers: Optional[dict[str, logging.Logger]] = None - self._runtime_redirect_handlers: Optional[dict[str, UILogHandler]] = None - self._runtime_redirect_original_handlers: Optional[ - dict[str, list[logging.Handler]] - ] = None - self._runtime_redirect_original_propagate: Optional[dict[str, bool]] = None - self._warnings_capture_enabled: bool = False - - self.cur_state = "active" - - self.consume_on_boundary = False - self._suppress_input_change = False - self._resource_page = 0 - self._border_pulse_tokens: dict[int, int] = {} - self._border_pulse_widgets: dict[int, Widget] = {} - self._tutorial_timers: list[Timer] = [] - self._sleep_timer: Optional[Timer] = None - self._tutorial_token = 0 - self._tutorial_active = False - self._input_locked = True - self._persona_available = False - self._persona_probe_running = False - self._log_file_path = main_window_log_path(create_parent=True) - self._log_file_initialized = False + self._widgets = CeluneUIWidgetState() + self._theme_state = CeluneUIThemeState( + themes=themes, + active_theme_name=active_theme_name, + ) + self._binding_state = CeluneUIBindingState(celune_voices=itertools.cycle(())) + self._log_capture_state = CeluneUILogCaptureState( + old_stdout=sys.stdout, + old_stderr=sys.stderr, + log_file_path=main_window_log_path(create_parent=True), + ) + self._interaction_state = CeluneUIInteractionState() CeluneUI._instance = self + logs = _forward_ui_property("_widgets", "logs") + input_box = _forward_ui_property("_widgets", "input_box") + style_button = _forward_ui_property("_widgets", "style_button") + status = _forward_ui_property("_widgets", "status") + resources = _forward_ui_property("_widgets", "resources") + progress_bar = _forward_ui_property("_widgets", "progress_bar") + + themes = _forward_ui_property("_theme_state", "themes") + active_theme_name = _forward_ui_property("_theme_state", "active_theme_name") + log_history = _forward_ui_property("_theme_state", "log_history") + status_severity = _forward_ui_property("_theme_state", "status_severity") + _status_text = _forward_ui_property("_theme_state", "status_text") + _status_marquee_offset = _forward_ui_property( + "_theme_state", "status_marquee_offset" + ) + _status_marquee_gap = _forward_ui_property("_theme_state", "status_marquee_gap") + _status_marquee_timer = _forward_ui_property("_theme_state", "status_marquee_timer") + + celune = _forward_ui_property("_binding_state", "celune") + celune_ready = _forward_ui_property("_binding_state", "celune_ready") + celune_styles = _forward_ui_property("_binding_state", "celune_styles") + celune_voices = _forward_ui_property("_binding_state", "celune_voices") + style_index = _forward_ui_property("_binding_state", "style_index") + cur_state = _forward_ui_property("_binding_state", "cur_state") + consume_on_boundary = _forward_ui_property("_binding_state", "consume_on_boundary") + _suppress_input_change = _forward_ui_property( + "_binding_state", "suppress_input_change" + ) + _resource_page = _forward_ui_property("_binding_state", "resource_page") + _input_locked = _forward_ui_property("_binding_state", "input_locked") + _persona_available = _forward_ui_property("_binding_state", "persona_available") + _persona_probe_running = _forward_ui_property( + "_binding_state", "persona_probe_running" + ) + + _old_stdout = _forward_ui_property("_log_capture_state", "old_stdout") + _old_stderr = _forward_ui_property("_log_capture_state", "old_stderr") + _log_stdout = _forward_ui_property("_log_capture_state", "log_stdout") + _log_stderr = _forward_ui_property("_log_capture_state", "log_stderr") + _runtime_log_capture_enabled = _forward_ui_property( + "_log_capture_state", "runtime_log_capture_enabled" + ) + _runtime_redirect_loggers = _forward_ui_property( + "_log_capture_state", "runtime_redirect_loggers" + ) + _runtime_redirect_handlers = _forward_ui_property( + "_log_capture_state", "runtime_redirect_handlers" + ) + _runtime_redirect_original_handlers = _forward_ui_property( + "_log_capture_state", "runtime_redirect_original_handlers" + ) + _runtime_redirect_original_propagate = _forward_ui_property( + "_log_capture_state", "runtime_redirect_original_propagate" + ) + _warnings_capture_enabled = _forward_ui_property( + "_log_capture_state", "warnings_capture_enabled" + ) + _log_file_path = _forward_ui_property("_log_capture_state", "log_file_path") + _log_file_initialized = _forward_ui_property( + "_log_capture_state", "log_file_initialized" + ) + + _border_pulse_tokens = _forward_ui_property( + "_interaction_state", "border_pulse_tokens" + ) + _border_pulse_widgets = _forward_ui_property( + "_interaction_state", "border_pulse_widgets" + ) + _tutorial_timers = _forward_ui_property("_interaction_state", "tutorial_timers") + _sleep_timer = _forward_ui_property("_interaction_state", "sleep_timer") + _tutorial_token = _forward_ui_property("_interaction_state", "tutorial_token") + _tutorial_active = _forward_ui_property("_interaction_state", "tutorial_active") + def _run_on_ui_thread(self, callback: Callable[[], None]) -> None: if threading.current_thread() is threading.main_thread(): callback() @@ -171,6 +300,55 @@ def _refresh_status(self) -> None: return self.status.styles.color = self._severity_color(self.status_severity) + def _status_view_width(self) -> int: + """Estimate how many status characters can fit without clipping.""" + if self.status is None: + return 32 + + size = getattr(self.status, "size", None) + width = getattr(size, "width", 0) if size is not None else 0 + if isinstance(width, int) and width > 6: + return max(8, width - 2) + return 32 + + def _render_status_text(self) -> str: + """Return the current status text, marqueeing when it exceeds the label width.""" + width = self._status_view_width() + if len(self._status_text) <= width: + self._status_marquee_offset = 0 + return indent(self._status_text, spaces=2) + + loop = f"{self._status_text}{self._status_marquee_gap}" + offset = self._status_marquee_offset % len(loop) + window = (loop * 2)[offset : offset + width] + return indent(window, spaces=2) + + def _update_status_label(self) -> None: + """Push the current status text into the label.""" + if self.status is None: + return + self.status.update(self._render_status_text()) + self._refresh_status() + + def _advance_status_marquee(self) -> None: + """Advance the marquee one character for long status messages.""" + if self.status is None: + return + if len(self._status_text) <= self._status_view_width(): + self._update_status_label() + return + self._status_marquee_offset += 1 + self._update_status_label() + + def on_resize(self, _event: events.Resize) -> None: + """Re-render width-sensitive widgets after the window size changes. + + Args: + _event: Textual resize event that triggered the redraw. + """ + if self.status is not None: + self._update_status_label() + def _refresh_logs(self) -> None: """Repaint existing log entries using the active theme colors.""" if self.logs is None: @@ -301,6 +479,9 @@ def on_mount(self) -> None: self._install_runtime_log_redirects() ui_resources.prime_usage() self.set_interval(2.06, self.advance_resources) + self._status_marquee_timer = self.set_interval( + 0.18, self._advance_status_marquee + ) self.call_after_refresh(self.start_background_init) self.safe_status("Initializing") @@ -506,10 +687,14 @@ def load_tts(self) -> None: f"New to {APP_NAME}? Type /tutorial to begin the tutorial." ) self._schedule_sleep_timer() - + else: + self.error(f"{APP_NAME} could not start") + self.cur_state = "error" except Exception as e: self.safe_log(f"[INIT ERROR] {format_error(e, self.celune.dev)}", "error") self.celune.glow.fatal() + if not self.celune._try_play_signal("error"): + self.safe_log_dev("Could not play the error signal.", "warning") self.error(f"{APP_NAME} could not start") self.cur_state = "error" @@ -753,8 +938,9 @@ def safe_status(self, msg: str, severity: str = "info") -> None: self.status_severity = severity def update() -> None: - self.status.update(indent(msg, spaces=2)) - self._refresh_status() + self._status_text = msg + self._status_marquee_offset = 0 + self._update_status_label() self.update_resources() self._run_on_ui_thread(update) @@ -1211,24 +1397,24 @@ def _graceful_exit(self) -> None: self.exit() def graceful_exit(self) -> None: - """Public interface for CeluneUI._graceful_exit().""" + """Exit the UI through the same graceful shutdown path as internal callers.""" self._graceful_exit() @property def tutorial_token(self) -> int: - """Property for accessing the tutorial token held by Celune. + """Return the active tutorial cancellation token. Returns: - int: The tutorial token currently in use by Celune. + int: The tutorial token currently used to invalidate pending tutorial work. """ return self._tutorial_token @property def tutorial_active(self) -> bool: - """Property for accessing whether the tutorial is active or not. + """Return whether a tutorial flow is currently active. Returns: - bool: Celune's current tutorial flag. + bool: ``True`` when tutorial work is active, otherwise ``False``. """ return self._tutorial_active @@ -1250,13 +1436,13 @@ def _split_command_input(text: str) -> list[str]: @staticmethod def split_command_input(text: str) -> list[str]: - """Public interface for CeluneUI._split_command_input(). + """Split one slash-command string into a command name and arguments. Args: text: The command input to split. Returns: - list[str]: The return value of _split_command_input(), containing a split command name and arguments. + list[str]: The parsed command name followed by its arguments. """ return CeluneUI._split_command_input(text) diff --git a/celune/ui/commands.py b/celune/ui/commands.py index 2604720..f73c004 100644 --- a/celune/ui/commands.py +++ b/celune/ui/commands.py @@ -11,11 +11,11 @@ import soundfile as sf -from ..backends.qwen3 import Qwen3 +from ..paths import project_root from ..constants import APP_NAME -from ..utils import format_error +from ..backends.qwen3 import Qwen3 from ..exceptions import InvalidExtensionError -from ..utils import replace_ipa +from ..utils import format_error, replace_ipa, format_number if TYPE_CHECKING: from .app import CeluneUI @@ -65,7 +65,9 @@ def tutorial(ui: CeluneUI) -> None: Args: ui: The instance of CeluneUI that the tutorial will interact with. """ - assets = Path(__file__).resolve().parents[1] / "assets" + assets = project_root() / "celune" / "assets" + if not assets.exists(): + assets = project_root() / "assets" if not assets.exists(): ui.safe_log("No tutorial assets found.", "warning") return @@ -341,13 +343,40 @@ def process_command(ui: CeluneUI, command: str, args: list[str]) -> None: return if command == "play": if not args: - ui.safe_log("Usage: /play ", "warning") + ui.safe_log("Usage: /play [volume]", "warning") return try: - if not ui.celune.play(args[0]): - return - ui.safe_log(f"Playing {args[0]}") + volume = 1.0 + if len(args) >= 2: + try: + volume = float(args[1]) + except ValueError: + ui.safe_log( + f"Invalid volume for '{command}', must be numeric.", + "warning", + ) + return + + def worker() -> None: + try: + if not ui.celune.play(args[0], volume=volume): + return + if args[0].startswith("https://"): + ui.safe_log( + f"Playing YouTube audio at {format_number(volume * 100)}% volume" + ) + else: + ui.safe_log( + f"Playing {args[0]} at {format_number(volume * 100)}% volume" + ) + except Exception as exc: + ui.safe_log( + f"Cannot play this audio: {format_error(exc, ui.celune.dev)}", + "error", + ) + + threading.Thread(target=worker, daemon=True).start() except Exception as e: ui.safe_log( f"Cannot play this file: {format_error(e, ui.celune.dev)}", diff --git a/celune/ui/headless.py b/celune/ui/headless.py index b3d2c20..fe4d86a 100644 --- a/celune/ui/headless.py +++ b/celune/ui/headless.py @@ -10,8 +10,8 @@ from ..celune import Celune from ..utils import discard -from ..constants import APP_NAME, ExitCodes, SIGTSTP from ..config import Config, config_bool +from ..constants import APP_NAME, ExitCodes, SIGTSTP class CeluneHeadlessUI: diff --git a/celune/ui/protocols.py b/celune/ui/protocols.py index 52f7bb0..89df509 100644 --- a/celune/ui/protocols.py +++ b/celune/ui/protocols.py @@ -1,98 +1,6 @@ # SPDX-License-Identifier: MIT -"""UI callback protocols.""" +"""Compatibility re-exports for UI callback protocols.""" -from typing import Protocol, Optional +from ..typing.ui import CeluneBaseUI, CeluneHeadlessBaseUI, CeluneTextualUI -from ..celune import Celune - - -class CeluneBaseUI(Protocol): - """Celune base UI protocols.""" - - celune: Celune - - def run(self) -> None: - """Run the UI's main loop.""" - - -class CeluneTextualUI(CeluneBaseUI, Protocol): - """Protocol for Celune's interactive Textual UI callbacks.""" - - def tts_log(self, msg: str, severity: str = "info") -> None: - """Handle log messages coming from Celune. - - Args: - msg: The message to be logged. - severity: The severity to log the message as. - """ - - def safe_status(self, msg: str, severity: str = "info") -> None: - """Update current status. - - Args: - msg: The message to be logged. - severity: The severity to log the message as. - """ - - def safe_progress( - self, progress: Optional[float], total: Optional[float] = None - ) -> None: - """Update current progress. - - Args: - progress: How many steps were processed. - total: How many total steps are to be processed. - """ - - def error(self, error: str) -> None: - """Set the UI status to the error message. - - Args: - error: The error message to log. - """ - - def tts_idle(self) -> None: - """Reset UI state after Celune stops talking.""" - - def tts_queue_avail(self) -> None: - """Unlock input queueing after Celune completes generation.""" - - def tts_voice_changed(self, name: str) -> None: - """Set UI state after changing Celune's voice. - - Args: - name: The loaded voice name. - """ - - def change_input_state(self, locked: bool) -> None: - """Lock or unlock Celune's UI layer. - - Args: - locked: The new UI lock state. - """ - - def change_voice_lock_state(self, locked: bool) -> None: - """Lock or unlock Celune's voice change button. - - Args: - locked: The new voice change lock state. - """ - - -class CeluneHeadlessBaseUI(CeluneBaseUI, Protocol): - """Protocol for Celune's headless UI callbacks.""" - - def headless_log(self, msg: str, severity: str = "info") -> None: - """Log to the headless interface. - - Args: - msg: The message to be logged. - severity: The severity to log the message as. - """ - - def headless_error(self, error: str) -> None: - """Log an error to the headless interface. - - Args: - error: The error message to log. - """ +__all__ = ["CeluneBaseUI", "CeluneHeadlessBaseUI", "CeluneTextualUI"] diff --git a/celune/ui/resources.py b/celune/ui/resources.py index e65edc8..0f15537 100644 --- a/celune/ui/resources.py +++ b/celune/ui/resources.py @@ -55,7 +55,13 @@ def gpu_usage() -> Optional[int]: if proc.poll() is None: return _NVIDIA_SMI_USAGE - stdout, _ = proc.communicate() + try: + stdout, _ = proc.communicate() + except (OSError, ValueError, subprocess.SubprocessError): + _NVIDIA_SMI_PROC = None + _NVIDIA_SMI_USAGE = None + return None + _NVIDIA_SMI_PROC = None if proc.returncode != 0: diff --git a/celune/ui/terminal.py b/celune/ui/terminal.py index 41ee370..8862549 100644 --- a/celune/ui/terminal.py +++ b/celune/ui/terminal.py @@ -4,8 +4,8 @@ import sys import re import logging -from collections.abc import Collection from typing import Callable, Optional +from collections.abc import Collection import readchar diff --git a/celune/ui/theme.py b/celune/ui/theme.py index 6fa0117..eaafc09 100644 --- a/celune/ui/theme.py +++ b/celune/ui/theme.py @@ -3,7 +3,7 @@ from ..colors import SEVERITY_COLORS -# this CSS draws its palette from the currently loaded CEVOICE pack +# this CSS draws its palette from the currently loaded CEVOICE/CECHAR pack CELUNE_CSS = """ Screen { layout: vertical; diff --git a/celune/updater.py b/celune/updater.py index 33d2516..26b2f9c 100644 --- a/celune/updater.py +++ b/celune/updater.py @@ -1,19 +1,34 @@ # SPDX-License-Identifier: MIT """Celune automatic update helpers.""" +from __future__ import annotations + +import ctypes +import hashlib +import json import os import re +import shutil import subprocess +import sys +import tempfile +import time +import urllib.request +import zipfile +from dataclasses import dataclass from pathlib import Path from typing import Optional -from dataclasses import dataclass from . import __version__ from .exceptions import UpdateError +from .paths import project_root, running_compiled REMOTE_URL = "https://github.com/celunah/celune.git" +ARTIFACT_BASE_URL = "https://nightly.link/celunah/celune/workflows/ci" +UPDATE_MANIFEST_NAME = "celune-update.json" SHORT_HASH_LENGTH = 7 UPDATE_BRANCHES = {"main", "master"} +DOWNLOAD_TIMEOUT = 30 @dataclass(frozen=True) @@ -36,9 +51,44 @@ class VersionKey: suffix: str = "" +@dataclass(frozen=True) +class BundleManifest: + """Compiled bundle metadata distributed with launcher artifacts.""" + + version: str + revision: str + artifact: str + files: dict[str, str] + + def _repo_root() -> Path: """Return where the Git repository root is located.""" - return Path(__file__).resolve().parent.parent + return project_root() + + +def _bundle_dir() -> Path: + """Return the compiled bundle directory used by the launcher.""" + executable = Path(sys.argv[0]).resolve() + if executable.is_dir(): + return executable + return executable.parent + + +def _manifest_path(bundle_dir: Optional[Path] = None) -> Path: + """Return the bundled update manifest path.""" + return (bundle_dir or _bundle_dir()) / UPDATE_MANIFEST_NAME + + +def _platform_artifact_name() -> str: + """Return the CI artifact name for the current platform.""" + if os.name == "nt": + return "Celune-win-x64" + return "Celune-linux-x64" + + +def _artifact_download_url(branch: str, artifact: str) -> str: + """Return the direct nightly.link ZIP URL for one workflow artifact.""" + return f"{ARTIFACT_BASE_URL}/{branch}/{artifact}.zip" def _run_git(args: list[str], timeout: int = 15) -> str: @@ -201,16 +251,184 @@ def _is_git_checkout() -> bool: return False +def _sha256_file(path: Path) -> str: + """Return the SHA-256 digest for one file.""" + digest = hashlib.sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(1024 * 1024), b""): + digest.update(chunk) + return digest.hexdigest() + + +def _bundle_checksums( + bundle_dir: Path, + filenames: tuple[str, ...] | list[str], +) -> dict[str, str]: + """Return checksums for bundle files present in one install directory.""" + checksums: dict[str, str] = {} + for filename in filenames: + path = bundle_dir / filename + if path.is_file(): + checksums[filename] = _sha256_file(path) + return checksums + + +def _parse_bundle_manifest(raw: object) -> Optional[BundleManifest]: + """Convert raw JSON-like data into bundle metadata.""" + if not isinstance(raw, dict): + return None + + version = raw.get("version") + revision = raw.get("revision") + artifact = raw.get("artifact") + files = raw.get("files") + if not ( + isinstance(version, str) + and isinstance(revision, str) + and isinstance(artifact, str) + and isinstance(files, dict) + ): + return None + + normalized_files: dict[str, str] = {} + for name, digest in files.items(): + if isinstance(name, str) and isinstance(digest, str): + normalized_files[name] = digest + + if not normalized_files: + return None + + return BundleManifest( + version=version, + revision=revision, + artifact=artifact, + files=normalized_files, + ) + + +def _load_local_bundle_manifest( + bundle_dir: Optional[Path] = None, +) -> Optional[BundleManifest]: + """Load the local compiled bundle manifest when available.""" + manifest_file = _manifest_path(bundle_dir) + try: + payload = json.loads(manifest_file.read_text(encoding="utf-8")) + except (FileNotFoundError, OSError, json.JSONDecodeError): + return None + return _parse_bundle_manifest(payload) + + +def _download_to_file(url: str, destination: Path) -> None: + """Download one URL into the given destination path.""" + request = urllib.request.Request( + url, + headers={"User-Agent": "Celune-Updater/1.0"}, + ) + with urllib.request.urlopen(request, timeout=DOWNLOAD_TIMEOUT) as response: + with destination.open("wb") as handle: + shutil.copyfileobj(response, handle) + + +def _download_artifact_zip(branch: str, artifact: str, destination: Path) -> None: + """Download the latest launcher artifact ZIP for one branch.""" + _download_to_file(_artifact_download_url(branch, artifact), destination) + + +def _manifest_from_zip(zip_path: Path) -> Optional[BundleManifest]: + """Load bundled update metadata from an artifact ZIP file.""" + try: + with zipfile.ZipFile(zip_path) as archive: + for name in archive.namelist(): + if Path(name).name != UPDATE_MANIFEST_NAME: + continue + with archive.open(name) as handle: + payload = json.loads(handle.read().decode("utf-8")) + return _parse_bundle_manifest(payload) + except (OSError, zipfile.BadZipFile, json.JSONDecodeError): + return None + return None + + +def _read_remote_bundle_manifest(branch: str) -> Optional[BundleManifest]: + """Download the latest artifact manifest for this platform.""" + artifact = _platform_artifact_name() + with tempfile.TemporaryDirectory(prefix="celune-update-check-") as temp_dir: + zip_path = Path(temp_dir) / f"{artifact}.zip" + try: + _download_artifact_zip(branch, artifact, zip_path) + except OSError: + return None + return _manifest_from_zip(zip_path) + + +def _compiled_bundle_matches_remote( + bundle_dir: Path, + remote_manifest: BundleManifest, +) -> bool: + """Return whether the installed bundle already matches the remote artifact.""" + local_files = _bundle_checksums(bundle_dir, list(remote_manifest.files)) + return bool(local_files) and local_files == remote_manifest.files + + +def _check_for_compiled_update() -> Optional[UpdateInfo]: + """Check whether the packaged launcher bundle differs from the latest artifact.""" + local_manifest = _load_local_bundle_manifest() + if local_manifest is None: + return None + + branch = "main" + remote_revision = "" + latest_tag = "" + latest_tag_revision = "" + try: + if _is_git_checkout(): + branch = _current_branch() or branch + if branch and branch not in UPDATE_BRANCHES: + return None + remote_revision = ( + _remote_branch_revision(branch) if branch else _remote_head_revision() + ) + latest_tag, latest_tag_revision = _latest_remote_tag() + except ( + subprocess.CalledProcessError, + subprocess.TimeoutExpired, + FileNotFoundError, + ValueError, + ): + branch = "main" + + remote_manifest = _read_remote_bundle_manifest(branch or "main") + if remote_manifest is None: + return None + + if _compiled_bundle_matches_remote(_bundle_dir(), remote_manifest): + return None + + latest_revision = remote_manifest.revision or remote_revision or latest_tag_revision + latest_version = remote_manifest.version or latest_tag or _base_version(__version__) + return UpdateInfo( + local_version=local_manifest.version, + local_revision=_short_revision(local_manifest.revision), + local_tag="", + latest_version=latest_version, + latest_revision=_short_revision(latest_revision), + latest_tag=latest_tag, + ) + + def check_for_update() -> Optional[UpdateInfo]: - """Check GitHub for a newer Celune revision or tag. + """Check for a newer Celune revision or packaged launcher bundle. Returns: - Optional[UpdateInfo]: Information about the update, or ``None`` when Celune appears current or update metadata - cannot be read. + Optional[UpdateInfo]: Metadata describing the available update, or ``None`` when no safe update path is + currently available. """ if os.getenv("CELUNE_SKIP_UPDATE") in {"1", "true", "on", "yes", "enabled"}: return None + if running_compiled(): + return _check_for_compiled_update() + if not _is_git_checkout(): return None @@ -259,12 +477,77 @@ def check_for_update() -> Optional[UpdateInfo]: ) -def update_to_latest() -> None: - """Fast-forward the local checkout to GitHub's current Celune revision. +def _extract_artifact_root(zip_path: Path, destination: Path) -> Path: + """Extract one artifact ZIP and return the directory containing the manifest.""" + with zipfile.ZipFile(zip_path) as archive: + archive.extractall(destination) + + for manifest in destination.rglob(UPDATE_MANIFEST_NAME): + return manifest.parent + + raise UpdateError("downloaded artifact is missing update metadata") + + +def _replace_path(source: Path, destination: Path) -> None: + """Replace one file or directory in the install directory.""" + if destination.exists(): + if destination.is_dir() and not destination.is_symlink(): + shutil.rmtree(destination) + else: + destination.unlink() + + if source.is_dir(): + shutil.copytree(source, destination) + else: + destination.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source, destination) + + +def _apply_compiled_update(install_dir: Optional[Path] = None) -> None: + """Download and replace the packaged launcher bundle in place.""" + bundle_dir = (install_dir or _bundle_dir()).resolve() + branch = "main" + if _is_git_checkout(): + try: + branch = _current_branch() or branch + except ( + subprocess.CalledProcessError, + subprocess.TimeoutExpired, + FileNotFoundError, + ): + branch = "main" + + artifact = _platform_artifact_name() + with tempfile.TemporaryDirectory(prefix="celune-update-") as temp_dir: + temp_root = Path(temp_dir) + zip_path = temp_root / f"{artifact}.zip" + try: + _download_artifact_zip(branch, artifact, zip_path) + except OSError as exc: + raise UpdateError(f"could not download the latest artifact: {exc}") from exc + + try: + extracted_root = _extract_artifact_root(zip_path, temp_root / "artifact") + except (OSError, zipfile.BadZipFile, UpdateError) as exc: + raise UpdateError(f"could not unpack the latest artifact: {exc}") from exc + + for source in extracted_root.iterdir(): + _replace_path(source, bundle_dir / source.name) + + +def update_to_latest(install_dir: Optional[Path] = None) -> None: + """Update Celune either from Git or from the latest packaged artifact. + + Args: + install_dir: Optional compiled-install directory to replace in place. Raises: - UpdateError: Celune cannot be updated safely. + UpdateError: Raised when the repository or packaged install cannot be updated safely. """ + if running_compiled() or install_dir is not None: + _apply_compiled_update(install_dir=install_dir) + return + if not _is_git_checkout(): raise UpdateError("did not find a repository") @@ -320,3 +603,57 @@ def update_to_latest() -> None: raise UpdateError(f"timed out merging after {exc.timeout} seconds") from exc except FileNotFoundError as exc: raise UpdateError("git is not available") from exc + + +def _wait_for_pid_exit(pid: int, timeout: float = 120.0) -> None: + """Wait until the launcher process fully exits before replacing it.""" + if pid <= 0: + return + + if os.name == "nt": + kernel32 = ctypes.windll.kernel32 + synchronize = 0x00100000 + handle = kernel32.OpenProcess(synchronize, False, pid) + if not handle: + return + try: + wait_ms = max(0, int(timeout * 1000)) + result = kernel32.WaitForSingleObject(handle, wait_ms) + if result == 0x00000102: + raise UpdateError("timed out waiting for the launcher to exit") + finally: + kernel32.CloseHandle(handle) + return + + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + os.kill(pid, 0) + except ProcessLookupError: + return + except PermissionError: + return + time.sleep(0.1) + + raise UpdateError("timed out waiting for the launcher to exit") + + +def apply_update_and_restart( + parent_pid: int, + launcher_path: Path, + launcher_args: list[str], +) -> int: + """Wait for the launcher, apply the update, then restart it. + + Args: + parent_pid: Process ID of the launcher that must fully exit first. + launcher_path: Path to the outer ``celune`` launcher binary to restart. + launcher_args: Original launcher arguments to pass back into the restart. + + Returns: + int: Process exit code to return from the updater helper. + """ + _wait_for_pid_exit(parent_pid) + update_to_latest(install_dir=launcher_path.resolve().parent) + subprocess.Popen([str(launcher_path), *launcher_args], cwd=_repo_root()) # pylint: disable=R1732 + return 0 diff --git a/celune/utils.py b/celune/utils.py index 7cd006f..48b26e2 100644 --- a/celune/utils.py +++ b/celune/utils.py @@ -14,31 +14,15 @@ import multiprocessing from pathlib import Path from collections.abc import Iterator -from typing import Union, Callable, Optional, Literal, TypedDict, Any, overload +from typing import Union, Callable, Optional, Literal, Any, overload import psutil import langdetect -from .constants import REFERENCE_NEW_MOON from .paths import traceback_path +from .constants import REFERENCE_NEW_MOON from .terminal import supports_ansi as terminal_supports_ansi - - -class CallerInfo(TypedDict): - """Caller information type annotation.""" - - function: str - filename: str - line: int - - -class LanguageResult(TypedDict): - """Language detection metadata type annotation.""" - - language: str - languages: list[str] - probabilities: dict[str, float] - supported: bool +from .typing.utils import CallerInfo, LanguageResult def get_revision() -> str: @@ -828,3 +812,29 @@ def raise_test() -> None: """ raise RuntimeError("testing exception") + + +def normalize_special_characters(text: str) -> str: + """Normalize special characters in input string for TTS. + + Args: + text: The text to normalize. + + Returns: + str: The normalized text. + """ + + special_char_mappings = str.maketrans( + { + "\u201c": '"', # left double quote + "\u201d": '"', # right double quote + "\u201e": '"', # double low quote + "\u2018": "'", # left single quote + "\u2019": "'", # right single quote + "\u2013": "-", # en dash + "\u2014": "-", # em dash + "\u2026": "...", # ellipsis + } + ) + + return text.translate(special_char_mappings) diff --git a/celune/vram.py b/celune/vram.py index d9bd7c1..ab09e94 100644 --- a/celune/vram.py +++ b/celune/vram.py @@ -2,19 +2,27 @@ """VRAM preset resolution helpers for Celune.""" import math -from collections.abc import Mapping from dataclasses import dataclass -from typing import Literal, Optional, cast +from collections.abc import Mapping +from typing import Optional, cast import torch from .constants import JSONSerializable, VRAM_REQUIREMENTS, TIERS - -type VramTier = Literal["low", "medium", "high", "xhigh"] +from .typing.common import VramTier QWEN3_0_6B_MODEL = "Qwen/Qwen3-TTS-12Hz-0.6B-Base" QWEN3_1_7B_MODEL = "Qwen/Qwen3-TTS-12Hz-1.7B-Base" +# fake is only included to satisfy the test suite +# it is not a real backend +BACKENDS_ALLOWED: Mapping[VramTier, list[str]] = { + "low": ["mini", "qwen3", "fake"], + "medium": ["mini", "qwen3", "fake"], + "high": ["mini", "qwen3", "dotstts", "voxcpm2", "fake"], + "xhigh": ["mini", "qwen3", "dotstts", "voxcpm2", "fake"], +} + @dataclass(frozen=True, slots=True) class VramPreset: @@ -105,7 +113,7 @@ def resolve_vram_preset( return VramPreset( tier="low", default_backend="mini", - allow_voxcpm2=False, + allow_voxcpm2="voxcpm2" in BACKENDS_ALLOWED["low"], qwen3_clone_model_id=QWEN3_0_6B_MODEL, persona_enabled=False, persona_quantization="4bit", @@ -116,7 +124,7 @@ def resolve_vram_preset( return VramPreset( tier="medium", default_backend="qwen3", - allow_voxcpm2=False, + allow_voxcpm2="voxcpm2" in BACKENDS_ALLOWED["medium"], qwen3_clone_model_id=QWEN3_1_7B_MODEL, persona_enabled=False, persona_quantization="4bit", @@ -127,7 +135,7 @@ def resolve_vram_preset( return VramPreset( tier="high", default_backend="qwen3", - allow_voxcpm2=True, + allow_voxcpm2="voxcpm2" in BACKENDS_ALLOWED["high"], qwen3_clone_model_id=QWEN3_1_7B_MODEL, persona_enabled=True, persona_quantization="4bit", @@ -137,7 +145,7 @@ def resolve_vram_preset( return VramPreset( tier="xhigh", default_backend="qwen3", - allow_voxcpm2=True, + allow_voxcpm2="voxcpm2" in BACKENDS_ALLOWED["xhigh"], qwen3_clone_model_id=QWEN3_1_7B_MODEL, persona_enabled=True, persona_quantization="8bit", @@ -163,9 +171,7 @@ def resolve_backend_name( return preset.default_backend normalized = requested_backend.strip().lower() - if normalized == "voxcpm2" and not preset.allow_voxcpm2: - return preset.default_backend - if normalized in {"qwen3", "voxcpm2", "mini"}: + if normalized in BACKENDS_ALLOWED[preset.tier]: return normalized return preset.default_backend @@ -186,12 +192,4 @@ def backend_allowed( """ normalized = backend_name.strip().lower() preset = resolve_vram_preset(config) - if normalized == "qwen3": - return True - if normalized == "voxcpm2": - return preset.allow_voxcpm2 - if normalized == "mini": - return True - if normalized == "fake": # NOTE: only for testing! - return True - return False + return normalized in BACKENDS_ALLOWED[preset.tier] diff --git a/default_config.yaml b/default_config.yaml index e6894d4..d328a1a 100644 --- a/default_config.yaml +++ b/default_config.yaml @@ -3,6 +3,7 @@ # of certain backends # # qwen3 = faster, less expressive +# dotstts = slower, very high quality # voxcpm2 = slower, high quality # mini = much faster, and just as good backend: null @@ -48,8 +49,8 @@ use_normalizer: false # # low (CPU/6GB) - Celune Mini (Pocket TTS) / Qwen3 0.6B+ normalizer CPU (caution: choosing Qwen may harm outputs) # medium (8GB) - Qwen3 1.7B + normalizer CPU -# high (12GB) - Qwen3 1.7B / VoxCPM2 + Persona 4-bit + normalizer CPU -# xhigh (16GB+) - everything allowed + Persona 8-bit + normalizer GPU +# high (12GB) - Qwen3 1.7B / dots.tts MF / VoxCPM2 + Persona 4-bit + normalizer CPU +# xhigh (16GB+) - everything allowed, including dots.tts MF and VoxCPM2 + Persona 8-bit + normalizer GPU vram: medium # sleep mode behavior @@ -88,16 +89,17 @@ api: # # Persona is Celune's thinking system using RAG (Retrieval Augmented Generation) to let characters # keep a conversational context and remember you as you speak with said characters -# it uses Qwen/Qwen2.5-VL-3B-Instruct to ensure it can run on consumer hardware +# it uses Qwen/Qwen3-VL-4B-Instruct to ensure it can run on consumer hardware persona: # enabled? enabled: true - # which Qwen model to use? (in case you want to use an FT) - model_id: Qwen/Qwen2.5-VL-3B-Instruct + # which Qwen model to use? (in case you want to use an FT or uncensored version) + model_id: Qwen/Qwen3-VL-4B-Instruct + # model_id: huihui-ai/Huihui-Qwen3-VL-4B-Instruct-abliterated # memory settings memory: # how much do you want Celune to remember - max_short_term_messages: 5 + max_short_term_messages: 20 # where semantic long-term memory retrieval should persist data storage_dir: null # minimum cosine similarity for semantic long-term memory retrieval diff --git a/demos/balanced_lc_qwen.wav b/demos/balanced_lc_qwen.wav index 98e76e4..3076dbe 100644 Binary files a/demos/balanced_lc_qwen.wav and b/demos/balanced_lc_qwen.wav differ diff --git a/demos/bold_lc_old.wav b/demos/bold_lc_old.wav index b577c6a..7fb2acb 100644 Binary files a/demos/bold_lc_old.wav and b/demos/bold_lc_old.wav differ diff --git a/demos/bold_lc_qwen.wav b/demos/bold_lc_qwen.wav index 355338a..e389fa7 100644 Binary files a/demos/bold_lc_qwen.wav and b/demos/bold_lc_qwen.wav differ diff --git a/demos/calm_lc_old.wav b/demos/calm_lc_old.wav index bb33a7d..2a5394d 100644 Binary files a/demos/calm_lc_old.wav and b/demos/calm_lc_old.wav differ diff --git a/demos/calm_lc_qwen.wav b/demos/calm_lc_qwen.wav index 5edc72a..57fc807 100644 Binary files a/demos/calm_lc_qwen.wav and b/demos/calm_lc_qwen.wav differ diff --git a/demos/upbeat_lc_qwen.wav b/demos/upbeat_lc_qwen.wav index 39f2bf3..ded6a27 100644 Binary files a/demos/upbeat_lc_qwen.wav and b/demos/upbeat_lc_qwen.wav differ diff --git a/demos/webui_desktop.png b/demos/webui_desktop.png new file mode 100644 index 0000000..9600099 Binary files /dev/null and b/demos/webui_desktop.png differ diff --git a/demos/webui_mobile_responsive.png b/demos/webui_mobile_responsive.png new file mode 100644 index 0000000..75c62b5 Binary files /dev/null and b/demos/webui_mobile_responsive.png differ diff --git a/launcher.c b/launcher.c index 7c784b9..37518e1 100644 --- a/launcher.c +++ b/launcher.c @@ -7,6 +7,7 @@ #elif defined(_WIN32) #include #include +#include #endif #include @@ -15,8 +16,68 @@ #include #define printfe(...) do { fprintf(stderr, __VA_ARGS__); } while (0) +#define EXIT_PENDING_UPDATE 7 #ifdef __linux__ +static int file_exists(const char *path) { + return access(path, F_OK) == 0; +} + +static int copy_text(char *dest, size_t size, const char *src) { + size_t len = strlen(src); + + if (len >= size) { + return 0; + } + + memcpy(dest, src, len + 1); + return 1; +} + +static int parent_dir_of(const char *path, char *out, size_t size) { + if (!copy_text(out, size, path)) { + return 0; + } + + char *last = strrchr(out, '/'); + if (last == NULL) { + return 0; + } + + *last = '\0'; + return 1; +} + +static int find_repo_root(const char *start_dir, char *out, size_t size) { + char current[1024]; + if (!copy_text(current, sizeof(current), start_dir)) { + return 0; + } + + while (1) { + char pyvenv_cfg[1200]; + int written = snprintf(pyvenv_cfg, sizeof(pyvenv_cfg), "%s/.venv/pyvenv.cfg", current); + if (written > 0 && (size_t)written < sizeof(pyvenv_cfg) && file_exists(pyvenv_cfg)) { + return copy_text(out, size, current); + } + + char parent[1024]; + if (!parent_dir_of(current, parent, sizeof(parent))) { + break; + } + + if (strcmp(parent, current) == 0 || parent[0] == '\0') { + break; + } + + if (!copy_text(current, sizeof(current), parent)) { + return 0; + } + } + + return 0; +} + int get_exe_dir(char *out, size_t size) { ssize_t len = readlink("/proc/self/exe", out, size - 1); @@ -34,6 +95,53 @@ int get_exe_dir(char *out, size_t size) { return 0; } + +static int spawn_update_helper_unix( + const char *python, + const char *main_py, + const char *launcher_path, + const char *repo_root, + int argc, + char **argv +) { + pid_t pid = fork(); + if (pid == -1) { + perror("fork failed"); + return 0; + } + + if (pid == 0) { + char pid_text[32]; + snprintf(pid_text, sizeof(pid_text), "%ld", (long)getppid()); + + char **args = malloc(((size_t)argc + 5U) * sizeof(char *)); + if (args == NULL) { + perror("malloc failed"); + _exit(1); + } + + args[0] = (char *)python; + args[1] = (char *)main_py; + args[2] = "__apply_update"; + args[3] = pid_text; + args[4] = (char *)launcher_path; + for (int i = 1; i < argc; i++) { + args[i + 4] = argv[i]; + } + args[argc + 4] = NULL; + + if (chdir(repo_root) != 0) { + perror("chdir failed"); + _exit(1); + } + + execv(args[0], args); + perror("execv failed"); + _exit(1); + } + + return 1; +} #elif defined(_WIN32) int get_exe_dir(char *out, size_t size) { DWORD len = GetModuleFileNameA(NULL, out, (DWORD)size); @@ -53,6 +161,110 @@ int get_exe_dir(char *out, size_t size) { #endif #ifdef _WIN32 +static int file_exists(const char *path) { + DWORD attr = GetFileAttributesA(path); + return attr != INVALID_FILE_ATTRIBUTES && !(attr & FILE_ATTRIBUTE_DIRECTORY); +} + +static int dir_exists(const char *path) { + DWORD attr = GetFileAttributesA(path); + return attr != INVALID_FILE_ATTRIBUTES && (attr & FILE_ATTRIBUTE_DIRECTORY); +} + +static int copy_text(char *dest, size_t size, const char *src) { + size_t len = strlen(src); + + if (len >= size) { + return 0; + } + + memcpy(dest, src, len + 1); + return 1; +} + +static int parent_dir_of(const char *path, char *out, size_t size) { + if (!copy_text(out, size, path)) { + return 0; + } + + char *last = strrchr(out, '\\'); + if (last == NULL) { + return 0; + } + + *last = '\0'; + return 1; +} + +static int trim_line(char *line) { + size_t len = strlen(line); + while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r' || line[len - 1] == ' ' || line[len - 1] == '\t')) { + line[len - 1] = '\0'; + len--; + } + + return 1; +} + +static int read_pyvenv_home(const char *cfg_path, char *out, size_t size) { + FILE *cfg = fopen(cfg_path, "r"); + if (cfg == NULL) { + return 0; + } + + char line[2048]; + while (fgets(line, sizeof(line), cfg) != NULL) { + trim_line(line); + + if (strncmp(line, "home =", 6) == 0) { + const char *value = line + 6; + while (*value == ' ' || *value == '\t') { + value++; + } + + fclose(cfg); + return copy_text(out, size, value); + } + } + + fclose(cfg); + return 0; +} + +static int find_repo_root(const char *start_dir, char *out, size_t size) { + char current[1024]; + if (!copy_text(current, sizeof(current), start_dir)) { + return 0; + } + + while (1) { + char pyvenv_cfg[1200]; + int written = snprintf(pyvenv_cfg, sizeof(pyvenv_cfg), "%s\\.venv\\pyvenv.cfg", current); + if (written > 0 && (size_t)written < sizeof(pyvenv_cfg) && file_exists(pyvenv_cfg)) { + return copy_text(out, size, current); + } + + char parent[1024]; + if (!parent_dir_of(current, parent, sizeof(parent))) { + break; + } + + if (strcmp(parent, current) == 0) { + break; + } + + if (strlen(parent) == 2 && parent[1] == ':') { + break; + } + + if (!copy_text(current, sizeof(current), parent)) { + return 0; + } + } + + return 0; +} + static int append_text(char *dest, size_t size, size_t *offset, const char *text) { size_t len = strlen(text); @@ -113,11 +325,71 @@ static int append_windows_arg(char *dest, size_t size, size_t *offset, const cha return append_text(dest, size, offset, "\""); } + +static int spawn_update_helper_windows( + const char *python, + const char *main_py, + const char *launcher_path, + const char *repo_root, + int argc, + char **argv +) { + STARTUPINFOA si = {0}; + PROCESS_INFORMATION pi = {0}; + char cmd[5200]; + char pid_text[32]; + size_t offset = 0; + + si.cb = sizeof(si); + cmd[0] = '\0'; + snprintf(pid_text, sizeof(pid_text), "%lu", (unsigned long)GetCurrentProcessId()); + + if (!append_windows_arg(cmd, sizeof(cmd), &offset, python) || + !append_text(cmd, sizeof(cmd), &offset, " ") || + !append_windows_arg(cmd, sizeof(cmd), &offset, main_py) || + !append_text(cmd, sizeof(cmd), &offset, " ") || + !append_windows_arg(cmd, sizeof(cmd), &offset, "__apply_update") || + !append_text(cmd, sizeof(cmd), &offset, " ") || + !append_windows_arg(cmd, sizeof(cmd), &offset, pid_text) || + !append_text(cmd, sizeof(cmd), &offset, " ") || + !append_windows_arg(cmd, sizeof(cmd), &offset, launcher_path)) { + return 0; + } + + for (int i = 1; i < argc; i++) { + if (!append_text(cmd, sizeof(cmd), &offset, " ") || + !append_windows_arg(cmd, sizeof(cmd), &offset, argv[i])) { + return 0; + } + } + + if (!CreateProcessA( + NULL, + cmd, + NULL, + NULL, + FALSE, + 0, + NULL, + repo_root, + &si, + &pi + )) { + return 0; + } + + CloseHandle(pi.hThread); + CloseHandle(pi.hProcess); + return 1; +} #endif #ifdef __linux__ int run_unix(int argc, char **argv) { char base[1024]; + char repo_root[1024]; + char launcher[1024]; + char target[1024]; char python[1024]; char main_py[1024]; char setup_py[1024]; @@ -129,17 +401,80 @@ int run_unix(int argc, char **argv) { return 1; } - int python_len = snprintf(python, sizeof(python), "%s/.venv/bin/python", base); - int main_py_len = snprintf(main_py, sizeof(main_py), "%s/main.py", base); - int setup_py_len = snprintf(setup_py, sizeof(setup_py), "%s/setup.py", base); + if (!find_repo_root(base, repo_root, sizeof(repo_root))) { + printfe("Celune could not find the repository root with a Python virtual environment.\n"); + return 1; + } - if (python_len < 0 || (size_t)python_len >= sizeof(python) || + int launcher_len = snprintf(launcher, sizeof(launcher), "%s/celune", base); + int target_len = snprintf(target, sizeof(target), "%s/celune-bin", base); + int python_len = snprintf(python, sizeof(python), "%s/.venv/bin/python", repo_root); + int main_py_len = snprintf(main_py, sizeof(main_py), "%s/main.py", repo_root); + int setup_py_len = snprintf(setup_py, sizeof(setup_py), "%s/setup.py", repo_root); + + if (launcher_len < 0 || (size_t)launcher_len >= sizeof(launcher) || + target_len < 0 || (size_t)target_len >= sizeof(target) || + python_len < 0 || (size_t)python_len >= sizeof(python) || main_py_len < 0 || (size_t)main_py_len >= sizeof(main_py) || setup_py_len < 0 || (size_t)setup_py_len >= sizeof(setup_py)) { printfe("Celune cannot start in this location, the path is too long.\n"); return 1; } + if (access(target, X_OK) == 0) { + pid_t pid = fork(); + if (pid == -1) { + perror("fork failed"); + return 1; + } + + if (pid == 0) { + char **args = malloc(((size_t)argc + 1U) * sizeof(char *)); + if (args == NULL) { + perror("malloc failed"); + _exit(1); + } + + args[0] = target; + for (int i = 1; i < argc; i++) { + args[i] = argv[i]; + } + args[argc] = NULL; + + if (chdir(repo_root) != 0) { + perror("chdir failed"); + _exit(1); + } + execv(args[0], args); + + perror("execv failed"); + _exit(1); + } else { + int status; + waitpid(pid, &status, 0); + + if (WIFEXITED(status)) { + int exit_code = WEXITSTATUS(status); + if (exit_code == EXIT_PENDING_UPDATE) { + if (!spawn_update_helper_unix(python, main_py, launcher, repo_root, argc, argv)) { + printfe("Celune could not start its update helper.\n"); + return 1; + } + return 0; + } + return exit_code; + } + else if (WIFSIGNALED(status)) { + int sig = WTERMSIG(status); + + printfe("Celune was killed by signal %d.\n", sig); + return 128 + sig; + } + } + + return 1; + } + if (access(python, X_OK) != 0) { const char *system_python[] = {"python3", "python"}; int found_system_python = 0; @@ -162,7 +497,7 @@ int run_unix(int argc, char **argv) { if (setup_pid == 0) { char *args[] = {(char *)system_python[i], setup_py, NULL}; - if (chdir(base) != 0) { + if (chdir(repo_root) != 0) { perror("chdir failed"); _exit(1); } @@ -221,7 +556,7 @@ int run_unix(int argc, char **argv) { } args[argc + 1] = NULL; - if (chdir(base) != 0) { + if (chdir(repo_root) != 0) { perror("chdir failed"); _exit(1); } @@ -249,9 +584,20 @@ int run_unix(int argc, char **argv) { #elif defined(_WIN32) int run_windows(int argc, char **argv) { char base[1024]; - char python[1024]; - char main_py[1024]; - char setup_py[1024]; + char launcher[1024]; + char target[1024]; + char repo_root[1024]; + char pyvenv_cfg[1200]; + char python_home[1024]; + char python_dlls[1200]; + char python_lib[1200]; + char venv_root[1200]; + char venv_python[1400]; + char main_py[1400]; + char site_packages[1400]; + char setuptools_vendor[1600]; + char nuitka_pythonpath[5200]; + char updated_path[5200]; SetEnvironmentVariableA("CELUNE_LAUNCHER", "1"); @@ -260,84 +606,93 @@ int run_windows(int argc, char **argv) { return 1; } - int python_len = snprintf(python, sizeof(python), "%s\\.venv\\Scripts\\python.exe", base); - int main_py_len = snprintf(main_py, sizeof(main_py), "%s\\main.py", base); - int setup_py_len = snprintf(setup_py, sizeof(setup_py), "%s\\setup.py", base); - - if (python_len < 0 || (size_t)python_len >= sizeof(python) || - main_py_len < 0 || (size_t)main_py_len >= sizeof(main_py) || - setup_py_len < 0 || (size_t)setup_py_len >= sizeof(setup_py)) { + int launcher_len = snprintf(launcher, sizeof(launcher), "%s\\celune.exe", base); + int target_len = snprintf(target, sizeof(target), "%s\\celune-bin.exe", base); + if (launcher_len < 0 || (size_t)launcher_len >= sizeof(launcher) || + target_len < 0 || (size_t)target_len >= sizeof(target)) { printfe("Celune cannot start in this location, the path is too long.\n"); return 1; } - DWORD attr = GetFileAttributesA(python); - if (attr == INVALID_FILE_ATTRIBUTES) { - DWORD setup_attr = GetFileAttributesA(setup_py); - if (setup_attr == INVALID_FILE_ATTRIBUTES) { - printfe("Python virtual environment and/or interpreter was not found or isn't working.\n"); - printfe("Celune needs setup.py to create its virtual environment.\n"); - return 1; - } + if (!file_exists(target)) { + printfe("Celune could not find its compiled runtime binary.\n"); + printfe("Expected file: %s\n", target); + return 1; + } - printfe("Python virtual environment was not found. Running setup.py...\n"); + if (!find_repo_root(base, repo_root, sizeof(repo_root))) { + printfe("Celune could not find the repository root with a Python virtual environment.\n"); + return 1; + } - STARTUPINFOA setup_si = {0}; - PROCESS_INFORMATION setup_pi = {0}; - setup_si.cb = sizeof(setup_si); - setup_si.dwFlags = STARTF_USESHOWWINDOW; - setup_si.wShowWindow = SW_SHOW; + int pyvenv_cfg_len = snprintf(pyvenv_cfg, sizeof(pyvenv_cfg), "%s\\.venv\\pyvenv.cfg", repo_root); + int venv_root_len = snprintf(venv_root, sizeof(venv_root), "%s\\.venv", repo_root); + int venv_python_len = snprintf(venv_python, sizeof(venv_python), "%s\\Scripts\\python.exe", venv_root); + int main_py_len = snprintf(main_py, sizeof(main_py), "%s\\main.py", repo_root); + int site_packages_len = snprintf(site_packages, sizeof(site_packages), "%s\\Lib\\site-packages", venv_root); + int setuptools_vendor_len = snprintf(setuptools_vendor, sizeof(setuptools_vendor), "%s\\setuptools\\_vendor", site_packages); + if (pyvenv_cfg_len < 0 || (size_t)pyvenv_cfg_len >= sizeof(pyvenv_cfg) || + venv_root_len < 0 || (size_t)venv_root_len >= sizeof(venv_root) || + venv_python_len < 0 || (size_t)venv_python_len >= sizeof(venv_python) || + main_py_len < 0 || (size_t)main_py_len >= sizeof(main_py) || + site_packages_len < 0 || (size_t)site_packages_len >= sizeof(site_packages) || + setuptools_vendor_len < 0 || (size_t)setuptools_vendor_len >= sizeof(setuptools_vendor)) { + printfe("Celune cannot start in this location, the path is too long.\n"); + return 1; + } - char setup_cmd[2200]; - int setup_written = snprintf(setup_cmd, sizeof(setup_cmd), "python.exe \"%s\"", setup_py); - if (setup_written < 0 || (size_t)setup_written >= sizeof(setup_cmd)) { - printfe("Celune cannot start setup.py, the command line is too long.\n"); - return 1; - } + if (!file_exists(pyvenv_cfg) || !read_pyvenv_home(pyvenv_cfg, python_home, sizeof(python_home))) { + printfe("Celune could not determine the base Python installation from .venv\\pyvenv.cfg.\n"); + return 1; + } - BOOL setup_ok = CreateProcessA( - NULL, - setup_cmd, - NULL, - NULL, - FALSE, - 0, - NULL, - base, - &setup_si, - &setup_pi - ); - - if (!setup_ok) { - DWORD error = GetLastError(); - if (error == ERROR_FILE_NOT_FOUND || error == ERROR_PATH_NOT_FOUND) { - printfe("Celune could not find a system Python interpreter to run setup.py.\n"); - printfe("Install Python 3.12 or 3.13 and run Celune again.\n"); - } else { - printfe("Celune could not launch setup.py.\n%lu\n", error); - } - return 1; - } + int python_dlls_len = snprintf(python_dlls, sizeof(python_dlls), "%s\\DLLs", python_home); + int python_lib_len = snprintf(python_lib, sizeof(python_lib), "%s\\Lib", python_home); + if (python_dlls_len < 0 || (size_t)python_dlls_len >= sizeof(python_dlls) || + python_lib_len < 0 || (size_t)python_lib_len >= sizeof(python_lib)) { + printfe("Celune cannot start in this location, the path is too long.\n"); + return 1; + } - WaitForSingleObject(setup_pi.hProcess, INFINITE); + if (!dir_exists(python_home) || !dir_exists(python_lib) || !dir_exists(site_packages)) { + printfe("Celune could not find the required Python runtime directories.\n"); + return 1; + } - DWORD setup_exit_code = 1; - GetExitCodeProcess(setup_pi.hProcess, &setup_exit_code); + int nuitka_pythonpath_len = snprintf( + nuitka_pythonpath, + sizeof(nuitka_pythonpath), + "%s;%s;%s;%s;%s;%s;%s", + repo_root, + python_dlls, + python_lib, + python_home, + venv_root, + site_packages, + setuptools_vendor + ); + if (nuitka_pythonpath_len < 0 || (size_t)nuitka_pythonpath_len >= sizeof(nuitka_pythonpath)) { + printfe("Celune cannot set up its Python path, the path is too long.\n"); + return 1; + } - CloseHandle(setup_pi.hThread); - CloseHandle(setup_pi.hProcess); + DWORD path_len = GetEnvironmentVariableA("PATH", updated_path, (DWORD)sizeof(updated_path)); + if (path_len == 0 || path_len >= sizeof(updated_path)) { + updated_path[0] = '\0'; + } - if (setup_exit_code != 0) { - printfe("Celune setup failed.\n"); - return (int)setup_exit_code; - } + char path_value[5200]; + int updated_path_len = snprintf(path_value, sizeof(path_value), "%s;%s", python_home, updated_path); + if (updated_path_len < 0 || (size_t)updated_path_len >= sizeof(path_value)) { + printfe("Celune cannot set up PATH, the path is too long.\n"); + return 1; + } - attr = GetFileAttributesA(python); - if (attr == INVALID_FILE_ATTRIBUTES) { - printfe("Python virtual environment and/or interpreter was not found or isn't working.\n"); - printfe("Celune needs a working Python interpreter and virtual environment to operate.\n"); - return 1; - } + if (!SetEnvironmentVariableA("PATH", path_value) || + !SetEnvironmentVariableA("PYTHONHOME", python_home) || + !SetEnvironmentVariableA("NUITKA_PYTHONPATH", nuitka_pythonpath)) { + printfe("Celune could not configure its Python runtime environment.\n"); + return 1; } STARTUPINFOA si = {0}; @@ -351,9 +706,7 @@ int run_windows(int argc, char **argv) { size_t offset = 0; cmd[0] = '\0'; - if (!append_windows_arg(cmd, sizeof(cmd), &offset, python) || - !append_text(cmd, sizeof(cmd), &offset, " ") || - !append_windows_arg(cmd, sizeof(cmd), &offset, main_py)) { + if (!append_windows_arg(cmd, sizeof(cmd), &offset, target)) { printfe("Celune cannot start in this location, the command line is too long.\n"); return 1; } @@ -374,13 +727,13 @@ int run_windows(int argc, char **argv) { FALSE, 0, NULL, - base, + repo_root, &si, &pi ); if (!ok) { - printfe("Celune could not launch Python.\n%lu\n", GetLastError()); + printfe("Celune could not launch its compiled runtime.\n%lu\n", GetLastError()); return 1; } @@ -392,6 +745,18 @@ int run_windows(int argc, char **argv) { CloseHandle(pi.hThread); CloseHandle(pi.hProcess); + if ((int)exit_code == EXIT_PENDING_UPDATE) { + if (!file_exists(venv_python) || !file_exists(main_py)) { + printfe("Celune could not find the Python helper needed to apply updates.\n"); + return 1; + } + if (!spawn_update_helper_windows(venv_python, main_py, launcher, repo_root, argc, argv)) { + printfe("Celune could not start its update helper.\n"); + return 1; + } + return 0; + } + return (int)exit_code; } #endif diff --git a/main.py b/main.py index 9ffb32c..c7c3647 100755 --- a/main.py +++ b/main.py @@ -5,14 +5,33 @@ import sys import importlib.util from pathlib import Path -from types import ModuleType from typing import Optional +from types import ModuleType -from celune.constants import APP_NAME, APP_SLUG +# fallback for unsupported interpreters (Python 3.11 and below) +APP_NAME = "Celune" +APP_SLUG = "".join(char if char.isalnum() else "_" for char in APP_NAME.lower()) _ENTRYPOINT_MODULE: Optional[ModuleType] = None +def _too_old_python() -> bool: + """Return whether the current interpreter is too old.""" + return sys.version_info < (3, 12) + + +def _print_too_old_python_notice(command: Optional[str] = None) -> None: + """Print a user-facing unsupported Python version notice, bypassing app imports.""" + version = ".".join(str(part) for part in sys.version_info[:3]) + print(f"{APP_NAME} will not run on Python {version}.") + print("Please use at least Python 3.12 to use the CLI.") + print( + f"Run `uv sync` in {APP_NAME}'s directory to set up the supported environment." + ) + if command == "doctor": + print(f"`{APP_NAME.lower()} doctor` can't run on this interpreter.") + + def load_entrypoint_module() -> ModuleType: """Public interface for loading the app entrypoint module. @@ -50,7 +69,14 @@ def main(argv: Optional[list[str]] = None) -> None: Args: argv: Arguments to pass through to the entrypoint handler. """ - _load_entrypoint_module().main(sys.argv if argv is None else argv) + resolved_argv = sys.argv if argv is None else argv + command = resolved_argv[1].strip().lower() if len(resolved_argv) >= 2 else None + + if _too_old_python(): + _print_too_old_python_notice(command) + sys.exit(6) + + _load_entrypoint_module().main(resolved_argv) if __name__ == "__main__": diff --git a/nuitka_main.py b/nuitka_main.py new file mode 100644 index 0000000..b294283 --- /dev/null +++ b/nuitka_main.py @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: MIT +from celune.entrypoint import main + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 064fc10..d592ca2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "celune" -version = "4.0.1" +version = "4.1.0" description = "Real-time AI TTS character engine with expressive voices and high-quality playback" readme = "README.md" requires-python = ">=3.12,<3.14" @@ -24,6 +24,8 @@ dependencies = [ # If you want Celune to use the alternative VoxCPM2-based backend, install this package. # This backend may not be available on some VRAM presets. Refer to Celune's default configuration for details. "voxcpm>=2.0.2", + # dots.tts is installed from Celunah's Windows-safe fork at this time. + "dots.tts @ git+https://github.com/celunah/dots.tts", # Pocket TTS is a new backend added in version 4.0.0, that allows Celune to sound the part and use up very little # memory, just under 200 MB! "pocket-tts>=2.1.0", @@ -68,6 +70,7 @@ dependencies = [ # Used to detect language of input. "langdetect>=1.0.0,<2.0.0", "iso639-lang>=2.0.0,<3.0.0", + "yt-dlp", ] [project.urls] @@ -105,7 +108,8 @@ dev = [ # If not installed, type checkers may complain. "types-pyyaml", "types-psutil", - "types-tqdm" + "types-tqdm", + "nuitka>=4.1.2", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index aade266..6745ed6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,8 @@ faster-qwen3-tts>=0.2.4 # If you want Celune to use the alternative VoxCPM2-based backend, install this package. # This backend may not be available on some VRAM presets. Refer to Celune's default configuration for details. voxcpm>=2.0.0,<2.1.0 +# dots.tts is currently installed from Celunah's Windows-safe fork. +dots.tts @ git+https://github.com/celunah/dots.tts # Pocket TTS is a new backend added in version 4.0.0, that allows Celune to sound the part and use up very little # memory, just under 200 MB! pocket-tts>=2.1.0 @@ -57,6 +59,7 @@ httpx>=0.27.0,<0.29.0 # Used to detect language of input. langdetect>=1.0.0,<2.0.0 iso639-lang>=2.0.0,<3.0.0 +yt-dlp # API uvicorn diff --git a/resources/branding/celune_moon.svg b/resources/branding/celune.svg similarity index 91% rename from resources/branding/celune_moon.svg rename to resources/branding/celune.svg index 157dda2..524485b 100644 --- a/resources/branding/celune_moon.svg +++ b/resources/branding/celune.svg @@ -2,7 +2,7 @@ - + diff --git a/resources/branding/celune_88x31_206.png b/resources/branding/celune_88x31_206.png index 3c37349..834cd6f 100644 Binary files a/resources/branding/celune_88x31_206.png and b/resources/branding/celune_88x31_206.png differ diff --git a/resources/branding/celune_88x31_206_light.png b/resources/branding/celune_88x31_206_light.png new file mode 100644 index 0000000..46e7aeb Binary files /dev/null and b/resources/branding/celune_88x31_206_light.png differ diff --git a/resources/branding/celune_88x31_basic.png b/resources/branding/celune_88x31_basic.png index 82294c9..f5a1b65 100644 Binary files a/resources/branding/celune_88x31_basic.png and b/resources/branding/celune_88x31_basic.png differ diff --git a/resources/branding/celune_88x31_basic_light.png b/resources/branding/celune_88x31_basic_light.png new file mode 100644 index 0000000..7d56863 Binary files /dev/null and b/resources/branding/celune_88x31_basic_light.png differ diff --git a/resources/branding/celune_dark.svg b/resources/branding/celune_dark.svg new file mode 100644 index 0000000..993ae4c --- /dev/null +++ b/resources/branding/celune_dark.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/resources/branding/celune_wordmark.png b/resources/branding/celune_wordmark.png index f490b7c..4d82e57 100644 Binary files a/resources/branding/celune_wordmark.png and b/resources/branding/celune_wordmark.png differ diff --git a/resources/branding/celune_wordmark_dark.png b/resources/branding/celune_wordmark_dark.png new file mode 100644 index 0000000..31545c3 Binary files /dev/null and b/resources/branding/celune_wordmark_dark.png differ diff --git a/resources/branding/celune_wordmark_transparent.png b/resources/branding/celune_wordmark_transparent.png deleted file mode 100644 index 2044894..0000000 Binary files a/resources/branding/celune_wordmark_transparent.png and /dev/null differ diff --git a/resources/celune.ico b/resources/celune.ico new file mode 100644 index 0000000..434cc60 Binary files /dev/null and b/resources/celune.ico differ diff --git a/resources/celune.res b/resources/celune.res index 1e06485..aaba4df 100644 Binary files a/resources/celune.res and b/resources/celune.res differ diff --git a/scripts/build_nuitka.ps1 b/scripts/build_nuitka.ps1 new file mode 100644 index 0000000..e71da65 --- /dev/null +++ b/scripts/build_nuitka.ps1 @@ -0,0 +1,135 @@ +$ErrorActionPreference = "Stop" + +$repoRoot = Split-Path -Parent $PSScriptRoot +$outputDir = Join-Path $repoRoot "bin" +$templateExe = Join-Path $repoRoot "celune.exe" +$iconIco = Join-Path $repoRoot "resources\celune.ico" +$launcherSource = Join-Path $repoRoot "launcher.c" +$launcherRes = Join-Path $repoRoot "resources\celune.res" +$vswhere = Join-Path ${env:ProgramFiles(x86)} "Microsoft Visual Studio\Installer\vswhere.exe" +$projectVersion = Select-String -Path (Join-Path $repoRoot "pyproject.toml") -Pattern '^version = "([^"]+)"' | Select-Object -First 1 + +$env:CL = "/O2 /GL /GS /guard:cf /DNDEBUG" +$env:_CL_ = "/link /LTCG /OPT:REF /OPT:ICF /DYNAMICBASE /NXCOMPAT" + +if ($null -eq $projectVersion) { + throw "Could not determine the project version from pyproject.toml." +} + +$version = $projectVersion.Matches[0].Groups[1].Value +$windowsVersion = (($version -split '\+') | Select-Object -First 1) +if ($windowsVersion -notmatch '^\d+(?:\.\d+){0,3}$') { + throw "The project version '$windowsVersion' is not a valid Windows version string." +} + +if (-not (Test-Path (Join-Path $repoRoot "nuitka_main.py"))) { + throw "nuitka_main.py was not found." +} + +if (-not (Test-Path (Join-Path $repoRoot "resources\celune.res"))) { + throw "resources\celune.res was not found." +} + +$env:UV_CACHE_DIR = Join-Path $repoRoot ".uv-cache" + +New-Item -ItemType Directory -Force -Path $outputDir | Out-Null + +$staleBuildArtifacts = @( + (Join-Path $outputDir "default_config.yaml"), + (Join-Path $outputDir "voices"), + (Join-Path $outputDir "resources"), + (Join-Path $outputDir "assets") +) +foreach ($stalePath in $staleBuildArtifacts) { + if (Test-Path $stalePath) { + Remove-Item -LiteralPath $stalePath -Recurse -Force + } +} + +$arguments = @( + "run", + "python", + "-m", + "nuitka", + "--deployment", + "--msvc=latest", + "--follow-import-to=celune", + "--include-package-data=celune", + "--windows-console-mode=force", + "--product-name=Celune", + "--file-description=Celune", + "--product-version=$windowsVersion", + "--file-version=$windowsVersion", + "--output-dir=$outputDir", + "--output-filename=celune-bin.exe", + "--lto=yes", + "$repoRoot\nuitka_main.py" +) + +if (Test-Path $iconIco) { + $arguments += "--windows-icon-from-ico=$iconIco" +} +elseif (Test-Path $templateExe) { + $arguments += "--windows-icon-from-exe=$templateExe" +} + +& uv @arguments +if ($LASTEXITCODE -ne 0) { + throw "Nuitka build failed with exit code $LASTEXITCODE." +} + +if (-not (Test-Path $launcherSource)) { + throw "launcher.c was not found." +} + +$launcherExe = Join-Path $outputDir "celune.exe" +$launcherObj = Join-Path $outputDir "launcher.obj" +if (-not (Test-Path $vswhere)) { + throw "vswhere.exe was not found." +} + +$vsInstall = & $vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath +if (-not $vsInstall) { + throw "Could not locate a Visual Studio installation with C++ build tools." +} + +$vsDevCmd = Join-Path $vsInstall "Common7\Tools\VsDevCmd.bat" +if (-not (Test-Path $vsDevCmd)) { + throw "VsDevCmd.bat was not found." +} + +$env:CL = $null +$env:_CL_ = $null + +$compileCmd = "call `"$vsDevCmd`" -arch=amd64 -host_arch=amd64 >nul && cl /nologo /O2 /GL /GS /guard:cf /W4 /DNDEBUG /Fe:`"$launcherExe`" /Fo:`"$launcherObj`" `"$launcherSource`" `"$launcherRes`" /link /LTCG /OPT:REF /OPT:ICF /DYNAMICBASE /NXCOMPAT" +& cmd /c $compileCmd +if ($LASTEXITCODE -ne 0) { + throw "Failed to compile the Windows launcher." +} + +$revision = (& git -C $repoRoot rev-parse HEAD).Trim() +if (-not $revision) { + throw "Could not determine the Git revision for update metadata." +} + +$manifest = [ordered]@{ + version = $windowsVersion + revision = $revision + artifact = "Celune-win-x64" + files = [ordered]@{ + "celune.exe" = (Get-FileHash -Algorithm SHA256 $launcherExe).Hash.ToLowerInvariant() + "celune-bin.exe" = (Get-FileHash -Algorithm SHA256 (Join-Path $outputDir "celune-bin.exe")).Hash.ToLowerInvariant() + } +} + +$manifestPath = Join-Path $outputDir "celune-update.json" +$manifest | ConvertTo-Json -Depth 4 | Set-Content -Encoding UTF8 $manifestPath + +$buildDir = Join-Path $outputDir "nuitka_main.build" +if (Test-Path $buildDir) { + Remove-Item -LiteralPath $buildDir -Recurse -Force +} + +if (Test-Path $launcherObj) { + Remove-Item -LiteralPath $launcherObj -Force +} diff --git a/scripts/build_nuitka.sh b/scripts/build_nuitka.sh new file mode 100755 index 0000000..ad3fc50 --- /dev/null +++ b/scripts/build_nuitka.sh @@ -0,0 +1,144 @@ +#!/usr/bin/env bash +set -euo pipefail + +export CFLAGS="-O2 -DNDEBUG -fstack-protector-strong -D_FORTIFY_SOURCE=3" +export CXXFLAGS="$CFLAGS" +export LDFLAGS="-Wl,-z,relro,-z,now" + +repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +output_dir="$repo_root/bin" +app_dir="$output_dir/Celune.AppDir" +desktop_src="$repo_root/Celune.AppDir/celune.desktop" +icon_src="$repo_root/Celune.AppDir/celune.png" + +version_line="$(grep -m1 '^version = "' "$repo_root/pyproject.toml")" +if [[ -z "$version_line" ]]; then + echo "Could not determine the project version from pyproject.toml." >&2 + exit 1 +fi + +version="${version_line#version = \"}" +version="${version%\"}" + +if [[ ! -f "$repo_root/nuitka_main.py" ]]; then + echo "nuitka_main.py was not found." >&2 + exit 1 +fi + +if [[ ! -f "$repo_root/launcher.c" ]]; then + echo "launcher.c was not found." >&2 + exit 1 +fi + +if [[ ! -f "$desktop_src" || ! -f "$icon_src" ]]; then + echo "Celune.AppDir metadata files were not found." >&2 + exit 1 +fi + +if ! command -v gcc >/dev/null 2>&1; then + echo "gcc is required to build the Linux launcher." >&2 + exit 1 +fi + +if ! command -v appimagetool >/dev/null 2>&1; then + echo "appimagetool is required to create the AppImage." >&2 + exit 1 +fi + +export UV_CACHE_DIR="$repo_root/.uv-cache" +if [[ "$repo_root" == /mnt/* ]]; then + export UV_CACHE_DIR="${XDG_CACHE_HOME:-$HOME/.cache}/celune-uv" +fi +mkdir -p "$UV_CACHE_DIR" + +mkdir -p "$output_dir" +rm -rf \ + "$output_dir/default_config.yaml" \ + "$output_dir/voices" \ + "$output_dir/resources" \ + "$output_dir/assets" + +uv run python -m nuitka \ + --deployment \ + --follow-import-to=celune \ + --include-package-data=celune \ + --output-dir="$output_dir" \ + --output-filename=celune-bin \ + --lto=yes \ + "$repo_root/nuitka_main.py" + +gcc -O2 -s -Wall -Wextra -Wpedantic \ + -DNDEBUG -D_FORTIFY_SOURCE=3 -fstack-protector-strong \ + -flto -Wl,-z,relro,-z,now -o "$output_dir/celune" "$repo_root/launcher.c" +chmod +x "$output_dir/celune" "$output_dir/celune-bin" + +rm -rf "$output_dir/nuitka_main.build" + +rm -rf "$app_dir" +mkdir -p "$app_dir" +cp "$desktop_src" "$app_dir/celune.desktop" +cp "$icon_src" "$app_dir/celune.png" +ln -sfn "celune.png" "$app_dir/.DirIcon" +cat > "$app_dir/AppRun" <<'EOF' +#!/usr/bin/env bash +set -euo pipefail + +appimage_path="${APPIMAGE:-$0}" +appimage_dir="$(cd "$(dirname "$(readlink -f "$appimage_path")")" && pwd)" +launcher="$appimage_dir/celune" + +if [[ ! -x "$launcher" ]]; then + echo "Celune launcher not found beside AppImage: $launcher" >&2 + exit 1 +fi + +exec "$launcher" "$@" +EOF +chmod +x "$app_dir/AppRun" + +arch="${ARCH:-$(uname -m)}" +case "$arch" in + x86_64|amd64) + appimage_arch="x86_64" + ;; + aarch64|arm64) + appimage_arch="aarch64" + ;; + *) + appimage_arch="$arch" + ;; +esac + +ARCH="$appimage_arch" appimagetool "$app_dir" "$output_dir/celune.AppImage" +rm -rf "$app_dir" + +revision="$(git -C "$repo_root" rev-parse HEAD)" +if [[ -z "$revision" ]]; then + echo "Could not determine the Git revision for update metadata." >&2 + exit 1 +fi + +CELUNE_OUTPUT_DIR="$output_dir" CELUNE_VERSION="$version" CELUNE_REVISION="$revision" CELUNE_ARCH="$appimage_arch" uv run python - <<'EOF' +import hashlib +import json +import os +from pathlib import Path + +output_dir = Path(os.environ["CELUNE_OUTPUT_DIR"]) +arch = os.environ.get("CELUNE_ARCH", "x86_64") +manifest = { + "version": os.environ["CELUNE_VERSION"], + "revision": os.environ["CELUNE_REVISION"], + "artifact": f"Celune-linux-{arch}", + "files": {}, +} +for name in ("celune", "celune-bin", "celune.AppImage"): + path = output_dir / name + if path.is_file(): + manifest["files"][name] = hashlib.sha256(path.read_bytes()).hexdigest() + +(output_dir / "celune-update.json").write_text( + json.dumps(manifest, indent=2) + "\n", + encoding="utf-8", +) +EOF diff --git a/scripts/cac.py b/scripts/cac.py index d35eec8..44eb39f 100644 --- a/scripts/cac.py +++ b/scripts/cac.py @@ -3,10 +3,10 @@ import sys from pathlib import Path -from typing import TypedDict, Mapping, Union, Optional, cast +from typing import TypedDict, Union, Optional, cast try: - from celune.cevoice import write_cevoice, ManifestValue + from celune.cevoice import write_cevoice except ModuleNotFoundError: PROJECT_ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(PROJECT_ROOT)) diff --git a/scripts/run_ci.py b/scripts/run_ci.py index 065e425..6d9abfa 100644 --- a/scripts/run_ci.py +++ b/scripts/run_ci.py @@ -18,6 +18,41 @@ cmds_failed = 0 total_errors = [] +_CACHE_PERMISSION_MARKERS = ( + "Access is denied.", + "Access is denied", + "Permission denied", +) + + +def _run_uv_command(*cmd: str) -> None: + """Run one uv-backed CI command, retrying without cache on permission errors.""" + base_cmd = ["uv", "run", *cmd] + try: + subprocess.run( + base_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + text=True, + timeout=180, + ) + return + except subprocess.CalledProcessError as failed: + combined_output = f"{failed.stdout}\n{failed.stderr}" + if not any(marker in combined_output for marker in _CACHE_PERMISSION_MARKERS): + raise + + subprocess.run( + ["uv", "--no-cache", "run", *cmd], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + text=True, + timeout=300, + ) + + if len(CI_COMMANDS) != len(CI_PATHS): raise RuntimeError( f"CI configuration mismatch: {len(CI_COMMANDS)} commands for {len(CI_PATHS)} path entries" @@ -30,14 +65,7 @@ bar_format="{l_bar}{bar} | {n_fmt}/{total_fmt}", ): try: - subprocess.run( - ["uv", "run", *cmd, *paths], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=True, - text=True, - timeout=300, - ) + _run_uv_command(*cmd, *paths) except subprocess.CalledProcessError as failed: cmds_failed += 1 if failed.stdout: diff --git a/tests/support.py b/tests/support.py index 39875eb..e530609 100644 --- a/tests/support.py +++ b/tests/support.py @@ -3,15 +3,15 @@ from __future__ import annotations -import contextlib -import importlib -import queue import sys +import queue import threading +import importlib +import contextlib from pathlib import Path from unittest import mock +from types import SimpleNamespace, ModuleType from collections.abc import Iterator -from types import SimpleNamespace from typing import TYPE_CHECKING, Optional, TypedDict import numpy as np @@ -112,6 +112,7 @@ def __init__( self.finished = threading.Event() self.finished.set() self.scheduled: list[npt.NDArray[np.float32]] = [] + self.reset_audio_reactivity_called = False def start(self) -> bool: """Mark the fake glow as started. @@ -160,17 +161,26 @@ def schedule(self, audio: npt.NDArray[np.float32]) -> None: """ self.scheduled.append(audio) + def reset_audio_reactivity(self) -> None: + """Record that live audio-reactive glow state was cleared.""" + self.reset_audio_reactivity_called = True + class FakeStream: """Minimal output-stream fake that records lifecycle operations.""" def __init__(self) -> None: """Initialize fake stream state.""" + self.started = False self.stopped = False self.aborted = False self.closed = False self.written: list[npt.NDArray[np.float32]] = [] + def start(self) -> None: + """Record stream startup.""" + self.started = True + def stop(self) -> None: """Record a graceful stream stop.""" self.stopped = True @@ -215,6 +225,13 @@ def make_pipeline_engine() -> SimpleNamespace: engine.loaded = True engine.locked = False engine.cur_state = "idle" + engine.exit_requested = False + engine.stream = None + engine._stream = None + engine.current_sr = None + engine._current_sr = None + engine.audio_unavailable = False + engine._audio_unavailable = False engine.text_queue = queue.Queue() engine.audio_queue = queue.Queue() engine.say_lock = threading.Lock() @@ -231,6 +248,11 @@ def make_pipeline_engine() -> SimpleNamespace: (msg, severity) ) engine.progress_callback = lambda current, total: progress.append((current, total)) + engine.idle_callback = mock.Mock() + engine.glow = SimpleNamespace( + schedule=mock.Mock(), + reset_audio_reactivity=mock.Mock(), + ) engine.messages = messages engine.errors = errors engine.statuses = statuses @@ -249,7 +271,7 @@ def make_voice_loader( metadata: The voice metadata. Returns: - A CEVOICE loader stub for the given voice. + SimpleNamespace: A CEVOICE loader stub for the given voice. """ return SimpleNamespace( bundle=SimpleNamespace(voices={voice: metadata}, voice_order=(voice,)), @@ -292,6 +314,30 @@ class StubVoxCPM: yield voxcpm2.VoxCPM2 +@contextlib.contextmanager +def mock_dotstts_backend(): + """Import the dots.tts backend with a stub dots_tts package.""" + + class StubDotsTtsRuntime: + """Import-time stand-in for the dots.tts runtime class.""" + + package = ModuleType("dots_tts") + package.__path__ = [] + runtime_module = ModuleType("dots_tts.runtime") + runtime_module.DotsTtsRuntime = StubDotsTtsRuntime # type: ignore[missing-attribute] + package.runtime = runtime_module # type: ignore[missing-attribute] + + with mock.patch.dict( + sys.modules, + { + "dots_tts": package, + "dots_tts.runtime": runtime_module, + }, + ): + dotstts = importlib.import_module("celune.backends.dotstts") + yield dotstts.DotsTtsMF + + @contextlib.contextmanager def mock_mini_backend(): """Import the Mini backend with a stub pocket-tts package.""" diff --git a/tests/test_analysis_and_chroma.py b/tests/test_analysis_and_chroma.py index b3aa879..9960833 100644 --- a/tests/test_analysis_and_chroma.py +++ b/tests/test_analysis_and_chroma.py @@ -7,8 +7,9 @@ import numpy as np from celune import analysis -from celune.constants import N_A_NUMERIC +from celune.colors import RGB from celune.chroma import AudioRGBGlow +from celune.constants import N_A_NUMERIC class AnalysisTests(TestCase): @@ -224,3 +225,63 @@ def test_sleep_and_wake_preserve_prior_brightness_target(self) -> None: self.assertEqual(glow._state, "waking") self.assertAlmostEqual(glow._target_brightness, 0.6) + + def test_glow_target_follows_smoothed_audio_rms_without_snapping_to_max( + self, + ) -> None: + """Verify glow brightness tracks RMS amplitude smoothly.""" + glow = AudioRGBGlow(celune=None, color="#ffffff") + quiet = np.full((glow.fps, 2), 0.05, dtype=np.float32) + peak = np.zeros((glow.fps, 2), dtype=np.float32) + peak[0] = 1.0 + + glow._process_glow_chunk(quiet, 0.0) + quiet_target = glow._target_brightness + self.assertGreater(quiet_target, glow.idle_brightness) + self.assertLess(quiet_target, glow.max_brightness) + + glow._process_glow_chunk(peak, 0.1) + self.assertLess(glow._target_brightness, glow.max_brightness) + + def test_glow_worker_uses_audio_target_without_fixed_pulse_logic(self) -> None: + """Verify the normal glow branch follows audio target directly.""" + glow = AudioRGBGlow(celune=None, color="#ffffff") + glow._state = "normal" + glow._current_brightness = glow.idle_brightness + glow._target_brightness = min(glow.max_brightness, glow.idle_brightness + 0.4) + glow._current_color = glow.base_color.copy() + glow._target_color = glow.base_color.copy() + + writes: list[RGB] = [] + glow._set_all_devices = lambda rgb: writes.append( + (int(rgb[0]), int(rgb[1]), int(rgb[2])) + ) + + def stop_after_two_sleeps(_seconds: float) -> None: + if len(writes) >= 2: + glow._stop_event.set() + + with mock.patch("celune.chroma.time.sleep", side_effect=stop_after_two_sleeps): + glow._run() + + self.assertGreaterEqual(len(writes), 1) + self.assertGreater(glow._current_brightness, glow.idle_brightness) + + def test_reset_audio_reactivity_clears_pending_audio_and_restores_idle( + self, + ) -> None: + """Verify abrupt playback resets the audio-reactive envelope to idle.""" + glow = AudioRGBGlow(celune=None, color="#ffffff") + glow._worker = mock.Mock() + glow._worker.is_alive.return_value = True + glow._state = "normal" + glow._smoothed_level = 0.8 + glow._target_brightness = min(glow.max_brightness, glow.idle_brightness + 0.4) + glow._scheduled_chunks.append((0.0, np.ones((32, 2), dtype=np.float32))) + + glow.reset_audio_reactivity() + + self.assertEqual(len(glow._scheduled_chunks), 0) + self.assertEqual(glow._smoothed_level, 0.0) + self.assertEqual(glow._state, "normal") + self.assertAlmostEqual(glow._target_brightness, glow.idle_brightness) diff --git a/tests/test_api_audio.py b/tests/test_api_audio.py index 1bddf3c..e6d643f 100644 --- a/tests/test_api_audio.py +++ b/tests/test_api_audio.py @@ -5,17 +5,17 @@ import json import time import queue -from typing import cast, Optional from unittest import TestCase from types import SimpleNamespace +from typing import cast, Optional import numpy as np import soundfile as sf from starlette.responses import Response from celune import api -from celune.pipeline import SpeechStreamQueue from celune.celune import Celune +from celune.pipeline import SpeechStreamQueue class ApiAudioTests(TestCase): diff --git a/tests/test_api_think.py b/tests/test_api_think.py index 0893d4c..223080b 100644 --- a/tests/test_api_think.py +++ b/tests/test_api_think.py @@ -2,9 +2,9 @@ """Tests for Persona think API control routes.""" import json +from typing import cast from unittest import TestCase from types import SimpleNamespace -from typing import cast from fastapi import HTTPException diff --git a/tests/test_api_webui.py b/tests/test_api_webui.py new file mode 100644 index 0000000..32347f7 --- /dev/null +++ b/tests/test_api_webui.py @@ -0,0 +1,450 @@ +# SPDX-License-Identifier: MIT +"""Tests for Celune's browser-facing API UI.""" + +from types import SimpleNamespace +from queue import Queue +from typing import cast +from unittest import TestCase, mock + +import numpy as np +from starlette.requests import Request + +from celune import api +from celune.celune import Celune +from celune.pipeline import SpeechStreamQueue + + +class ApiWebUITests(TestCase): + """Tests for the mounted Gradio browser UI helpers.""" + + def setUp(self) -> None: + self.previous_celune = api.bound_celune + self.previous_status_text = api.webui_status_text + self.previous_status_severity = api.webui_status_severity + self.previous_seeded = api.webui_logs_seeded + self.previous_resource_page = api.webui_resource_page + self.previous_last_resource_advance = api.webui_last_resource_advance + self.previous_last_probed_state = api.webui_last_probed_state + self.previous_input_locked = api.webui_input_locked + self.previous_input_placeholder = api.webui_input_placeholder + self.previous_voice_locked = api.webui_voice_locked + self.previous_theme_style = api.webui_theme_style + self.previous_status_source = api.webui_status_source + self.previous_status_updated_at = api.webui_status_updated_at + self.previous_logs = list(api.webui_log_lines) + api.bound_celune = None + api.webui_log_lines.clear() + api.webui_logs_seeded = True + api.webui_resource_page = 0 + api.webui_last_resource_advance = 0.0 + api.webui_last_probed_state = None + api.webui_input_locked = True + api.webui_input_placeholder = "Please wait" + api.webui_voice_locked = True + api.webui_status_source = "probe" + api.webui_status_updated_at = 0.0 + api._set_webui_status("Starting up") + + def tearDown(self) -> None: + api.bound_celune = self.previous_celune + api.webui_status_text = self.previous_status_text + api.webui_status_severity = self.previous_status_severity + api.webui_logs_seeded = self.previous_seeded + api.webui_resource_page = self.previous_resource_page + api.webui_last_resource_advance = self.previous_last_resource_advance + api.webui_last_probed_state = self.previous_last_probed_state + api.webui_input_locked = self.previous_input_locked + api.webui_input_placeholder = self.previous_input_placeholder + api.webui_voice_locked = self.previous_voice_locked + api.webui_theme_style = self.previous_theme_style + api.webui_status_source = self.previous_status_source + api.webui_status_updated_at = self.previous_status_updated_at + api.webui_log_lines.clear() + api.webui_log_lines.extend(self.previous_logs) + + def test_root_redirects_to_browser_ui(self) -> None: + """Verify the fallback root now forwards users to the browser UI.""" + response = api.root() + self.assertEqual(response.status_code, 307) + self.assertEqual(response.headers["location"], "/ui") + + def test_browser_ui_requests_bypass_api_security_detection(self) -> None: + """Verify mounted browser UI paths are recognized by the API middleware.""" + ui_request = Request( + { + "type": "http", + "method": "GET", + "path": "/ui/assets/index.js", + "headers": [], + "query_string": b"", + "client": ("127.0.0.1", 2060), + "scheme": "http", + "server": ("127.0.0.1", 2060), + } + ) + api_request = Request( + { + "type": "http", + "method": "GET", + "path": "/v1/version", + "headers": [], + "query_string": b"", + "client": ("127.0.0.1", 2060), + "scheme": "http", + "server": ("127.0.0.1", 2060), + } + ) + + self.assertEqual(api._is_browser_ui_request(ui_request), True) + self.assertEqual(api._is_browser_ui_request(api_request), False) + + def test_webui_snapshot_uses_bound_celune_state(self) -> None: + """Verify the browser snapshot mirrors current logs, status, and voice state.""" + api.bound_celune = cast( + Celune, + SimpleNamespace( + current_voice="balanced", + voices=("balanced", "calm"), + is_in_tutorial=False, + locked=False, + cur_state="idle", + ), + ) + api.webui_log_lines.append(("Ready to speak.", "info")) + api._set_webui_status("Idle") + + with mock.patch( + "celune.api.ui_resources.resource_pages", + return_value=("VRAM: 10.66/11.94 GB available",), + ): + ( + logs_html, + status_html, + resources_html, + voice_update, + send_update, + input_update, + ) = api._webui_snapshot() + + self.assertIn("Ready to speak.", logs_html) + self.assertIn("style=", logs_html) + self.assertIn("Idle", status_html) + self.assertIn("10.66/11.94", resources_html) + self.assertEqual(voice_update["value"], "Balanced") + self.assertEqual(voice_update["interactive"], True) + self.assertEqual(send_update["interactive"], True) + self.assertEqual(input_update["interactive"], True) + + def test_webui_wrapped_callbacks_mirror_input_and_voice_lock_state(self) -> None: + """Verify callback-driven lock state changes are reflected in the browser UI.""" + celune = cast( + Celune, + SimpleNamespace( + current_voice="balanced", + voices=("balanced", "calm"), + is_in_tutorial=False, + locked=False, + cur_state="idle", + log_callback=lambda msg, severity="info": None, + status_callback=lambda msg, severity="info": None, + voice_changed_callback=lambda name: None, + change_input_state_callback=lambda locked: None, + change_voice_lock_state_callback=lambda locked: None, + ), + ) + api.bind_celune(celune) + + celune.change_input_state_callback(True) + celune.change_voice_lock_state_callback(True) + with mock.patch( + "celune.api.ui_resources.resource_pages", + return_value=("VRAM: 10.66/11.94 GB available",), + ): + _logs, _status, _resources, voice_update, send_update, input_update = ( + api._webui_snapshot() + ) + self.assertEqual(input_update["interactive"], False) + self.assertEqual(input_update["placeholder"], "Please wait") + self.assertEqual(send_update["interactive"], False) + self.assertEqual(voice_update["interactive"], False) + + celune.change_input_state_callback(False) + celune.change_voice_lock_state_callback(False) + with mock.patch( + "celune.api.ui_resources.resource_pages", + return_value=("VRAM: 10.66/11.94 GB available",), + ): + ( + _logs2, + _status2, + _resources2, + voice_update2, + send_update2, + input_update2, + ) = api._webui_snapshot() + self.assertEqual(input_update2["interactive"], True) + self.assertEqual(input_update2["placeholder"], "Enter text to speak here") + self.assertEqual(send_update2["interactive"], True) + self.assertEqual(voice_update2["interactive"], True) + + def test_webui_snapshot_shows_tutorial_placeholder(self) -> None: + """Verify tutorial state uses the tutorial placeholder in the browser UI.""" + api.bound_celune = cast( + Celune, + SimpleNamespace( + current_voice="balanced", + voices=("balanced", "calm"), + is_in_tutorial=True, + locked=True, + cur_state="idle", + ), + ) + api.webui_input_locked = True + api.webui_input_placeholder = "Currently in tutorial mode" + api.webui_voice_locked = True + + with mock.patch( + "celune.api.ui_resources.resource_pages", + return_value=("VRAM: 10.66/11.94 GB available",), + ): + _logs, _status, _resources, voice_update, send_update, input_update = ( + api._webui_snapshot() + ) + self.assertEqual(input_update["interactive"], False) + self.assertEqual(input_update["placeholder"], "Currently in tutorial mode") + self.assertEqual(send_update["interactive"], False) + self.assertEqual(voice_update["interactive"], False) + + def test_seeded_logs_strip_persisted_time_prefix(self) -> None: + """Verify persisted log timestamps do not show up in the browser log view.""" + stripped = api._strip_webui_log_prefix( + "[2026-06-11T14:22:01] [WARNING] Something happened" + ) + self.assertEqual(stripped, "Something happened") + + def test_webui_theme_style_uses_cevoice_theme_metadata(self) -> None: + """Verify browser CSS variables are derived from CEVOICE theme metadata.""" + loader = SimpleNamespace( + bundle=SimpleNamespace( + metadata={ + "theme": { + "background": "#112233", + "accent": "#aabbcc", + "faded_accent": "#556677", + } + } + ) + ) + + with mock.patch("celune.api.default_loader", return_value=loader): + api._configure_webui_theme() + + self.assertIn("--celune-background: #112233;", api.webui_theme_style) + self.assertIn("--celune-sleeping: #556677;", api.webui_theme_style) + self.assertIn("--celune-primary:", api.webui_theme_style) + self.assertIn('rel="icon"', api.WEBUI_HEAD) + + def test_webui_css_keeps_log_panel_flexible(self) -> None: + """Verify the stylesheet keeps the log panel as the growable shell region.""" + self.assertIn("#celune-log-panel", api.WEBUI_CSS) + self.assertIn("flex: 1 1 auto;", api.WEBUI_CSS) + self.assertIn("min-height: 0;", api.WEBUI_CSS) + self.assertIn( + "@media (max-width: 768px), (any-pointer: coarse), (hover: none)", + api.WEBUI_CSS, + ) + self.assertNotIn("margin-top: auto;", api.WEBUI_CSS) + + def test_webui_probe_logs_sleep_transition(self) -> None: + """Verify the browser log mirrors the sleep transition message.""" + api.bound_celune = cast( + Celune, + SimpleNamespace( + current_voice="balanced", + voices=("balanced", "calm"), + is_in_tutorial=False, + locked=False, + cur_state="sleeping", + sleeping=True, + ), + ) + + with mock.patch( + "celune.api.ui_resources.resource_pages", + return_value=("VRAM: 10.66/11.94 GB available",), + ): + logs_html, status_html, _resources, _voice, _send, _input = ( + api._webui_snapshot() + ) + + self.assertIn("currently sleeping. Type anything to wake up.", logs_html) + self.assertIn("Sleeping", status_html) + + def test_webui_probe_does_not_immediately_override_callback_status(self) -> None: + """Verify fast callback statuses remain visible through the next probe.""" + api.bound_celune = cast( + Celune, + SimpleNamespace( + current_voice="balanced", + voices=("balanced", "calm"), + is_in_tutorial=False, + locked=False, + cur_state="speaking", + ), + ) + api.webui_last_probed_state = "idle" + + with ( + mock.patch( + "celune.api.ui_resources.resource_pages", + return_value=("VRAM: 10.66/11.94 GB available",), + ), + mock.patch("celune.api.time.monotonic", side_effect=[10.0, 10.1]), + ): + api._set_webui_status("Normalizing", source="callback") + _logs, status_html, _resources, _voice, _send, _input = ( + api._webui_snapshot() + ) + + self.assertIn("Normalizing", status_html) + + def test_webui_slash_command_uses_main_ui_command_path(self) -> None: + """Verify slash commands are delegated into the main UI command handler.""" + ui = SimpleNamespace() + ui.process_command = mock.Mock() + ui.call_from_thread = mock.Mock(side_effect=lambda fn, *args: fn(*args)) + + with mock.patch("celune.api.CeluneUI._instance", ui): + updates = list(api._webui_speak("/help")) + + ui.process_command.assert_called_once_with("help", []) + self.assertEqual(len(updates), 1) + self.assertEqual(updates[0][0]["value"], "") + self.assertIsNone(updates[0][1]) + + def test_webui_slash_command_warns_without_main_ui(self) -> None: + """Verify slash commands warn instead of speaking when no main UI exists.""" + with mock.patch("celune.api.CeluneUI._instance", None): + updates = list(api._webui_speak("/help")) + + self.assertEqual(len(updates), 1) + self.assertIn("must be running to run commands", updates[0][2]) + + def test_webui_speak_returns_browser_audio_after_generation(self) -> None: + """Verify the browser submit handler returns one browser audio payload.""" + chunks: SpeechStreamQueue = Queue() + chunks.put(np.zeros((2, 8), dtype=np.float32)) + chunks.put(None) + + def say_stream(_content: str, save: bool = True) -> SpeechStreamQueue: + _ = save + return chunks + + api.bound_celune = cast( + Celune, + SimpleNamespace( + say_stream=say_stream, + dev=False, + current_voice="balanced", + voices=("balanced", "calm"), + is_in_tutorial=False, + locked=False, + cur_state="idle", + ), + ) + + with mock.patch( + "celune.api.ui_resources.resource_pages", + return_value=("VRAM: 10.66/11.94 GB available",), + ): + updates = list(api._webui_speak("hello")) + + self.assertGreaterEqual(len(updates), 2) + first_input, first_audio, *_first_rest = updates[0] + second_input, second_audio, *_second_rest = updates[1] + + self.assertEqual(first_input["value"], "") + self.assertIsNone(first_audio) + self.assertEqual(second_input["value"], "") + self.assertIsInstance(second_audio, tuple) + sample_rate, array = cast(tuple[int, np.ndarray], second_audio) + self.assertEqual(sample_rate, 48000) + self.assertEqual(array.shape, (8, 2)) + + def test_webui_speak_wakes_sleeping_celune_before_speaking(self) -> None: + """Verify browser submit wakes Celune first, then continues into speech.""" + calls: list[str] = [] + chunks: SpeechStreamQueue = Queue() + chunks.put(np.zeros((2, 8), dtype=np.float32)) + chunks.put(None) + celune = SimpleNamespace( + dev=False, + current_voice="balanced", + voices=("balanced", "calm"), + is_in_tutorial=False, + locked=False, + cur_state="sleeping", + sleeping=True, + ) + + def wake_from_sleep() -> bool: + calls.append("wake") + celune.sleeping = False + celune.cur_state = "idle" + return True + + def say_stream(content: str, save: bool = True) -> SpeechStreamQueue: + _ = save + calls.append(f"say:{content}") + return chunks + + celune.wake_from_sleep = wake_from_sleep + celune.say_stream = say_stream + api.bound_celune = cast(Celune, celune) + + with mock.patch( + "celune.api.ui_resources.resource_pages", + return_value=("VRAM: 10.66/11.94 GB available",), + ): + updates = list(api._webui_speak("wake me")) + + self.assertGreaterEqual(len(updates), 3) + self.assertEqual(calls, ["wake", "say:wake me"]) + self.assertEqual(updates[0][0]["value"], "wake me") + self.assertIsNone(updates[0][1]) + self.assertEqual(updates[-1][0]["value"], "") + final_audio = updates[-1][1] + self.assertIsInstance(final_audio, tuple) + self.assertEqual(cast(tuple[int, np.ndarray], final_audio)[0], 48000) + + def test_webui_snapshot_probes_runtime_status_and_rotates_resources(self) -> None: + """Verify footer polling refreshes status and rotates the resource page.""" + api.bound_celune = cast( + Celune, + SimpleNamespace( + current_voice="bold", + voices=("bold", "calm"), + is_in_tutorial=False, + locked=False, + cur_state="speaking", + ), + ) + + with ( + mock.patch( + "celune.api.ui_resources.resource_pages", + return_value=("VRAM: first", "Friday, June 11, 2026"), + ), + mock.patch("celune.api.time.monotonic", side_effect=[10.0, 12.2]), + ): + _logs1, status1, resources1, _voice1, _send1, _input1 = ( + api._webui_snapshot() + ) + _logs2, status2, resources2, _voice2, _send2, _input2 = ( + api._webui_snapshot() + ) + + self.assertIn("Speaking", status1) + self.assertIn("Speaking", status2) + self.assertIn("VRAM: first", resources1) + self.assertIn("Friday, June 11, 2026", resources2) diff --git a/tests/test_backends_and_extensions.py b/tests/test_backends_and_extensions.py index 20311e2..6cddd29 100644 --- a/tests/test_backends_and_extensions.py +++ b/tests/test_backends_and_extensions.py @@ -7,23 +7,28 @@ import importlib import threading from pathlib import Path -from collections.abc import Iterator from typing import Optional -from unittest import mock, TestCase from types import SimpleNamespace +from unittest import mock, TestCase +from collections.abc import Iterator import numpy as np import numpy.typing as npt +import torch from celune.utils import discard from celune.backends import resolve_backend from celune.extensions.manager import CeluneExtensionManager from celune.extensions.base import CeluneContext, CeluneExtension -from celune.exceptions import BackendError -from celune.exceptions import ExtensionAlreadyRegisteredError, InvalidExtensionError +from celune.exceptions import ( + BackendError, + ExtensionAlreadyRegisteredError, + InvalidExtensionError, +) from .support import ( FakeBackend, make_voice_loader, + mock_dotstts_backend, mock_mini_backend, mock_qwen3_backend, mock_voxcpm_backend, @@ -33,8 +38,8 @@ class BackendTests(TestCase): """Tests for backend base behavior and backend resolution.""" - def test_base_backend_reports_models_and_progress(self) -> None: - """Verify model metadata and progress helpers on a fake backend. + def test_base_backend_reports_models(self) -> None: + """Verify model metadata helpers on a fake backend. Raises: AssertionError: A backend helper returns an unexpected value. @@ -44,10 +49,6 @@ def test_base_backend_reports_models_and_progress(self) -> None: self.assertEqual(backend.all_model_ids, ["fake/balanced", "fake/bold"]) self.assertEqual(backend.voices, ["balanced", "bold"]) self.assertEqual(backend.model_id_for_voice("bold"), "fake/bold") - self.assertIsNone(backend.generation_progress_total("text")) - self.assertEqual(backend.generation_progress_steps(None), 1) - self.assertEqual(backend.generation_progress_steps({"chunk_steps": 3}), 3) - self.assertEqual(backend.generation_progress_steps({"chunk_steps": 0}), 1) def test_base_backend_materializes_bundle_pt_refs_when_available(self) -> None: """Verify CEVOICE bundles eagerly extract .pt refs alongside .wav files.""" @@ -109,6 +110,16 @@ class StubTTSModel: self.assertIsInstance(backend, mini_cls) self.assertEqual(backend.name, "mini") + def test_resolve_backend_accepts_dotstts_backend_name(self) -> None: + """Verify the dots.tts backend resolves through the backend registry.""" + + with mock_dotstts_backend() as dotstts_cls: + with mock.patch.object(dotstts_cls, "_validate_refs"): + backend = resolve_backend("dotstts") + + self.assertIsInstance(backend, dotstts_cls) + self.assertEqual(backend.name, "dotstts") + def test_voxcpm2_uses_pack_cfg_scale_when_present(self) -> None: """Verify CEVOICE can override VoxCPM2's per-voice CFG scale.""" @@ -238,6 +249,182 @@ def generate_voice_clone_streaming( self.assertEqual(model.ref_text, "Pack reference.") + def test_dotstts_uses_pack_reference_text_when_present(self) -> None: + """Verify CEVOICE can override dots.tts reference text.""" + + with mock_dotstts_backend() as dotstts_cls: + + class FakeModel: + """Fake model class for use in this test suite.""" + + sample_rate = 48000 + + def __init__(self) -> None: + self.prompt_text = None + + def generate_stream(self, *args, **kwargs) -> Iterator[torch.Tensor]: + """Generate fake dots.tts chunks. + + Args: + args: Arguments used for generation. + kwargs: Keyword arguments used for generation. + """ + discard(args) + self.prompt_text = kwargs["prompt_text"] + yield torch.zeros((1, 4), dtype=torch.float32) + + loader = make_voice_loader("calm", {"reference_text": "Pack reference."}) + with ( + mock.patch.object(dotstts_cls, "_validate_refs"), + mock.patch( + "celune.backends.dotstts.default_loader", return_value=loader + ), + ): + backend = dotstts_cls(log=lambda _msg, _severity="info": None) + model = FakeModel() + list(backend.generate_stream(model, text="hello", voice="calm")) + + self.assertEqual(model.prompt_text, "Pack reference.") + + def test_dotstts_falls_back_to_the_active_pack_voice_ids(self) -> None: + """Verify dots.tts uses the pack voice when the backend default is absent.""" + + with mock_dotstts_backend() as dotstts_cls: + + class FakeModel: + """Fake model class for use in this test suite.""" + + sample_rate = 48000 + + def __init__(self) -> None: + self.prompt_audio_path = None + self.prompt_text = None + + def generate_stream(self, *args, **kwargs) -> Iterator[torch.Tensor]: + """Generate fake dots.tts chunks.""" + discard(args) + self.prompt_audio_path = kwargs["prompt_audio_path"] + self.prompt_text = kwargs["prompt_text"] + yield torch.zeros((1, 4), dtype=torch.float32) + + loader = make_voice_loader("calm", {"reference_text": "Pack reference."}) + with ( + mock.patch.object(dotstts_cls, "_validate_refs"), + mock.patch( + "celune.backends.dotstts.default_loader", return_value=loader + ), + ): + backend = dotstts_cls(log=lambda _msg, _severity="info": None) + model = FakeModel() + list(backend.generate_stream(model, text="hello")) + + self.assertEqual(model.prompt_audio_path, str(Path("calm.wav"))) + self.assertEqual(model.prompt_text, "Pack reference.") + + def test_dotstts_requires_reference_text_for_valid_voice_identifiers(self) -> None: + """Verify dots.tts rejects packs whose voices omit the required reference text.""" + + with mock_dotstts_backend() as dotstts_cls: + loader = make_voice_loader("calm", {}) + with mock.patch( + "celune.backends.dotstts.default_loader", return_value=loader + ): + with self.assertRaisesRegex( + BackendError, "requires a compatible CEVOICE/CECHAR package" + ): + dotstts_cls(log=lambda _msg, _severity="info": None) + + def test_dotstts_requires_a_compatible_voice_pack(self) -> None: + """Verify dots.tts refuses to initialize without a usable CEVOICE/CECHAR pack.""" + + with ( + mock_dotstts_backend() as dotstts_cls, + mock.patch("celune.backends.dotstts.default_loader", return_value=None), + ): + with self.assertRaisesRegex( + BackendError, "requires a compatible CEVOICE/CECHAR package" + ): + dotstts_cls(log=lambda _msg, _severity="info": None) + + def test_dotstts_manually_pumps_and_closes_backend_stream(self) -> None: + """Verify dots.tts iterates and closes its backend stream explicitly.""" + + with mock_dotstts_backend() as dotstts_cls: + + class FakeStream: + """Minimal iterator exposing a close hook for one backend test.""" + + def __init__(self) -> None: + self._chunks = [ + torch.zeros((1, 1), dtype=torch.float32), + torch.ones((1, 1), dtype=torch.float32), + ] + self.closed = False + + def __iter__(self) -> "FakeStream": + return self + + def __next__(self) -> torch.Tensor: + if not self._chunks: + raise StopIteration + return self._chunks.pop(0) + + def close(self) -> None: + """Close the stream.""" + self.closed = True + + class FakeModel: + """Fake model class for use in this test suite.""" + + sample_rate = 48000 + + def __init__(self) -> None: + self.stream = FakeStream() + + def generate_stream(self, *args, **kwargs) -> FakeStream: + """Generate fake dots.tts chunks. + + Args: + args: Arguments used for generation. + kwargs: Keyword arguments used for generation. + + Returns: + FakeStream: A fake stream of dots.tts chunks. + """ + discard(args) + discard(kwargs) + return self.stream + + loader = make_voice_loader("calm", {"reference_text": "Pack reference."}) + with ( + mock.patch.object(dotstts_cls, "_validate_refs"), + mock.patch( + "celune.backends.dotstts.default_loader", return_value=loader + ), + ): + backend = dotstts_cls(log=lambda _msg, _severity="info": None) + model = FakeModel() + chunks = list( + backend.generate_stream(model, text="hello", voice="calm") + ) + + self.assertEqual(len(chunks), 2) + self.assertEqual(chunks[0][1], 48000) + self.assertEqual(chunks[1][0].tolist(), [1.0]) + self.assertEqual(model.stream.closed, True) + + def test_dotstts_suppresses_loguru_runtime_noise(self) -> None: + """Verify dots.tts suppression also disables its Loguru logger namespace.""" + + with mock_dotstts_backend() as dotstts_cls: + fake_loguru = mock.Mock() + with mock.patch("celune.backends.dotstts.loguru_logger", fake_loguru): + with dotstts_cls._suppress_backend_output(): + pass + + fake_loguru.disable.assert_called_once_with("dots_tts") + fake_loguru.enable.assert_called_once_with("dots_tts") + def test_qwen3_requires_reference_text_for_valid_voice_identifiers(self) -> None: """Verify Qwen3 rejects packs whose voices omit the required reference text.""" @@ -442,12 +629,15 @@ def setUp(self) -> None: self.logs: list[tuple[str, str]] = [] self.dev_logs: list[tuple[str, str]] = [] self.invocations: list[tuple[str, tuple[str, ...]]] = [] + self.play_calls: list[tuple[str, bool, float]] = [] self.context = CeluneContext( log=lambda msg, severity="info": self.logs.append((msg, severity)), log_dev=lambda msg, severity="info": self.dev_logs.append((msg, severity)), say=lambda text, save=True, display_text=None: True, think=lambda text: True, - play=lambda sound_path, keep=False: True, + play=lambda sound_path, keep=False, volume=1.0: ( + self.play_calls.append((sound_path, keep, volume)) or True + ), status=lambda msg, severity="info": None, set_voice=lambda name: True, get_state=lambda: "idle", @@ -469,6 +659,9 @@ def test_context_and_extension_helpers_delegate_calls(self) -> None: self.assertEqual(extension.say("hello"), True) self.assertEqual(extension.think("hello"), True) self.assertEqual(extension.play("tone.wav"), True) + self.assertEqual(self.play_calls[-1], ("tone.wav", False, 1.0)) + self.assertEqual(extension.play("quiet.wav", keep=True, volume=0.25), True) + self.assertEqual(self.play_calls[-1], ("quiet.wav", True, 0.25)) self.assertEqual(extension.set_voice("bold"), True) def test_manager_registers_invokes_and_autoloads_extensions(self) -> None: diff --git a/tests/test_backends_mini.py b/tests/test_backends_mini.py index 722a8c8..e537da9 100644 --- a/tests/test_backends_mini.py +++ b/tests/test_backends_mini.py @@ -2,10 +2,10 @@ """Tests for the Pocket TTS backend cleanup behavior.""" import tempfile -from types import SimpleNamespace +from typing import cast from pathlib import Path +from types import SimpleNamespace from unittest import mock, TestCase -from typing import cast from pocket_tts import TTSModel diff --git a/tests/test_celune_core.py b/tests/test_celune_core.py index 17cf076..998f06c 100644 --- a/tests/test_celune_core.py +++ b/tests/test_celune_core.py @@ -4,19 +4,19 @@ import threading from typing import cast from pathlib import Path -from unittest import mock, TestCase from types import SimpleNamespace +from unittest import mock, TestCase from transformers.modeling_utils import PreTrainedModel from transformers.tokenization_utils_base import PreTrainedTokenizerBase -from celune.backends.qwen3 import Qwen3 from celune.celune import Celune from celune.config import Config +from celune.backends.qwen3 import Qwen3 from celune.vram import QWEN3_0_6B_MODEL from celune.persona.impl import persona_quantization from celune.exceptions import BackendError, WarmupError -from tests.support import FakeBackend, FakeGlow +from .support import FakeBackend, FakeGlow class CeluneCoreTests(TestCase): @@ -188,7 +188,7 @@ def test_load_preloads_persona_runtime_when_available(self) -> None: with ( mock.patch("celune.celune.threading.Thread") as thread_cls, mock.patch("celune.celune.validate_runtime", return_value=True), - mock.patch("celune.celune.play_readiness_signal", return_value=False), + mock.patch("celune.celune.play_signal", return_value=False), ): thread_cls.return_value.start = mock.Mock() self.assertEqual(celune.load(), True) @@ -213,7 +213,7 @@ def test_load_disables_persona_when_preload_fails(self) -> None: with ( mock.patch("celune.celune.threading.Thread") as thread_cls, mock.patch("celune.celune.validate_runtime", return_value=True), - mock.patch("celune.celune.play_readiness_signal", return_value=False), + mock.patch("celune.celune.play_signal", return_value=False), ): thread_cls.return_value.start = mock.Mock() self.assertEqual(celune.load(), True) @@ -221,6 +221,30 @@ def test_load_disables_persona_when_preload_fails(self) -> None: persona_client.close.assert_called_once_with() self.assertIsNone(celune.vision) + def test_change_voice_returns_runtime_state_to_idle(self) -> None: + """Verify successful voice reload leaves Celune in the idle state.""" + celune = self._make_celune({}) + celune.current_voice = "balanced" + celune.voices = ("balanced", "bold") + celune.model_name = "shared-model" + celune.loaded = True + celune.cur_state = "idle" + celune.backend.model_id_for_voice = mock.Mock(return_value="shared-model") + statuses: list[tuple[str, str]] = [] + celune.status_callback = lambda msg, severity="info": statuses.append( + (msg, severity) + ) + celune.voice_changed_callback = mock.Mock() + + with mock.patch("celune.celune.play_signal", return_value=False): + celune.change_voice("bold") + + self.assertEqual(celune.current_voice, "bold") + self.assertEqual(celune.loaded, True) + self.assertEqual(celune.cur_state, "idle") + self.assertEqual(statuses[-1], ("Idle", "info")) + celune.voice_changed_callback.assert_called_once_with("bold") + def test_persona_talkback_config_can_disable_persona_input_mode(self) -> None: """Verify persona talkback can be disabled without disabling Persona.""" from celune.persona.impl import persona_enabled, persona_talkback_enabled @@ -265,6 +289,25 @@ def test_low_vram_restricts_heavy_backends_to_mini(self) -> None: self.assertEqual(resolve.call_args.args[0], "mini") + def test_low_vram_restricts_dotstts_to_mini(self) -> None: + """Verify low VRAM falls back to mini when dots.tts is requested.""" + with ( + mock.patch("celune.celune.AudioRGBGlow", FakeGlow), + mock.patch("celune.celune.default_loader", return_value=None), + mock.patch("celune.celune.persona_is_available", return_value=False), + mock.patch( + "celune.celune.resolve_backend", + return_value=FakeBackend(log=lambda _msg, _severity="info": None), + ) as resolve, + ): + celune = Celune( + config={"vram": "low"}, + tts_backend="dotstts", + ) + self.addCleanup(self._close_celune, celune) + + self.assertEqual(resolve.call_args.args[0], "mini") + def test_voice_prompt_support_tracks_qwen3_0_6b_capability(self) -> None: """Verify voice prompts are disabled for the low-tier Qwen3 clone model.""" celune = self._make_celune({}) @@ -404,7 +447,7 @@ def test_load_success_and_model_failure_paths_are_stubbed(self) -> None: with ( mock.patch("celune.celune.threading.Thread") as thread_cls, mock.patch("celune.celune.validate_runtime", return_value=True), - mock.patch("celune.celune.play_readiness_signal", return_value=False), + mock.patch("celune.celune.play_signal", return_value=False), ): thread_cls.return_value.start = mock.Mock() self.assertEqual(celune.load(), True) @@ -418,7 +461,8 @@ def test_load_success_and_model_failure_paths_are_stubbed(self) -> None: failing.backend.load_default_model = mock.Mock(side_effect=RuntimeError("boom")) errors: list[str] = [] failing.error_callback = errors.append - self.assertEqual(failing.load(), False) + with mock.patch("celune.celune.play_signal", return_value=False): + self.assertEqual(failing.load(), False) self.assertEqual(errors, ["Default model failed to load"]) self.assertEqual(getattr(failing.glow, "fatal_called"), True) @@ -532,7 +576,8 @@ def test_sleep_mode_unloads_configured_models_and_wakes(self) -> None: celune._persona_conn = mock.Mock(return_value=persona_client) old_backend = celune.backend - self.assertEqual(celune.enter_sleep_mode(), True) + with mock.patch("celune.celune.play_signal", return_value=False): + self.assertEqual(celune.enter_sleep_mode(), True) self.assertEqual(celune.sleeping, True) self.assertEqual(celune.loaded, False) @@ -545,7 +590,8 @@ def test_sleep_mode_unloads_configured_models_and_wakes(self) -> None: self.assertIsNone(celune.vision) persona_client.close.assert_called_once_with() - self.assertEqual(celune.wake_from_sleep(), True) + with mock.patch("celune.celune.play_signal", return_value=False): + self.assertEqual(celune.wake_from_sleep(), True) self.assertIsNot(celune.backend, old_backend) self.assertEqual(celune.sleeping, False) @@ -571,9 +617,28 @@ def test_sleep_mode_closes_persona_even_if_close_raises(self) -> None: celune.cur_state = "idle" celune.vision = mock.Mock(close=mock.Mock(side_effect=RuntimeError("boom"))) - self.assertEqual(celune.enter_sleep_mode(), True) + with mock.patch("celune.celune.play_signal", return_value=False): + self.assertEqual(celune.enter_sleep_mode(), True) self.assertIsNone(celune.vision) + def test_sleep_mode_plays_signal_after_releasing_pipeline_lock(self) -> None: + """Verify the sleeping signal is not invoked while ``say_lock`` is still held.""" + celune = self._make_celune( + {"sleep": {"enabled": True, "unload": {"persona": False, "tts": False}}} + ) + celune.locked = False + celune.loaded = True + celune.cur_state = "idle" + + def play_sleep_signal(engine: Celune, signal_type: str) -> bool: + self.assertEqual(signal_type, "sleeping") + self.assertEqual(engine.say_lock.acquire(blocking=False), True) + engine.say_lock.release() + return False + + with mock.patch("celune.celune.play_signal", side_effect=play_sleep_signal): + self.assertEqual(celune.enter_sleep_mode(), True) + def test_wake_failure_switches_glow_to_fatal_color(self) -> None: """Verify wake failures trigger the fixed fatal OpenRGB glow state.""" celune = self._make_celune( @@ -594,7 +659,10 @@ def test_wake_failure_switches_glow_to_fatal_color(self) -> None: failing_backend = FakeBackend(log=lambda _msg, _severity="info": None) failing_backend.load_model = mock.Mock(side_effect=RuntimeError("boom")) - with mock.patch("celune.celune.resolve_backend", return_value=failing_backend): + with ( + mock.patch("celune.celune.resolve_backend", return_value=failing_backend), + mock.patch("celune.celune.play_signal", return_value=False), + ): self.assertEqual(celune.wake_from_sleep(), False) self.assertEqual(getattr(celune.glow, "fatal_called"), True) @@ -629,9 +697,12 @@ def blocking_load_model(model_id: str) -> dict[str, object]: recreated_backend.load_model = mock.Mock(side_effect=blocking_load_model) - with mock.patch( - "celune.celune.resolve_backend", return_value=recreated_backend - ) as resolve_backend: + with ( + mock.patch( + "celune.celune.resolve_backend", return_value=recreated_backend + ) as resolve_backend, + mock.patch("celune.celune.play_signal", return_value=False), + ): results: list[bool] = [] def wake() -> None: diff --git a/tests/test_colors_and_dsp.py b/tests/test_colors_and_dsp.py index e8d90bc..7e45941 100644 --- a/tests/test_colors_and_dsp.py +++ b/tests/test_colors_and_dsp.py @@ -2,7 +2,7 @@ """Tests for color and DSP helpers.""" from typing import cast -from unittest import TestCase +from unittest import TestCase, mock import numpy as np @@ -51,6 +51,9 @@ def test_default_and_custom_theme_palettes_are_configured(self) -> None: class DspTests(TestCase): """Tests for lightweight DSP helpers.""" + def tearDown(self) -> None: + dsp._SIGNAL_CACHE.clear() + def test_make_stereo_and_resampling_validate_audio(self) -> None: """Verify stereo conversion and sample-rate validation paths. @@ -95,3 +98,33 @@ def test_soften_split_and_silence_detection(self) -> None: dsp.is_silent_utterance(normal), (False, UtteranceLoudnessTier.NORMAL), ) + + def test_ui_signal_helpers_reuse_cached_audio(self) -> None: + """Verify UI signal helpers reuse immutable cached buffers.""" + base = np.ones((4, 2), dtype=np.float32) + + with ( + mock.patch("celune.dsp._load_readiness_signal", return_value=base) as load, + mock.patch( + "celune.dsp._pitch_shift_ui_signal", + side_effect=lambda audio, n_steps: audio + np.float32(n_steps), + ) as shift, + ): + readiness_first = dsp.readiness_signal() + readiness_second = dsp.readiness_signal() + sleeping_first = dsp.sleeping_signal() + sleeping_second = dsp.sleeping_signal() + working_first = dsp.working_signal() + working_second = dsp.working_signal() + error_first = dsp.error_signal() + error_second = dsp.error_signal() + + self.assertIs(readiness_first, readiness_second) + self.assertIs(sleeping_first, sleeping_second) + self.assertIs(working_first, working_second) + self.assertIs(error_first, error_second) + self.assertFalse(readiness_first.flags.writeable) + self.assertEqual(error_first.shape, (4, 2)) + self.assertAlmostEqual(float(np.max(np.abs(error_first))), 1.0) + self.assertEqual(load.call_count, 1) + self.assertEqual(shift.call_count, 3) diff --git a/tests/test_config_and_utils.py b/tests/test_config_and_utils.py index 01be979..9144b29 100644 --- a/tests/test_config_and_utils.py +++ b/tests/test_config_and_utils.py @@ -3,8 +3,8 @@ import math import datetime -from typing import Literal, Optional, Mapping, cast from unittest import mock, TestCase +from typing import Literal, Optional, Mapping, cast from celune import config, utils from celune.constants import JSONSerializable diff --git a/tests/test_main_doctor.py b/tests/test_main_doctor.py index 05246f5..ca18c1a 100644 --- a/tests/test_main_doctor.py +++ b/tests/test_main_doctor.py @@ -3,8 +3,8 @@ import io import contextlib -from pathlib import Path from unittest import TestCase, mock +from pathlib import Path, PureWindowsPath import main @@ -14,6 +14,23 @@ class DoctorCommandTests(TestCase): """Verify `celune doctor` works without booting the full app.""" + def test_main_reports_unsupported_python_before_loading_entrypoint(self) -> None: + """Verify doctor on Python 3.11 exits cleanly before importing 3.12-only modules.""" + with ( + mock.patch.object(main.sys, "version_info", (3, 11, 9)), + mock.patch.object(main, "_load_entrypoint_module") as load_entrypoint, + contextlib.redirect_stdout(io.StringIO()) as stdout, + self.assertRaises(SystemExit) as exit_info, + ): + main.main(["celune", "doctor"]) + + self.assertEqual(exit_info.exception.code, 6) + load_entrypoint.assert_not_called() + output = stdout.getvalue() + self.assertIn("will not run on Python 3.11.9", output) + self.assertIn("use at least Python 3.12", output) + self.assertIn("doctor", output) + def test_main_routes_doctor_without_starting_app(self) -> None: """Verify the doctor branch exits through `run_doctor` instead of `start()`.""" with ( @@ -41,11 +58,43 @@ def test_run_doctor_fix_invokes_repo_setup(self) -> None: self.assertEqual(exit_code, 0) run.assert_called_once_with( - [entrypoint.sys.executable, str(entrypoint.SETUP_PATH)], + [str(entrypoint._doctor_running_python()), str(entrypoint.SETUP_PATH)], cwd=entrypoint.PROJECT_ROOT, check=False, ) + def test_run_doctor_fix_uses_repo_venv_python_when_compiled(self) -> None: + """Verify compiled doctor fixups use the repo virtualenv Python.""" + checks = [entrypoint.DoctorCheck("Python", True, "3.12.0")] + + with ( + mock.patch.object(entrypoint, "_doctor_checks", return_value=checks), + mock.patch.object(entrypoint, "running_compiled", return_value=True), + mock.patch.object( + entrypoint, + "_doctor_venv_python", + return_value=Path("C:/repo/.venv/Scripts/python.exe"), + ), + mock.patch.object(entrypoint.subprocess, "run") as run, + contextlib.redirect_stdout(io.StringIO()), + ): + run.return_value.returncode = 0 + exit_code = entrypoint.run_doctor(["celune", "doctor", "--fix"]) + + self.assertEqual(exit_code, 0) + + run.assert_called_once() + args, kwargs = run.call_args + command = args[0] + + self.assertEqual( + PureWindowsPath(command[0]), + PureWindowsPath(r"C:\repo\.venv\Scripts\python.exe"), + ) + self.assertEqual(command[1], str(entrypoint.SETUP_PATH)) + self.assertEqual(kwargs["cwd"], entrypoint.PROJECT_ROOT) + self.assertFalse(kwargs["check"]) + def test_run_doctor_rejects_unknown_args(self) -> None: """Verify unsupported doctor flags produce usage output and a CLI error code.""" with contextlib.redirect_stdout(io.StringIO()) as stdout: diff --git a/tests/test_modeling.py b/tests/test_modeling.py index 7f29c6d..71d4be0 100644 --- a/tests/test_modeling.py +++ b/tests/test_modeling.py @@ -2,8 +2,8 @@ """Tests for model loading helpers.""" from typing import cast -from unittest import TestCase, mock from types import SimpleNamespace +from unittest import TestCase, mock import torch diff --git a/tests/test_namedays_i18n_updater.py b/tests/test_namedays_i18n_updater.py index c4ef86f..c93c1b3 100644 --- a/tests/test_namedays_i18n_updater.py +++ b/tests/test_namedays_i18n_updater.py @@ -2,8 +2,11 @@ """Tests for lightweight data, localization, and update helpers.""" import datetime +import json import subprocess +import tempfile from unittest import mock, TestCase +from pathlib import Path from celune import i18n, namedays, updater @@ -169,6 +172,92 @@ def test_has_new_remote_revision_only_for_fast_forward_updates(self) -> None: ): self.assertFalse(updater._has_new_remote_revision("a" * 40, "b" * 40)) + def test_check_for_update_compiled_uses_bundle_checksums(self) -> None: + """Verify compiled update detection compares bundle checksums against artifact metadata.""" + with tempfile.TemporaryDirectory() as temp_dir: + bundle_dir = Path(temp_dir) + (bundle_dir / "celune.exe").write_bytes(b"launcher-old") + (bundle_dir / "celune-bin.exe").write_bytes(b"runtime-old") + manifest = { + "version": "4.1.0", + "revision": "a" * 40, + "artifact": "Celune-win-x64", + "files": { + "celune.exe": updater._sha256_file(bundle_dir / "celune.exe"), + "celune-bin.exe": updater._sha256_file( + bundle_dir / "celune-bin.exe" + ), + }, + } + (bundle_dir / updater.UPDATE_MANIFEST_NAME).write_text( + json.dumps(manifest), + encoding="utf-8", + ) + remote = updater.BundleManifest( + version="4.2.0", + revision="b" * 40, + artifact="Celune-win-x64", + files={ + "celune.exe": "1" * 64, + "celune-bin.exe": "2" * 64, + }, + ) + + with ( + mock.patch("celune.updater.running_compiled", return_value=True), + mock.patch("celune.updater._bundle_dir", return_value=bundle_dir), + mock.patch( + "celune.updater._read_remote_bundle_manifest", return_value=remote + ), + mock.patch("celune.updater._is_git_checkout", return_value=False), + ): + update = updater.check_for_update() + + self.assertIsNotNone(update) + if update is not None: + self.assertEqual(update.local_revision, "aaaaaaa") + self.assertEqual(update.latest_revision, "bbbbbbb") + self.assertEqual(update.latest_version, "4.2.0") + + def test_check_for_update_compiled_returns_none_when_bundle_matches_remote( + self, + ) -> None: + """Verify compiled update checks stay quiet when the local bundle already matches.""" + with tempfile.TemporaryDirectory() as temp_dir: + bundle_dir = Path(temp_dir) + (bundle_dir / "celune.exe").write_bytes(b"launcher") + (bundle_dir / "celune-bin.exe").write_bytes(b"runtime") + local_files = { + "celune.exe": updater._sha256_file(bundle_dir / "celune.exe"), + "celune-bin.exe": updater._sha256_file(bundle_dir / "celune-bin.exe"), + } + manifest = { + "version": "4.1.0", + "revision": "a" * 40, + "artifact": "Celune-win-x64", + "files": local_files, + } + (bundle_dir / updater.UPDATE_MANIFEST_NAME).write_text( + json.dumps(manifest), + encoding="utf-8", + ) + remote = updater.BundleManifest( + version="4.1.0", + revision="a" * 40, + artifact="Celune-win-x64", + files=local_files, + ) + + with ( + mock.patch("celune.updater.running_compiled", return_value=True), + mock.patch("celune.updater._bundle_dir", return_value=bundle_dir), + mock.patch( + "celune.updater._read_remote_bundle_manifest", return_value=remote + ), + mock.patch("celune.updater._is_git_checkout", return_value=False), + ): + self.assertIsNone(updater.check_for_update()) + def test_update_to_latest_rejects_unsafe_states(self) -> None: """Verify unsafe repository states reject automatic updates. diff --git a/tests/test_persona_api.py b/tests/test_persona_api.py index 8f6728b..6090e5d 100644 --- a/tests/test_persona_api.py +++ b/tests/test_persona_api.py @@ -2,8 +2,8 @@ """Tests for the shared Persona runtime helpers.""" import contextlib -from unittest import TestCase, mock from types import SimpleNamespace +from unittest import TestCase, mock from typing import Optional, Union, cast from celune.persona import impl @@ -157,7 +157,7 @@ def __call__(self, **kwargs) -> _FakeEncoded: class _FakeQwenVlConfig: """Minimal config fake exposing the expected Qwen VL model type.""" - model_type = "qwen2_5_vl" + model_type = "qwen3_vl" class PersonaApiTests(TestCase): @@ -202,7 +202,7 @@ def _mock_qwen_vl_load( ) model_loader = stack.enter_context( mock.patch( - "celune.persona.runtime.Qwen2_5_VLForConditionalGeneration.from_pretrained", + "celune.persona.runtime.Qwen3VLForConditionalGeneration.from_pretrained", return_value=model, ) ) diff --git a/tests/test_persona_memory.py b/tests/test_persona_memory.py index a3c0bf3..72cd186 100644 --- a/tests/test_persona_memory.py +++ b/tests/test_persona_memory.py @@ -3,9 +3,9 @@ import tempfile from pathlib import Path -from collections.abc import Sequence from unittest import TestCase from typing import Union, Optional +from collections.abc import Sequence import numpy as np diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 7e1be59..cfd6288 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -2,28 +2,29 @@ """Tests for pipeline helpers that do not perform real synthesis.""" import os +import sys import queue import tempfile import threading import json as _json from pathlib import Path -from collections.abc import Iterator from typing import cast, Optional -from unittest import mock, TestCase from types import SimpleNamespace +from unittest import mock, TestCase +from collections.abc import Iterator import numpy as np import numpy.typing as npt import soundfile as sf +from celune import pipeline from celune.celune import Celune from celune.utils import discard -from celune import pipeline from celune.persona.prompts import PersonaPromptBuilder from celune.constants import JSON, JSONSerializable, PipelineStates from celune.cevoice import CEVoicePersona, PersonaIdentity, PersonaStyleValues -from tests.support import FakeStream, make_pipeline_engine -from tests.test_persona_memory import StubEmbeddingMemoryStore +from .support import FakeStream, make_pipeline_engine +from .test_persona_memory import StubEmbeddingMemoryStore class PipelineTests(TestCase): @@ -72,27 +73,6 @@ def should_reload_for_language(self, lang: Optional[str]) -> bool: """ return self.resolve_generation_language(lang) != self.current_language - @staticmethod - def generation_progress_total(_text: str) -> None: - """Return no fixed generation budget for the fake backend. - - Args: - _text: Unused text value. - """ - return None - - @staticmethod - def generation_progress_steps(_timing: Optional[dict]) -> int: - """Return one fake generation step per chunk. - - Args: - _timing: Unused timing value. - - Returns: - int: The amount of steps that would be processed so far. - """ - return 1 - @staticmethod def model_id_for_voice(_voice: str) -> str: """Resolve the fake voice to one model identifier. @@ -209,6 +189,21 @@ def test_queue_speech_handles_success_and_failure_paths(self) -> None: request = engine.text_queue.get_nowait() self.assertEqual(request.language, "fr") + engine = make_pipeline_engine() + engine.backend = SimpleNamespace(name="qwen3", supported_languages=("en",)) + with mock.patch( + "celune.pipeline.detect_language", + return_value={ + "language": "en", + "languages": ["en"], + "supported": True, + "probabilities": {"en": 1.0}, + }, + ): + self.assertEqual(pipeline.queue_speech(cast(Celune, engine), "hello"), True) + request = engine.text_queue.get_nowait() + self.assertEqual(request.language, "Auto") + engine = make_pipeline_engine() engine.is_in_tutorial = True self.assertEqual(pipeline.queue_speech(cast(Celune, engine), "hello"), False) @@ -219,6 +214,592 @@ def test_queue_speech_handles_success_and_failure_paths(self) -> None: self.assertEqual(pipeline.queue_speech(cast(Celune, engine), "hello"), False) self.assertEqual(engine.errors, ["Celune is not currently ready"]) + def test_download_youtube_sfx_writes_expected_temp_wav(self) -> None: + """Verify yt-dlp downloads to Celune's fixed temporary WAV path.""" + engine = make_pipeline_engine() + with tempfile.TemporaryDirectory() as temp_dir: + temp_root = Path(temp_dir) + expected = temp_root / "temp" / "temporary_audio.wav" + + def fake_run(*args, **kwargs): + discard(args) + discard(kwargs) + expected.write_bytes(b"RIFFdemoWAVE") + return SimpleNamespace( + returncode=0, + stdout="Fixture Video Title\n", + stderr="", + ) + + with ( + mock.patch("celune.pipeline.app_data_dir", return_value=temp_root), + mock.patch( + "celune.pipeline.importlib_util.find_spec", return_value=object() + ), + mock.patch( + "celune.pipeline._youtube_sfx_title", + return_value="Fixture Video Title", + ), + mock.patch( + "celune.pipeline.subprocess.run", side_effect=fake_run + ) as run, + ): + resolved = pipeline._download_youtube_sfx( + cast(Celune, engine), + "https://youtu.be/demo", + ) + + self.assertEqual(resolved, (expected, "Fixture Video Title")) + command = run.call_args.args[0] + self.assertEqual(command[0], sys.executable) + self.assertEqual(command[1:3], ["-m", "yt_dlp"]) + self.assertNotIn("--print", command) + self.assertIn(str(temp_root / "temp" / "temporary_audio.%(ext)s"), command) + + def test_download_youtube_sfx_uses_repo_venv_python_when_compiled(self) -> None: + """Verify compiled launches call yt-dlp through the repo venv Python.""" + engine = make_pipeline_engine() + with tempfile.TemporaryDirectory() as temp_dir: + temp_root = Path(temp_dir) + expected = temp_root / "temp" / "temporary_audio.wav" + + def fake_run(*args, **kwargs): + discard(args) + discard(kwargs) + expected.write_bytes(b"RIFFdemoWAVE") + return SimpleNamespace(returncode=0, stdout="", stderr="") + + with ( + mock.patch("celune.pipeline.app_data_dir", return_value=temp_root), + mock.patch( + "celune.pipeline.importlib_util.find_spec", return_value=object() + ), + mock.patch( + "celune.pipeline._youtube_sfx_title", + return_value="Fixture Video Title", + ), + mock.patch("celune.pipeline.running_compiled", return_value=True), + mock.patch("celune.pipeline.project_root", return_value=Path("/repo")), + mock.patch( + "celune.pipeline.subprocess.run", side_effect=fake_run + ) as run, + ): + resolved = pipeline._download_youtube_sfx( + cast(Celune, engine), + "https://youtu.be/demo", + ) + + self.assertEqual(resolved, (expected, "Fixture Video Title")) + command = run.call_args.args[0] + expected_python = ( + r"/repo/.venv/bin/python" + if os.name != "nt" + else r"\repo\.venv\Scripts\python.exe" + ) + self.assertEqual(command[0], expected_python) + self.assertEqual(command[1:3], ["-m", "yt_dlp"]) + + def test_download_youtube_sfx_logs_missing_file_state(self) -> None: + """Verify missing yt-dlp output uses the current no-file warning messages.""" + engine = make_pipeline_engine() + with tempfile.TemporaryDirectory() as temp_dir: + temp_root = Path(temp_dir) + + with ( + mock.patch("celune.pipeline.app_data_dir", return_value=temp_root), + mock.patch( + "celune.pipeline.importlib_util.find_spec", return_value=object() + ), + mock.patch( + "celune.pipeline._youtube_sfx_title", + return_value="Fixture Video Title", + ), + mock.patch( + "celune.pipeline.subprocess.run", + return_value=SimpleNamespace( + returncode=0, + stdout="postprocessor said something", + stderr="", + ), + ), + ): + resolved = pipeline._download_youtube_sfx( + cast(Celune, engine), + "https://youtu.be/demo", + ) + + self.assertIsNone(resolved) + warnings = [msg for msg, severity in engine.messages if severity == "warning"] + self.assertIn("Downloader returned no file.", warnings) + self.assertIn("postprocessor said something", warnings) + self.assertNotIn("Could not download audio.", warnings) + self.assertNotIn("Audio downloading failed:", warnings) + self.assertTrue(any("postprocessor said something" in msg for msg in warnings)) + self.assertEqual(engine.errors[-1], "Could not download YouTube audio") + + def test_download_youtube_sfx_logs_download_failure_state(self) -> None: + """Verify yt-dlp failures use the current download-failed warning messages.""" + engine = make_pipeline_engine() + with tempfile.TemporaryDirectory() as temp_dir: + temp_root = Path(temp_dir) + + with ( + mock.patch("celune.pipeline.app_data_dir", return_value=temp_root), + mock.patch( + "celune.pipeline.importlib_util.find_spec", return_value=object() + ), + mock.patch( + "celune.pipeline._youtube_sfx_title", + return_value="Fixture Video Title", + ), + mock.patch( + "celune.pipeline.subprocess.run", + return_value=SimpleNamespace( + returncode=1, + stdout="", + stderr="yt-dlp exploded", + ), + ), + ): + resolved = pipeline._download_youtube_sfx( + cast(Celune, engine), + "https://youtu.be/demo", + ) + + self.assertIsNone(resolved) + warnings = [msg for msg, severity in engine.messages if severity == "warning"] + self.assertIn("Could not download audio.", warnings) + self.assertIn("yt-dlp exploded", warnings) + self.assertNotIn("Downloader returned no file.", warnings) + self.assertEqual(engine.errors[-1], "Could not download YouTube audio") + + def test_youtube_sfx_title_reads_oembed_title(self) -> None: + """Verify YouTube titles can be resolved without yt-dlp title output.""" + + class FakeResponse: + """Minimal urlopen response stub.""" + + def __enter__(self) -> "FakeResponse": + return self + + def __exit__(self, *_args: object) -> None: + return None + + @staticmethod + def read() -> bytes: + """Read a mock video title. + + Returns: + bytes: Mock JSON payload returned by the fake HTTP response. + """ + return b'{"title":"Fixture Video Title"}' + + with mock.patch("celune.pipeline.urlopen", return_value=FakeResponse()): + title = pipeline._youtube_sfx_title("https://youtu.be/demo") + + self.assertEqual(title, "Fixture Video Title") + + def test_play_accepts_youtube_url_via_downloaded_wav(self) -> None: + """Verify YouTube URLs are resolved to a WAV and played as SFX.""" + engine = make_pipeline_engine() + downloaded = Path("C:/Users/user/AppData/Local/Celune/temporary_audio.wav") + audio = np.ones((8, 2), dtype=np.float32) + + with ( + mock.patch( + "celune.pipeline._download_youtube_sfx", + return_value=(downloaded, "Fixture Video Title"), + ) as download, + mock.patch("celune.pipeline.os.path.exists", return_value=True), + mock.patch("celune.pipeline.sf.read", return_value=(audio, 48000)) as read, + mock.patch( + "celune.pipeline.queue_sfx_audio", return_value=True + ) as queue_audio, + ): + ok = pipeline.play( + cast(Celune, engine), + "https://www.youtube.com/watch?v=demo", + keep=True, + volume=0.4, + ) + + self.assertEqual(ok, True) + download.assert_called_once() + read.assert_called_once_with(str(downloaded), dtype="float32") + queued_args = queue_audio.call_args.args + queued_kwargs = queue_audio.call_args.kwargs + self.assertEqual(queued_args[0], cast(Celune, engine)) + np.testing.assert_allclose(queued_args[1], np.asarray(audio, dtype=np.float32)) + self.assertEqual(queued_args[2:], (48000, "Fixture Video Title", True)) + self.assertEqual(queued_kwargs, {"volume": 0.4}) + + def test_queue_sfx_audio_allows_overlay_while_speech_pipeline_is_locked( + self, + ) -> None: + """Verify SFX sources can be queued while speech already owns the pipeline.""" + engine = make_pipeline_engine() + engine.locked = True + engine.stream = None + engine._stream = None + engine._current_sr = None + engine.current_sr = None + engine.dev = False + engine.current_voice = "balanced" + engine.idle_callback = mock.Mock() + engine.glow = SimpleNamespace(schedule=mock.Mock()) + audio = np.ones((4800, 2), dtype=np.float32) * 0.25 + + ok = pipeline.queue_sfx_audio( + cast(Celune, engine), + audio, + 48000, + "fixture", + ) + + self.assertEqual(ok, True) + self.assertEqual(engine.playback_done.is_set(), False) + queued = list(engine.audio_queue.queue) + self.assertTrue( + any(isinstance(item, pipeline.PlaybackChunk) for item in queued) + ) + self.assertTrue( + any(isinstance(item, pipeline.PlaybackSourceDone) for item in queued) + ) + + def test_playback_worker_mixes_sources_and_glow_receives_mixed_audio(self) -> None: + """Verify the DSP mixer sums overlapping sources before playback/probing.""" + engine = make_pipeline_engine() + engine.stream = None + engine._stream = None + engine._current_sr = None + engine.current_sr = None + engine.dev = False + engine.current_voice = "balanced" + engine.idle_callback = mock.Mock() + glow_calls: list[npt.NDArray[np.float32]] = [] + engine.glow = SimpleNamespace( + schedule=lambda audio: glow_calls.append(np.asarray(audio)) + ) + engine.text_queue = queue.Queue() + engine.audio_queue = queue.Queue() + engine.sentinel = PipelineStates.TERMINATE + engine.force_stop_marker = PipelineStates.UTTERANCE_FORCE_END + fake_stream = FakeStream() + + pipeline._queue_playback_chunk( + cast(Celune, engine), + 1, + np.full((2400, 2), 0.2, dtype=np.float32), + 48000, + ) + pipeline._queue_playback_chunk( + cast(Celune, engine), + 2, + np.full((2400, 2), 0.3, dtype=np.float32), + 48000, + ) + pipeline._queue_playback_done(cast(Celune, engine), 1) + pipeline._queue_playback_done(cast(Celune, engine), 2) + engine.audio_queue.put(engine.sentinel) + + with mock.patch("celune.pipeline.sd.OutputStream", return_value=fake_stream): + pipeline.playback_worker(cast(Celune, engine)) + + self.assertEqual(fake_stream.started, True) + self.assertGreater(len(fake_stream.written), 1) + mixed_audio = np.concatenate(fake_stream.written) + np.testing.assert_allclose(mixed_audio, 0.5, atol=1e-6) + self.assertEqual(len(glow_calls), len(fake_stream.written)) + np.testing.assert_allclose(np.concatenate(glow_calls), 0.5, atol=1e-6) + self.assertEqual(engine.playback_done.is_set(), True) + + def test_playback_worker_reports_live_audio_progress(self) -> None: + """Verify playback progress follows audio position without flooding updates.""" + engine = make_pipeline_engine() + engine.stream = None + engine._stream = None + engine._current_sr = None + engine.current_sr = None + engine.dev = False + engine.current_voice = "balanced" + engine.idle_callback = mock.Mock() + engine.glow = SimpleNamespace(schedule=mock.Mock()) + engine.text_queue = queue.Queue() + engine.audio_queue = queue.Queue() + engine.sentinel = PipelineStates.TERMINATE + engine.force_stop_marker = PipelineStates.UTTERANCE_FORCE_END + fake_stream = FakeStream() + + self.assertEqual( + pipeline.queue_sfx_audio( + cast(Celune, engine), + np.full((2400 * 8, 2), 0.25, dtype=np.float32), + 48000, + "progress.wav", + ), + True, + ) + engine.audio_queue.put(engine.sentinel) + + monotonic_values = iter(i * 0.01 for i in range(500)) + with ( + mock.patch("celune.pipeline.sd.OutputStream", return_value=fake_stream), + mock.patch( + "celune.pipeline.time.monotonic", + side_effect=lambda: next(monotonic_values), + ), + ): + pipeline.playback_worker(cast(Celune, engine)) + + in_flight = [ + (current, total) + for current, total in engine.progress + if current is not None + and total is not None + and total > 1 + and current < total + ] + self.assertTrue(in_flight) + self.assertLess(len(in_flight), len(fake_stream.written)) + self.assertEqual(engine.progress[-1], (1, 1)) + + def test_playback_worker_admits_speech_after_sfx_has_already_started(self) -> None: + """Verify late-arriving speech reaches the DSP while SFX is still active.""" + engine = make_pipeline_engine() + engine.stream = None + engine._stream = None + engine._current_sr = None + engine.current_sr = None + engine.dev = False + engine.current_voice = "balanced" + engine.idle_callback = mock.Mock() + engine.sentinel = PipelineStates.TERMINATE + engine.force_stop_marker = PipelineStates.UTTERANCE_FORCE_END + engine.text_queue = queue.Queue() + engine.audio_queue = queue.Queue() + + class InjectingStream(FakeStream): + """A fake injecting stream.""" + + def __init__(self) -> None: + super().__init__() + self.injected = False + + def write(self, audio: npt.NDArray[np.float32]) -> None: + super().write(audio) + if not self.injected: + self.injected = True + pipeline._queue_playback_chunk( + cast(Celune, engine), + 2, + np.full((2400, 2), 0.4, dtype=np.float32), + 48000, + ) + pipeline._queue_playback_done( + cast(Celune, engine), + 2, + release_pipeline_when_finished=True, + ) + engine.audio_queue.put(engine.sentinel) + + fake_stream = InjectingStream() + pipeline._queue_playback_chunk( + cast(Celune, engine), + 1, + np.full((9600, 2), 0.1, dtype=np.float32), + 48000, + ) + pipeline._queue_playback_done(cast(Celune, engine), 1) + + with mock.patch("celune.pipeline.sd.OutputStream", return_value=fake_stream): + pipeline.playback_worker(cast(Celune, engine)) + + blocks = fake_stream.written + self.assertGreaterEqual(len(blocks), 3) + self.assertTrue(any(np.max(block) > 0.45 for block in blocks[1:])) + self.assertEqual(engine.playback_done.is_set(), True) + + def test_playback_status_restores_prior_sfx_label_after_speech_finishes( + self, + ) -> None: + """Verify mixed playback restores the prior SFX status after speech ends.""" + engine = make_pipeline_engine() + engine.stream = None + engine._stream = None + engine._current_sr = None + engine.current_sr = None + engine.dev = False + engine.current_voice = "balanced" + engine.idle_callback = mock.Mock() + engine.sentinel = PipelineStates.TERMINATE + engine.force_stop_marker = PipelineStates.UTTERANCE_FORCE_END + engine.text_queue = queue.Queue() + engine.audio_queue = queue.Queue() + + class InjectingStream(FakeStream): + """A fake injecting stream.""" + + def __init__(self) -> None: + super().__init__() + self.injected = False + + def write(self, audio: npt.NDArray[np.float32]) -> None: + super().write(audio) + if not self.injected: + self.injected = True + pipeline._set_playback_source_status( + cast(Celune, engine), 2, "Speaking" + ) + pipeline._queue_playback_chunk( + cast(Celune, engine), + 2, + np.full((2400, 2), 0.4, dtype=np.float32), + 48000, + ) + pipeline._queue_playback_done( + cast(Celune, engine), + 2, + release_pipeline_when_finished=True, + ) + engine.audio_queue.put(engine.sentinel) + + fake_stream = InjectingStream() + self.assertEqual( + pipeline.queue_sfx_audio( + cast(Celune, engine), + np.full((9600, 2), 0.1, dtype=np.float32), + 48000, + "loop.wav", + ), + True, + ) + + with mock.patch("celune.pipeline.sd.OutputStream", return_value=fake_stream): + pipeline.playback_worker(cast(Celune, engine)) + + statuses = [msg for msg, _ in engine.statuses] + self.assertIn("Playing loop.wav", statuses) + self.assertIn("Speaking", statuses) + speaking_index = statuses.index("Speaking") + self.assertIn("Playing loop.wav", statuses[speaking_index + 1 :]) + + def test_playback_worker_ducks_sfx_to_quarter_and_restores_with_fades(self) -> None: + """Verify speech ducks SFX to 25 percent, then fades it back up.""" + engine = make_pipeline_engine() + engine.stream = None + engine._stream = None + engine._current_sr = None + engine.current_sr = None + engine.dev = False + engine.current_voice = "balanced" + engine.idle_callback = mock.Mock() + engine.sentinel = PipelineStates.TERMINATE + engine.force_stop_marker = PipelineStates.UTTERANCE_FORCE_END + engine.text_queue = queue.Queue() + engine.audio_queue = queue.Queue() + + class InjectingStream(FakeStream): + """A fake stream that injects speech after SFX has started.""" + + def __init__(self) -> None: + super().__init__() + self.injected = False + + def write(self, audio: npt.NDArray[np.float32]) -> None: + super().write(audio) + if not self.injected: + self.injected = True + pipeline._register_playback_source( + cast(Celune, engine), 2, kind="speech" + ) + pipeline._set_playback_source_status( + cast(Celune, engine), 2, "Speaking" + ) + for _ in range(3): + pipeline._queue_playback_chunk( + cast(Celune, engine), + 2, + np.zeros((2400, 2), dtype=np.float32), + 48000, + ) + pipeline._queue_playback_done( + cast(Celune, engine), + 2, + release_pipeline_when_finished=True, + ) + engine.audio_queue.put(engine.sentinel) + + fake_stream = InjectingStream() + self.assertEqual( + pipeline.queue_sfx_audio( + cast(Celune, engine), + np.ones((2400 * 12, 2), dtype=np.float32), + 48000, + "duck.wav", + volume=0.8, + ), + True, + ) + + with mock.patch("celune.pipeline.sd.OutputStream", return_value=fake_stream): + pipeline.playback_worker(cast(Celune, engine)) + + means = [float(np.mean(block)) for block in fake_stream.written] + self.assertGreaterEqual(len(means), 6) + self.assertGreater(means[0], 0.79) + self.assertLess(min(means), 0.45) + min_index = means.index(min(means)) + self.assertGreater(min_index, 0) + self.assertLess(means[min_index], means[0]) + self.assertGreater(means[-1], means[min_index] + 0.25) + self.assertGreater(means[-1], 0.7) + + def test_force_stop_resets_glow_audio_reactivity(self) -> None: + """Verify forced playback stop clears the glow's audio-reactive state.""" + engine = make_pipeline_engine() + engine.stream = None + engine._stream = None + engine._current_sr = None + engine.current_sr = None + engine.dev = False + engine.current_voice = "balanced" + engine.idle_callback = mock.Mock() + engine.sentinel = PipelineStates.TERMINATE + engine.force_stop_marker = PipelineStates.UTTERANCE_FORCE_END + engine.text_queue = queue.Queue() + engine.audio_queue = queue.Queue() + fake_stream = FakeStream() + + pipeline._queue_playback_chunk( + cast(Celune, engine), + 1, + np.full((2400, 2), 0.3, dtype=np.float32), + 48000, + ) + engine.audio_queue.put(engine.force_stop_marker) + engine.audio_queue.put(engine.sentinel) + + with mock.patch("celune.pipeline.sd.OutputStream", return_value=fake_stream): + pipeline.playback_worker(cast(Celune, engine)) + + engine.glow.reset_audio_reactivity.assert_called_once_with() + self.assertEqual(engine.playback_done.is_set(), True) + engine.idle_callback.assert_called_once_with() + + def test_finalize_playback_idle_resets_glow_audio_reactivity(self) -> None: + """Verify normal playback completion restores the resting glow.""" + engine = make_pipeline_engine() + engine.locked = False + engine.cur_state = "speaking" + engine.dev = False + + pipeline._finalize_playback_idle(cast(Celune, engine)) + + engine.glow.reset_audio_reactivity.assert_called_once_with() + self.assertEqual(engine.playback_done.is_set(), True) + self.assertEqual(engine.cur_state, "idle") + engine.idle_callback.assert_called_once_with() + def test_think_builds_persona_payload_and_queues_response(self) -> None: """Verify Persona request formatting without loading a Persona model. @@ -337,7 +918,7 @@ def post(self, json: JSON) -> FakeResponse: self.assertIn("Read the conversation in ", system_prompt) self.assertIn("Earlier reply.", system_prompt) self.assertIn("user: What now?", system_prompt) - self.assertIn('[Do not reuse or rephrase: "Earlier reply."]', system_prompt) + self.assertIn("The assistant has already acknowledged", system_prompt) self.assertIn("Celune:", system_prompt) self.assertNotIn("", system_prompt) self.assertNotIn("", system_prompt) @@ -387,7 +968,7 @@ def test_persona_context_omits_voice_prompt_when_unsupported(self) -> None: def test_persona_card_uses_baseline_persona_for_non_default_voice_pack( self, ) -> None: - """Verify custom CEVOICE packs do not inherit Celune-specific defaults. + """Verify custom CEVOICE/CECHAR packs do not inherit Celune-specific defaults. Raises: AssertionError: Persona card fallback behavior changes unexpectedly. @@ -462,7 +1043,7 @@ def test_persona_prompt_builder_renders_structured_context_blocks(self) -> None: self.assertIn("assistant: Yes, we catalogued the letters.", prompt) self.assertIn("user: What do you notice?", prompt) self.assertIn( - '[Do not reuse or rephrase: "Yes, we catalogued the letters."]', + "The assistant has already acknowledged", prompt, ) self.assertIn("", prompt) @@ -1061,8 +1642,6 @@ def normalize(value: str) -> str: engine.backend = SimpleNamespace( generate_stream=generate_stream, - generation_progress_total=lambda text: None, - generation_progress_steps=lambda timing: 1, ) engine.model_lock = threading.Lock() engine.model = mock.Mock() diff --git a/tests/test_runtime_and_ui_commands.py b/tests/test_runtime_and_ui_commands.py index 8db5c46..9f08f82 100644 --- a/tests/test_runtime_and_ui_commands.py +++ b/tests/test_runtime_and_ui_commands.py @@ -7,16 +7,17 @@ import warnings from typing import cast from pathlib import Path -from unittest import mock, TestCase from types import SimpleNamespace +from unittest import mock, TestCase +from textual import events from textual.widgets import Button, Label, RichLog, TextArea -from celune.backends.qwen3 import Qwen3 -from celune.celune import Celune +from celune import runtime from celune.config import Config +from celune.celune import Celune +from celune.backends.qwen3 import Qwen3 from celune.constants import APP_NAME, JSONSerializable -from celune import runtime from celune.ui.app import CeluneUI from celune.ui.headless import CeluneHeadlessUI from celune.ui import resources as ui_resources @@ -145,6 +146,24 @@ def log(msg: str, severity: str) -> None: class UICommandTests(TestCase): """Tests for lightweight slash command behavior.""" + @staticmethod + def _thread_runs_immediately(*args, **kwargs): + """Return a thread-like object whose start runs the target immediately.""" + target = kwargs.get("target") + if target is None and args: + target = args[0] + + class _ImmediateThread: + """An immediate thread harness object.""" + + @staticmethod + def start() -> None: + """Start the thread.""" + if target is not None: + target() + + return _ImmediateThread() + def setUp(self) -> None: self.logs: list[tuple[str, str]] = [] self.ui = SimpleNamespace() @@ -161,6 +180,7 @@ def setUp(self) -> None: speed=1.0, reverb=SimpleNamespace(strength=0.0), say=mock.Mock(return_value=True), + play=mock.Mock(return_value=True), vision=SimpleNamespace(enabled=True, talkback=True), ) @@ -276,6 +296,29 @@ def test_say_command_warns_when_text_is_missing(self) -> None: self.ui.celune.say.assert_not_called() self.assertEqual(self.logs[-1], ("Usage: /say ", "warning")) + def test_play_command_passes_optional_volume(self) -> None: + """Verify /play forwards the optional volume argument to Celune.""" + self.ui.celune.play.return_value = True + + with mock.patch( + "celune.ui.commands.threading.Thread", + side_effect=self._thread_runs_immediately, + ): + self._process_command("play", ["tone.wav", "0.4"]) + + self.ui.celune.play.assert_called_once_with("tone.wav", volume=0.4) + self.assertEqual(self.logs[-1], ("Playing tone.wav at 40% volume", "info")) + + def test_play_command_rejects_invalid_volume(self) -> None: + """Verify /play validates a numeric optional volume argument.""" + self._process_command("play", ["tone.wav", "loud"]) + + self.ui.celune.play.assert_not_called() + self.assertEqual( + self.logs[-1], + ("Invalid volume for 'play', must be numeric.", "warning"), + ) + def test_say_command_reports_unmatched_ipa_characters(self) -> None: """Verify /say keeps the usual unmatched-IPA warning path.""" with mock.patch( @@ -407,6 +450,25 @@ def test_headless_ui_warns_without_attached_celune(self) -> None: str(caught[0].message), ) + def test_load_tts_marks_ui_error_when_startup_returns_false(self) -> None: + """Verify handled startup failures leave the UI in an error state.""" + ui = CeluneUI() + ui.celune = cast( + Celune, + SimpleNamespace( + load=lambda: False, + dev=False, + glow=SimpleNamespace(fatal=lambda: None), + ), + ) + ui.error = mock.Mock() + + load_tts = getattr(CeluneUI.load_tts, "__wrapped__", CeluneUI.load_tts) + load_tts(ui) + + ui.error.assert_called_once_with(f"{APP_NAME} could not start") + self.assertEqual(ui.cur_state, "error") + def test_textual_resource_footer_only_advertises_ctrl_q_exit(self) -> None: """Verify the Textual UI footer no longer advertises CTRL+C exit.""" celune = cast( @@ -423,6 +485,25 @@ def test_textual_resource_footer_only_advertises_ctrl_q_exit(self) -> None: exit_page = next(page for page in pages if "CTRL+Q exit" in page) self.assertNotIn("CTRL+C", exit_page) + def test_gpu_usage_handles_closed_stdout_pipe(self) -> None: + """Verify resource polling ignores closed-pipe nvidia-smi failures.""" + proc = mock.Mock() + proc.poll.return_value = 0 + proc.communicate.side_effect = ValueError("I/O operation on closed file.") + + with mock.patch("celune.ui.resources._NVIDIA_SMI", "nvidia-smi"): + previous_proc = ui_resources._NVIDIA_SMI_PROC + previous_usage = ui_resources._NVIDIA_SMI_USAGE + ui_resources._NVIDIA_SMI_PROC = proc + ui_resources._NVIDIA_SMI_USAGE = 42 + try: + self.assertIsNone(ui_resources.gpu_usage()) + self.assertIsNone(ui_resources._NVIDIA_SMI_PROC) + self.assertIsNone(ui_resources._NVIDIA_SMI_USAGE) + finally: + ui_resources._NVIDIA_SMI_PROC = previous_proc + ui_resources._NVIDIA_SMI_USAGE = previous_usage + def test_textual_input_lock_update_with_persona_on_ui_thread(self) -> None: """Verify input state updates update with Persona.""" ui = CeluneUI() @@ -547,3 +628,100 @@ def test_runtime_warning_capture_routes_py_warnings_triton_message(self) -> None ) ], ) + + def test_safe_status_marquees_long_text_for_narrow_status_label(self) -> None: + """Verify long status text scrolls instead of clipping.""" + + class FakeLabel: + """Tiny fake status label with a constrained width.""" + + def __init__(self, width: int) -> None: + self.size = SimpleNamespace(width=width) + self.styles = SimpleNamespace(color=None) + self.rendered = "" + + def update(self, value: str) -> None: + """Update the marquee label text. + + Args: + value: New rendered text captured from the UI update call. + """ + self.rendered = value + + ui = CeluneUI() + fake_status = FakeLabel(width=14) + ui.status = cast(Label, fake_status) + ui.resources = cast(Label, None) + + ui.safe_status("Playing C:/Users/user/Music/really_long_filename_demo.wav") + first = fake_status.rendered + ui._advance_status_marquee() + second = fake_status.rendered + + self.assertNotEqual(first, second) + self.assertTrue(first.startswith(" ")) + self.assertTrue(second.startswith(" ")) + + def test_safe_status_keeps_short_text_static(self) -> None: + """Verify short status text does not marquee.""" + + class FakeLabel: + """Tiny fake status label with a constrained width.""" + + def __init__(self, width: int) -> None: + self.size = SimpleNamespace(width=width) + self.styles = SimpleNamespace(color=None) + self.rendered = "" + + def update(self, value: str) -> None: + """Update the marquee label text. + + Args: + value: New rendered text captured from the UI update call. + """ + self.rendered = value + + ui = CeluneUI() + fake_status = FakeLabel(width=40) + ui.status = cast(Label, fake_status) + ui.resources = cast(Label, None) + + ui.safe_status("Playing") + first = fake_status.rendered + ui._advance_status_marquee() + + self.assertEqual(first, fake_status.rendered) + + def test_resize_repaints_status_after_width_change(self) -> None: + """Verify widening the status label re-renders the current text immediately.""" + + class FakeLabel: + """Tiny fake status label with a mutable width.""" + + def __init__(self, width: int) -> None: + self.size = SimpleNamespace(width=width) + self.styles = SimpleNamespace(color=None) + self.rendered = "" + + def update(self, value: str) -> None: + """Update the marquee label text. + + Args: + value: New rendered text captured from the UI update call. + """ + self.rendered = value + + ui = CeluneUI() + fake_status = FakeLabel(width=14) + ui.status = cast(Label, fake_status) + ui.resources = cast(Label, None) + + message = "Playing C:/Users/user/Music/really_long_filename_demo.wav" + ui.safe_status(message) + narrow = fake_status.rendered + + fake_status.size = SimpleNamespace(width=96) + ui.on_resize(cast(events.Resize, SimpleNamespace())) + + self.assertNotEqual(narrow, fake_status.rendered) + self.assertEqual(fake_status.rendered, f" {message}") diff --git a/tests/test_runtime_paths.py b/tests/test_runtime_paths.py index d3458ae..f302985 100644 --- a/tests/test_runtime_paths.py +++ b/tests/test_runtime_paths.py @@ -2,6 +2,8 @@ """Tests for Celune runtime path handling.""" import tempfile +import sys +import os from pathlib import Path from typing import cast from unittest import TestCase, mock @@ -10,8 +12,9 @@ from textual.widgets import RichLog from celune.constants import APP_SLUG -from celune.paths import ensure_config_path +from celune.paths import ensure_config_path, project_root, running_compiled from celune.persona.memory import default_memory_dir +from celune.cevoice import bundled_voices_dir, default_bundle_path from celune.ui.app import CeluneUI from celune.utils import format_error @@ -19,6 +22,30 @@ class RuntimePathTests(TestCase): """Verify runtime files are written into the user data directory.""" + @staticmethod + def _compiled_root_layout(root_parts: tuple[str, ...]) -> tuple[Path, Path]: + """Return a platform-native fake app root and compiled executable path.""" + if os.name == "nt": + root = Path("C:/", *root_parts) + executable = root / "celune.exe" + return root, executable + + root = Path("/", *root_parts) + executable = root / "celune" + return root, executable + + @staticmethod + def _compiled_bin_layout(root_parts: tuple[str, ...]) -> tuple[Path, Path]: + """Return a platform-native fake repo root and compiled bin executable path.""" + if os.name == "nt": + root = Path("C:/", *root_parts) + executable = root / "bin" / "celune.exe" + return root, executable + + root = Path("/", *root_parts) + executable = root / "bin" / "celune" + return root, executable + def tearDown(self) -> None: """Reset singleton UI guards after each test.""" CeluneUI._instance = None @@ -94,3 +121,59 @@ def test_ensure_config_path_prefers_legacy_repo_config(self) -> None: self.assertTrue(was_created) self.assertEqual(saved["theme"], "light") self.assertEqual(saved["headless"], True) + + def test_running_compiled_detects_compiled_main_module(self) -> None: + """Verify compiled-mode detection checks the active main module.""" + main_module = sys.modules["__main__"] + original = getattr(main_module, "__compiled__", None) + had_attr = hasattr(main_module, "__compiled__") + + # the type errors are suppressed because they are Nuitka specific + try: + main_module.__compiled__ = True # type: ignore[missing-attribute] + self.assertTrue(running_compiled()) + finally: + if had_attr: + main_module.__compiled__ = original # type: ignore[missing-attribute] + else: + delattr(main_module, "__compiled__") + + def test_compiled_project_root_and_bundled_paths_follow_executable(self) -> None: + """Verify bundled files resolve beside the compiled executable.""" + fake_main = type("CompiledMain", (), {"__compiled__": True})() + expected_root, executable = self._compiled_root_layout(("Apps", "Celune")) + + with ( + mock.patch.dict(sys.modules, {"__main__": fake_main}), + mock.patch.object(sys, "argv", [str(executable)]), + ): + self.assertEqual(project_root(), expected_root) + self.assertEqual( + default_bundle_path(), + expected_root / "voices" / "default.cevoice", + ) + self.assertEqual(bundled_voices_dir(), expected_root / "voices") + + def test_compiled_project_root_uses_repo_parent_when_running_from_bin(self) -> None: + """Verify compiled launches from bin/ still resolve the repository root.""" + fake_main = type("CompiledMain", (), {"__compiled__": True})() + expected_root, executable = self._compiled_bin_layout(("repo",)) + + def fake_exists(path: Path) -> bool: + normalized = str(path).replace("\\", "/") + return normalized in { + str(expected_root / "celune").replace("\\", "/"), + str(expected_root / "default_config.yaml").replace("\\", "/"), + str(expected_root / "pyproject.toml").replace("\\", "/"), + } + + with ( + mock.patch.dict(sys.modules, {"__main__": fake_main}), + mock.patch.object(sys, "argv", [str(executable)]), + mock.patch.object(Path, "exists", fake_exists), + ): + self.assertEqual(project_root(), expected_root) + self.assertEqual( + default_bundle_path(), + expected_root / "voices" / "default.cevoice", + ) diff --git a/uv.lock b/uv.lock index 87c1408..e82802f 100644 --- a/uv.lock +++ b/uv.lock @@ -330,11 +330,12 @@ wheels = [ [[package]] name = "celune" -version = "4.0.1" +version = "4.1.0" source = { virtual = "." } dependencies = [ { name = "accelerate" }, { name = "bitsandbytes" }, + { name = "dots-tts" }, { name = "faster-qwen3-tts" }, { name = "hf-xet" }, { name = "httpx" }, @@ -363,6 +364,7 @@ dependencies = [ { name = "torchvision" }, { name = "transformers" }, { name = "voxcpm" }, + { name = "yt-dlp" }, ] [package.optional-dependencies] @@ -376,6 +378,7 @@ api = [ [package.dev-dependencies] dev = [ { name = "docstr-coverage" }, + { name = "nuitka" }, { name = "pylint" }, { name = "pyrefly" }, { name = "pytest" }, @@ -389,6 +392,7 @@ dev = [ requires-dist = [ { name = "accelerate" }, { name = "bitsandbytes" }, + { name = "dots-tts", git = "https://github.com/celunah/dots.tts" }, { name = "fastapi", marker = "extra == 'api'" }, { name = "faster-qwen3-tts", specifier = ">=0.2.4" }, { name = "hf-xet" }, @@ -421,12 +425,14 @@ requires-dist = [ { name = "transformers", specifier = "<5.0.0" }, { name = "uvicorn", marker = "extra == 'api'" }, { name = "voxcpm", specifier = ">=2.0.2" }, + { name = "yt-dlp" }, ] provides-extras = ["api"] [package.metadata.requires-dev] dev = [ { name = "docstr-coverage" }, + { name = "nuitka", specifier = ">=4.1.2" }, { name = "pylint" }, { name = "pyrefly" }, { name = "pytest" }, @@ -783,6 +789,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/5f/ed01f9a3cdffbd5a008556fc7b2a08ddb1cc6ace7effa7340604b1d16699/docstring_parser-0.18.0-py3-none-any.whl", hash = "sha256:b3fcbed555c47d8479be0796ef7e19c2670d428d72e96da63f3a40122860374b", size = 22484, upload-time = "2026-04-14T04:09:18.638Z" }, ] +[[package]] +name = "dots-tts" +version = "0.1.0.post0" +source = { git = "https://github.com/celunah/dots.tts#b448b30c1fb7264ba890347a60ce9b3e64a03c61" } +dependencies = [ + { name = "einops" }, + { name = "gradio" }, + { name = "huggingface-hub" }, + { name = "langcodes", extra = ["data"] }, + { name = "librosa" }, + { name = "lingua-language-detector" }, + { name = "loguru" }, + { name = "numpy" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "safetensors" }, + { name = "soundfile" }, + { name = "torch" }, + { name = "torchaudio" }, + { name = "torchdiffeq" }, + { name = "tqdm" }, + { name = "transformers" }, +] + [[package]] name = "editdistance" version = "0.8.1" @@ -1356,6 +1386,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b5/91/53255615acd2a1eaca307ede3c90eb550bae9c94581f8c00081b6b1c8f44/kiwisolver-1.5.0-graalpy312-graalpy250_312_native-win_amd64.whl", hash = "sha256:1f1489f769582498610e015a8ef2d36f28f505ab3096d0e16b4858a9ec214f57", size = 75987, upload-time = "2026-03-09T13:15:39.65Z" }, ] +[[package]] +name = "langcodes" +version = "3.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a9/75/f9edc5d72945019312f359e69ded9f82392a81d49c5051ed3209b100c0d2/langcodes-3.5.1.tar.gz", hash = "sha256:40bff315e01b01d11c2ae3928dd4f5cbd74dd38f9bd912c12b9a3606c143f731", size = 191084, upload-time = "2025-12-02T16:22:01.627Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/c1/d10b371bcba7abce05e2b33910e39c33cfa496a53f13640b7b8e10bb4d2b/langcodes-3.5.1-py3-none-any.whl", hash = "sha256:b6a9c25c603804e2d169165091d0cdb23934610524a21d226e4f463e8e958a72", size = 183050, upload-time = "2025-12-02T16:21:59.954Z" }, +] + +[package.optional-dependencies] +data = [ + { name = "language-data" }, +] + [[package]] name = "langdetect" version = "1.0.9" @@ -1365,6 +1409,18 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/0e/72/a3add0e4eec4eb9e2569554f7c70f4a3c27712f40e3284d483e88094cc0e/langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0", size = 981474, upload-time = "2021-05-07T07:54:13.562Z" } +[[package]] +name = "language-data" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "marisa-trie" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/61/50/2518b4d0805f4d1f10166837829ad0bd71dcee3ec33fa84aa8c0db23c13c/language_data-1.4.0.tar.gz", hash = "sha256:800e6457e7beda781c156e02d7707e38db2ded026472e07e2c055dc8446ee574", size = 5309660, upload-time = "2025-11-28T13:45:25.217Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/d1/68e2bcca94c9bbdc122a71e504e0b6a6c3e31541b1bad33fee0205996006/language_data-1.4.0-py3-none-any.whl", hash = "sha256:f741927c24ab14cbed2a57bc2bfe82b00cff266c427179597e8b14123364f084", size = 5572678, upload-time = "2025-11-28T13:45:23.381Z" }, +] + [[package]] name = "lazy-loader" version = "0.5" @@ -1403,6 +1459,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b5/ba/c63c5786dfee4c3417094c4b00966e61e4a63efecee22cb7b4c0387dda83/librosa-0.11.0-py3-none-any.whl", hash = "sha256:0b6415c4fd68bff4c29288abe67c6d80b587e0e1e2cfb0aad23e4559504a7fa1", size = 260749, upload-time = "2025-03-11T15:09:52.982Z" }, ] +[[package]] +name = "lingua-language-detector" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/c5/69636ba575cca9f507dd08ffdd4a2d084fdb193aa8e4246a5335bc077678/lingua_language_detector-2.2.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:df29270e5eef3c597e725e11eee778b7111412faab466d390d22ab1d5293bbb8", size = 170204877, upload-time = "2026-03-09T14:24:06.223Z" }, + { url = "https://files.pythonhosted.org/packages/29/05/32568a1afe29e8d2060e4ffefd9d1a67aa2e423db3ab4abbf4f604c81b39/lingua_language_detector-2.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2fe367f7c112a0445218407e259338a88af770d5c84a550c20ebe11d5053f03d", size = 172495668, upload-time = "2026-03-09T14:24:18.193Z" }, + { url = "https://files.pythonhosted.org/packages/c9/64/b6212bc0eff72d76dd04649c13452318eb2abeafc397ac597242e47e3e07/lingua_language_detector-2.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ac7453c08ab9699706a92f15480ae3d4b66761c15e1577a1ba31d1635780f3a", size = 170325432, upload-time = "2026-03-09T14:24:29.41Z" }, + { url = "https://files.pythonhosted.org/packages/44/a0/7322a0c50db8f82836ef40b14986dfcfad17bd837bfa5782562fec143bf0/lingua_language_detector-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63d99c7570ba09525f1702e4e4b2362f8f1f7e0a0fba93a3a53d3f322e00659d", size = 170332900, upload-time = "2026-03-09T14:24:40.088Z" }, + { url = "https://files.pythonhosted.org/packages/47/b5/e6d09c3cf08580088cc85807b1b28ef8b77d8c62d50ed56144a565205787/lingua_language_detector-2.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cd54fe6505b671c0d1e33bf0436e8e9308e8802112eb5ba6fb37d2c5459ab685", size = 170500781, upload-time = "2026-03-09T14:24:51.478Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f2/ef84cc7f57854838f9b64f1b8aae07ee56827b5538b9609acb72aa6832e5/lingua_language_detector-2.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:362fbbc21da68c778f3521f42309d1ed6f54d4bd554a5701bf165419be9cc64b", size = 170586077, upload-time = "2026-03-09T14:25:04.48Z" }, + { url = "https://files.pythonhosted.org/packages/97/48/bb581e0deda48169a11d25467d9fbe3ef4792b4d5363144bbea08caa9dd2/lingua_language_detector-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:98baee0c51e31d0b54a92a4795aca6ca7069de9b99dc783e3456a91abd2ff692", size = 170065705, upload-time = "2026-03-09T14:25:16.796Z" }, + { url = "https://files.pythonhosted.org/packages/45/a8/197f06b3d2da6ffb580d20e0b46181ef6d34fd750c7930ec04b322767cfb/lingua_language_detector-2.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:581bfb3405dd99863b04753812021f2554545c4c2783d0faa41af44535c759a1", size = 169977215, upload-time = "2026-03-09T14:25:31.373Z" }, + { url = "https://files.pythonhosted.org/packages/0c/d3/b4647a233d4d8ef411519c7259c5b607b20568cb993d976319ae3f260eea/lingua_language_detector-2.2.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:d52dc5a54bb245b1d9df54620810e7b72a247f8ca4276659a9893fe415faff37", size = 170204448, upload-time = "2026-03-09T14:25:41.286Z" }, + { url = "https://files.pythonhosted.org/packages/6e/cd/248053f61de66faa866bb4eb7190af1c2e67fa363f8193444a5aee5c1706/lingua_language_detector-2.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0bb20bfe60b64012cd71f85bfdf5c79fc2e916590a9f69c3a9b01a44fbfd2244", size = 172495363, upload-time = "2026-03-09T14:25:53.585Z" }, + { url = "https://files.pythonhosted.org/packages/25/88/ad5e9b8b21f4c5eeecd5d08539bf6ec869df87a491d779b8756501db6a71/lingua_language_detector-2.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ed86c6e803a585853298623d9ee683bd08bcd15c2543c045ef059a090823fc8", size = 170326018, upload-time = "2026-03-09T14:26:04.612Z" }, + { url = "https://files.pythonhosted.org/packages/53/a5/b93c76728294e4eaf01f442fa7e9da913963d638915ce0aafd0220bc9902/lingua_language_detector-2.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fbf936b47ef4fdd7043ebb4159d4a5f1c3648028e19d6e3c60464abc5f5e195", size = 170332278, upload-time = "2026-03-09T14:26:14.118Z" }, + { url = "https://files.pythonhosted.org/packages/21/90/7f0f4c131cd0686c0f77157545b599b5023b00fa44ffb4a1c24a4c861cb3/lingua_language_detector-2.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:126899985870ada7f9630fb984a0763741bb7fde42adfc077e6f415e49e407b5", size = 170500970, upload-time = "2026-03-09T14:26:28.07Z" }, + { url = "https://files.pythonhosted.org/packages/f4/71/24d9d151ccf35cd001d8570d22dc1d305e632eee7ff1252764be8fb081f3/lingua_language_detector-2.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c0961ec8f616897f5e91c7c3a5422d2d3aa48493954f2c425f2fca522a253916", size = 170585841, upload-time = "2026-03-09T14:26:39.904Z" }, + { url = "https://files.pythonhosted.org/packages/35/a6/e087ba2c47eb86899020915fb6bf47b0f956eda9c61cabc742bc832c1b3c/lingua_language_detector-2.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:250517a581cfa098a451299aa913e9756aee9f738b0b248259fc634eeffeb2cf", size = 170065737, upload-time = "2026-03-09T14:26:53.2Z" }, + { url = "https://files.pythonhosted.org/packages/81/e7/4ed636d7d7e4605ce170ce70a566b45f70eed79ec9cdb5c9bc821892c1cd/lingua_language_detector-2.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:9fc04412287d254982612dafe2dae2073e1feeedffbee8d4ddff4b961218cb69", size = 169977074, upload-time = "2026-03-09T14:27:04.064Z" }, +] + [[package]] name = "linkify-it-py" version = "2.1.0" @@ -1431,6 +1510,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/46/9f/4898b44e4042c60fafcb1162dfb7014f6f15b1ec19bf29cfea6bf26df90d/llvmlite-0.47.0-cp313-cp313-win_amd64.whl", hash = "sha256:c2e9adf8698d813a9a5efb2d4370caf344dbc1e145019851fee6a6f319ba760e", size = 38138695, upload-time = "2026-03-31T18:29:15.43Z" }, ] +[[package]] +name = "loguru" +version = "0.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "win32-setctime", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, +] + +[[package]] +name = "marisa-trie" +version = "1.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/77/5d/e235921b5b74818cb65b557fa05cc6201c2c1612d4866ff75c835bcf808d/marisa_trie-1.4.1.tar.gz", hash = "sha256:44ce3bdbeb7c950d463e460184fc3e18702df9ef0edb826bac672fd789fb1d20", size = 261581, upload-time = "2026-04-08T07:17:52.991Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/b7/89811f7eba6e92386279376df81cfa281ab99e30f7e4f5a5e04d8dba6b99/marisa_trie-1.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:63964dedbf49ef0d17cb32d368f13ec71ca0ec026976b1cc24cb6a993d05752a", size = 206731, upload-time = "2026-04-08T07:16:37.439Z" }, + { url = "https://files.pythonhosted.org/packages/f7/8b/cc34313149486dfc13e84303e12d61fd55788b37d92c3e082cc3d142e776/marisa_trie-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:87e65dff37d1b9edea7bc7a8e935c851ec4934f2e56071a4501ce8db97b579a4", size = 190988, upload-time = "2026-04-08T07:16:38.686Z" }, + { url = "https://files.pythonhosted.org/packages/15/0c/376e21c62bd0e658a5e9f6b8912f3116591778c639857ad374c7639ceebe/marisa_trie-1.4.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7bed50d39ff1391a67b9383a7f3c458a1a0cb40fe8dd16952f813fbf8939eeff", size = 1471836, upload-time = "2026-04-08T07:16:39.88Z" }, + { url = "https://files.pythonhosted.org/packages/bb/95/cd6e73d0857608f2946f3bc5ccac86488073b96fb37bc1b45b0184268bed/marisa_trie-1.4.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d51bdd22a7238ef4d681effd7c224a267ddae054b64b1cec9ce95bbcd2b6a88", size = 1516414, upload-time = "2026-04-08T07:16:41.4Z" }, + { url = "https://files.pythonhosted.org/packages/51/73/339e8fab2e8cea88e9e0fd78aeb8ccdd3f8656d228dae2cb698f667a0fe7/marisa_trie-1.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8da4dea083209301430d80c8a33d0a5ecb6a270c904743505adceaae4fface2", size = 2394325, upload-time = "2026-04-08T07:16:43.139Z" }, + { url = "https://files.pythonhosted.org/packages/5c/d7/0ba8bcaeee68a8e6cbc61b47825370a6c8a523ab16ea42e8728dec2213bc/marisa_trie-1.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e7f9603cc8a57dca847febf45349c51916c3e1340eb6ee064baabf181398dc79", size = 2510974, upload-time = "2026-04-08T07:16:44.848Z" }, + { url = "https://files.pythonhosted.org/packages/23/ef/fa342fdbc0c055030b93007dff5393675705071a14621e3f81bfb52eb970/marisa_trie-1.4.1-cp312-cp312-win32.whl", hash = "sha256:63cd2870f3890f2657610ed437110713e87972da0dc4d3e6303d370c9b28d215", size = 138890, upload-time = "2026-04-08T07:16:46.871Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/419114325f1bb4c2202c20f19f424f9dea1cc38de7cd1fae60d991e99b69/marisa_trie-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:fc9bc6de7197cdd1f32b72566cc7ac75c465d6f2191bba51d17edfae2b5ca8b0", size = 168513, upload-time = "2026-04-08T07:16:48.475Z" }, + { url = "https://files.pythonhosted.org/packages/8f/f8/ae0dcbf79498b7aa00dae740982c9812fa95339bc6549ea63b4ad15eeb58/marisa_trie-1.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:59375ab1e4e4cee87d318b6b3dffa91c599c89afd920ef53428235f4326ba1d6", size = 139710, upload-time = "2026-04-08T07:16:49.863Z" }, + { url = "https://files.pythonhosted.org/packages/91/df/a6b189cfdfc45fc402833fa067b1625a8ec4ef5446a8d7c08c5c84ea835e/marisa_trie-1.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cde5209f0904209866e5c2ff5bdadc3d57bc9368ad3e26eac72a16d863e83dc0", size = 206631, upload-time = "2026-04-08T07:16:51.18Z" }, + { url = "https://files.pythonhosted.org/packages/4f/1b/7b03330888306166e96801acb5086eaf5ddc112d2ab8c03c8de478da7346/marisa_trie-1.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:21ff39c29d900b44876913c96d0e2c550417450340fef5c8848111796a7f9de1", size = 190110, upload-time = "2026-04-08T07:16:52.276Z" }, + { url = "https://files.pythonhosted.org/packages/dc/41/6ae103ef7448320a7324f9866a253d595159ffe367c11e06baabb92ca4d2/marisa_trie-1.4.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:84ce9b69a0516a52169d28e27ea14f015f6daf467fc8cb661eb841565d728ccf", size = 1474539, upload-time = "2026-04-08T07:16:53.924Z" }, + { url = "https://files.pythonhosted.org/packages/1b/dc/cbb5e8416ff5d193847b0e838b0b525773af7b6f4e1e4a33728d1b097fcb/marisa_trie-1.4.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eaa3db575cb757f98d2754bcab5e1e0b2a884dc611964ac2659be13b58ef32e8", size = 1501031, upload-time = "2026-04-08T07:16:55.554Z" }, + { url = "https://files.pythonhosted.org/packages/88/5c/ed86ad8683237dff8cdeb117b3b0664005e05bc221535301621ed474857d/marisa_trie-1.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:71ab0be7b380d65871986d61839814153f55307ff593bac22109e65e804f07d4", size = 2397211, upload-time = "2026-04-08T07:16:57.199Z" }, + { url = "https://files.pythonhosted.org/packages/49/a3/2596d55ee48ed15a4d0de5a9ebd27de49888a029ffa521717c282df63efc/marisa_trie-1.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:658f49e4e825b4e4257f53f455e214cd0e161ab326e569562cc8ff8f67a48506", size = 2498742, upload-time = "2026-04-08T07:16:58.779Z" }, + { url = "https://files.pythonhosted.org/packages/fe/d1/50c0ed09c99e4cd3ff7276f1a50a148c7bd4e585271215d3a05f74228bf9/marisa_trie-1.4.1-cp313-cp313-win32.whl", hash = "sha256:a56d6daf4449ae5f6825a03f9fabb97e56527fb44bc4a608944a872794f662d5", size = 138706, upload-time = "2026-04-08T07:17:00.258Z" }, + { url = "https://files.pythonhosted.org/packages/68/67/b35e8b14757ce5daffd5c4c1ab0bb9b3e3c7611e82fe5ab2707489a176c4/marisa_trie-1.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:6a5d45561a5e6563a0f934899a097d69e74111181b162de4b64cceb31f1bf44b", size = 168903, upload-time = "2026-04-08T07:17:01.419Z" }, + { url = "https://files.pythonhosted.org/packages/da/91/bd06914afcb70710f684be44cf5435742d175d49c3de021ee62f7eb8c4e4/marisa_trie-1.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:ab28fda06ef2e488240a17d3f9947447e7f1786ad04fb29584ab4a27fde656f4", size = 139620, upload-time = "2026-04-08T07:17:02.354Z" }, + { url = "https://files.pythonhosted.org/packages/75/b5/3823948064c63fd76777910b45481c6e251c9d4c3f261ca23d51b758dc91/marisa_trie-1.4.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:efa5b4f8202c199ef7f4afe00ca4e406ea77b3940355595aabea8e9b393a22b1", size = 213383, upload-time = "2026-04-08T07:17:03.766Z" }, + { url = "https://files.pythonhosted.org/packages/ae/f1/5c77eea2ff285e47e8a523385f023075a452455ffa55804df95e4b569a12/marisa_trie-1.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a8775892a5a96df359fa8853e6132b9504dfcc2ecebd27bb617cc5be6ffeb13d", size = 202037, upload-time = "2026-04-08T07:17:04.745Z" }, + { url = "https://files.pythonhosted.org/packages/2a/0d/0ea5e7f0aaa11c29aadaf121b6de275a6807d9712ef28ea4a6025bcab2e4/marisa_trie-1.4.1-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc68d4cdd7f1be60786888497f50c6fb8ad4f17bbec1d7accfc3fe69e725a329", size = 1542413, upload-time = "2026-04-08T07:17:06.247Z" }, + { url = "https://files.pythonhosted.org/packages/8a/a9/fe6aa360eba3178cd74796b5e0d6d07a1afabe5b09b54fa8c2aa693a53c8/marisa_trie-1.4.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e759fb722a16b7db6a5fcb2ffe8c2feabf4a6143b487d21388bc5c156a79e90", size = 1545503, upload-time = "2026-04-08T07:17:07.501Z" }, + { url = "https://files.pythonhosted.org/packages/60/42/2d80e091d2b92f7175be488f554565d0e0a432b1d61c4804177e0ec9ecce/marisa_trie-1.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0666b071851fff8b687bc6c0c899c7ce1cb6119399ed8c3c4f4526aca876a5e2", size = 2447028, upload-time = "2026-04-08T07:17:09.29Z" }, + { url = "https://files.pythonhosted.org/packages/28/8b/f9cb7fab4a0053dd9caed573791ab292f8b890a31bd5f159ef21dbe40e63/marisa_trie-1.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6e494d5e88da58695fa9e2efc222de808ebd36b306a2c7162256d00fb06e733e", size = 2536022, upload-time = "2026-04-08T07:17:11.222Z" }, + { url = "https://files.pythonhosted.org/packages/64/00/ad53cf35464937b7719ed7a6e21354418f998df9002944cfa5f14903f460/marisa_trie-1.4.1-cp313-cp313t-win32.whl", hash = "sha256:50b2bbfc6612e0b5f7bd399c3097e166e5dab2b79a58e9b956ef9127b90d2d6e", size = 153959, upload-time = "2026-04-08T07:17:12.799Z" }, + { url = "https://files.pythonhosted.org/packages/02/e5/89ae70c984c178a5cf0fa95c8c47aab129b73641ec3729ff56cc5039abc1/marisa_trie-1.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:932e97f23815c999d8d641f79c934fe1c841eab34bd01051552822e78bba919c", size = 186646, upload-time = "2026-04-08T07:17:13.88Z" }, + { url = "https://files.pythonhosted.org/packages/82/84/514da5bd3ee051e800caf1ce687b7727a92b643416cc678dc47d3eface97/marisa_trie-1.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:0b2e53f87c01b99c59cda37411a234c704a95d12f4787aeb29572fa9302f2b93", size = 146566, upload-time = "2026-04-08T07:17:15.175Z" }, +] + [[package]] name = "markdown-it-py" version = "4.2.0" @@ -1708,6 +1835,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, ] +[[package]] +name = "nuitka" +version = "4.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e6/2e/9ea398ca1a4fc458958fdf477ae18d3395bee8c9f8950ca6f0f039ea2585/nuitka-4.1.2.tar.gz", hash = "sha256:efc2359b171d7b63046ca8ec8dee57015c3466a9df74b68a049c2c1a7e93ecee", size = 4561050, upload-time = "2026-05-28T08:26:07.947Z" } + [[package]] name = "numba" version = "0.65.1" @@ -2769,24 +2902,26 @@ wheels = [ [[package]] name = "safetensors" -version = "0.7.0" +version = "0.8.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" } +sdist = { url = "https://files.pythonhosted.org/packages/45/06/f955dbbb1859e3bd23c8ac6141af5106e7ad5fedec4a3a6e3d60f94b7001/safetensors-0.8.0.tar.gz", hash = "sha256:fabaf3e0f18a6618d9b36560682562157f77c2b71fcffc7b432be2baed9d753d", size = 325846, upload-time = "2026-06-09T07:52:25.563Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" }, - { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" }, - { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" }, - { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" }, - { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" }, - { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" }, - { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" }, - { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" }, - { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" }, - { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" }, - { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" }, - { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" }, - { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" }, - { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" }, + { url = "https://files.pythonhosted.org/packages/39/a0/f718cda65b05407d228f97602cf60dca269c979867aa5beb25410de26cd3/safetensors-0.8.0-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c554f85858e05226d3c2828e32395e677434685d6d94594a41643361c5e837f0", size = 473568, upload-time = "2026-06-09T07:52:18.829Z" }, + { url = "https://files.pythonhosted.org/packages/f5/b1/fa7c600e7dceae12e9606c7578cbc9ff1e1ed55844883ee5c92205e86226/safetensors-0.8.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:c80201d22cbf405b80647a60ada77bba06c8fba2da2743ba1e89cdcc39a81f25", size = 484562, upload-time = "2026-06-09T07:52:17.518Z" }, + { url = "https://files.pythonhosted.org/packages/09/7d/65a7de0af421317bb36a067241e4235fff194eed60b961ed6d3f59a3fc60/safetensors-0.8.0-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a46e5ff292c356d6991e60942ba7f79817682d3a2cef0702136448cb9c4d235", size = 502844, upload-time = "2026-06-09T07:52:07.624Z" }, + { url = "https://files.pythonhosted.org/packages/91/4f/3175c9d75634e0e0dda0082794193521035edd7c70a6f212bf33ca06ddf4/safetensors-0.8.0-cp310-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4124502b78f03534117c848f87a39b8f31e577b15eff423bf8bfb95f2a8c30d0", size = 511823, upload-time = "2026-06-09T07:52:09.565Z" }, + { url = "https://files.pythonhosted.org/packages/20/87/846c289e7aa2299eff406335717cf43ce8777194ece8aad75772e0411615/safetensors-0.8.0-cp310-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bc0a787ba8a35be368ee3574edfa2b1ad389eebd0a72e482ae275490e3f6c98", size = 633461, upload-time = "2026-06-09T07:52:11.128Z" }, + { url = "https://files.pythonhosted.org/packages/76/22/8d64d9df2c45d5ded401df889d0ad90882804ca172d79ec4f0df8f727fe0/safetensors-0.8.0-cp310-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:040070828e36dc8e122178bbbd5830ff9e97920affb84cbe0f46442497bed358", size = 545148, upload-time = "2026-06-09T07:52:13.603Z" }, + { url = "https://files.pythonhosted.org/packages/28/50/f203ff3a3ddfe19308efc83c5a3a29ed02bf786732ec35e68bf9162f3365/safetensors-0.8.0-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd6f3f93c9a0a7cc2788ee63fb763353d4bd2e89b0751bc78fcf7dda00bea774", size = 516040, upload-time = "2026-06-09T07:52:16.29Z" }, + { url = "https://files.pythonhosted.org/packages/46/fb/cdaed17ceb2948784fd9c36b6fd3e951b608547cea81a48e8ee6f8cfdfcb/safetensors-0.8.0-cp310-abi3-manylinux_2_31_riscv64.whl", hash = "sha256:fcdd41ec4628fee5799f807c73c353629130fbd942aa23d83c623dd6c9d52d78", size = 513832, upload-time = "2026-06-09T07:52:12.37Z" }, + { url = "https://files.pythonhosted.org/packages/0d/49/1e15de264dcc3b77943d2d0c56a95809956883b1c2d6d585c792523f180b/safetensors-0.8.0-cp310-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8e9f537aa183a38ace122d27303dcd986b26bd2a7591f9181d7f0c396f4677ca", size = 559930, upload-time = "2026-06-09T07:52:14.743Z" }, + { url = "https://files.pythonhosted.org/packages/2a/43/bf38443278eab4b1be1fce2931e2b012ad9cb7df52ada751d0aab8f7659a/safetensors-0.8.0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:87eec7ffed2b809f05a398a8becb7d013f19f7837cd15d9748580d6cf30dbaf4", size = 678670, upload-time = "2026-06-09T07:52:20.032Z" }, + { url = "https://files.pythonhosted.org/packages/72/e3/68cd3fa5b48488e84add63e04cb12f3bc28ae4638c06d4508c6e88823d0e/safetensors-0.8.0-cp310-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:4a95ae2b05d7726d751da4ebf626a2ca782b706e101bd894c95bc2450b1cffcc", size = 786679, upload-time = "2026-06-09T07:52:21.322Z" }, + { url = "https://files.pythonhosted.org/packages/29/4b/1c19c509d56e01f4fbb3d0a2e597450f6cc04d1d56cf52defb0a62dfd715/safetensors-0.8.0-cp310-abi3-musllinux_1_2_i686.whl", hash = "sha256:3ae091f16662658bdc019a4ff6cb4c085bb7d725eb5978b183ffd265863b6d2d", size = 765683, upload-time = "2026-06-09T07:52:22.594Z" }, + { url = "https://files.pythonhosted.org/packages/27/43/41c1621732edd934d868a00d1b891584c892a7b62a9aab82ea5a0a5623ee/safetensors-0.8.0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8e080062fcde23be189565e1c3305d16751a218ecf9412c8601e64204eb6f846", size = 722361, upload-time = "2026-06-09T07:52:23.924Z" }, + { url = "https://files.pythonhosted.org/packages/8e/3f/73ccf82579412b4a71c4ca673f10b5f1f888d7cf5af7fe24f27d30307be4/safetensors-0.8.0-cp310-abi3-win32.whl", hash = "sha256:2ddf52eac562eda224f99acfa7889d02968c1fd59a5b011ae7d8137c37e9c02d", size = 342401, upload-time = "2026-06-09T07:52:28.895Z" }, + { url = "https://files.pythonhosted.org/packages/1b/6d/3fba214c1e5e0f69991677ec3bc17023f0421776975e1de0c682dca475e2/safetensors-0.8.0-cp310-abi3-win_amd64.whl", hash = "sha256:096ec1a98435df7beb08853bb5aa9081a84f23d0adc67ed1a0a10550f608373f", size = 355540, upload-time = "2026-06-09T07:52:27.832Z" }, + { url = "https://files.pythonhosted.org/packages/8d/fc/7eedc3510d97878876e32774eebbeb61c43f148a96e915c84229a3e967aa/safetensors-0.8.0-cp310-abi3-win_arm64.whl", hash = "sha256:f7838e5135a406ad3e02efdcb8cf2e5397d368b0154537c4fec682dbc544d452", size = 340500, upload-time = "2026-06-09T07:52:26.745Z" }, ] [[package]] @@ -3306,6 +3441,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9d/34/d219959a2a19aa5461aa853c1d0e80a5ee8e2c3b13f40881ba2b6d4a4a5b/torchcodec-0.13.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b745aff148e91c83443ee50f3b587f9bf805ee790225aeefcb89ffaf0dbe241c", size = 2516563, upload-time = "2026-05-21T11:15:19.267Z" }, ] +[[package]] +name = "torchdiffeq" +version = "0.2.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "scipy" }, + { name = "torch" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/87/ec/a40aa124660f0ee65e6760cb53df6a82ad91a1a3ef1da5e747f1336644dd/torchdiffeq-0.2.5.tar.gz", hash = "sha256:b50d3760d13fd138dcceac651f4b80396f44fefcebd037a033fecfeaa9cc12e7", size = 31197, upload-time = "2024-11-21T20:20:11.552Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/35/537f64f2d0b3cfebaae0f903b4e3a3b239abcc99d0f73cb15b9cee9b8212/torchdiffeq-0.2.5-py3-none-any.whl", hash = "sha256:aa1db4bed13bd04952f28a53cdf4336d1ab60417c1d9698d7a239fec1cf2bcf8", size = 32902, upload-time = "2024-11-21T20:20:09.938Z" }, +] + [[package]] name = "torchvision" version = "0.26.0+cu128" @@ -3567,6 +3715,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a1/58/b778473e32f4afa877f1313b137edac2c9cef40da571078a351507aa362a/wetext-0.1.2-py3-none-any.whl", hash = "sha256:7467ac08a4bb44523780626437b9b0718cd611b1ccb4304bc49b649fe29370ee", size = 1771722, upload-time = "2025-11-28T07:33:47.177Z" }, ] +[[package]] +name = "win32-setctime" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" }, +] + [[package]] name = "xxhash" version = "3.7.0" @@ -3684,3 +3841,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/cc/a7beb239f78f27fca1b053c8e8595e4179c02e62249b4687ec218c370c50/yarl-1.24.2-cp313-cp313-win_arm64.whl", hash = "sha256:1e831894be7c2954240e49791fa4b50c05a0dc881de2552cfe3ffd8631c7f461", size = 87069, upload-time = "2026-05-19T21:29:54.442Z" }, { url = "https://files.pythonhosted.org/packages/fd/4d/4b880086bd0d3e034d25647be1d830afc3e3f610e98c4ab3490af6b1b6d5/yarl-1.24.2-py3-none-any.whl", hash = "sha256:2783d9226db8797636cd6896e4de81feed252d1db72265686c9558d97a4d94b9", size = 53576, upload-time = "2026-05-19T21:31:03.909Z" }, ] + +[[package]] +name = "yt-dlp" +version = "2026.3.17" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/34/7c6b4e3f89cb6416d2cd7ab6dab141a1df97ab0fb22d15816db2c92148c9/yt_dlp-2026.3.17.tar.gz", hash = "sha256:ba7aa31d533f1ffccfe70e421596d7ca8ff0bf1398dc6bb658b7d9dec057d2c9", size = 3119221, upload-time = "2026-03-17T23:43:00.244Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/13/5093bcb954878e50f7217fd2ab94282b53934022e4e4a03265582da83bf5/yt_dlp-2026.3.17-py3-none-any.whl", hash = "sha256:32992db94303a8a5d211a183f2174834fe7f8c29d83ed2e7a324eae97a8f26d8", size = 3315134, upload-time = "2026-03-17T23:42:57.863Z" }, +]