From 16836230ed44106d7f58d865517708b93d15fa9d Mon Sep 17 00:00:00 2001 From: Dewan Shakil Date: Wed, 13 May 2026 00:49:19 +0530 Subject: [PATCH 1/7] Add analytics tracking --- README.md | 12 +++ kittentts/__init__.py | 10 ++- kittentts/analytics.py | 175 ++++++++++++++++++++++++++++++++++++++++ kittentts/get_model.py | 81 +++++++++++++++++-- tests/__init__.py | 1 + tests/test_analytics.py | 149 ++++++++++++++++++++++++++++++++++ 6 files changed, 418 insertions(+), 10 deletions(-) create mode 100644 kittentts/analytics.py create mode 100644 tests/__init__.py create mode 100644 tests/test_analytics.py diff --git a/README.md b/README.md index 88fa577..3541eff 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,18 @@ print(model.available_voices) # ['Bella', 'Jasper', 'Luna', 'Bruno', 'Rosie', 'Hugo', 'Kiki', 'Leo'] ``` +### Analytics + +Kitten TTS sends anonymous generation analytics to the KittenTTS ingest API. The SDK does not include PostHog or any analytics-provider SDK. + +Analytics can be disabled at model creation: + +```python +model = KittenTTS("KittenML/kitten-tts-mini-0.8", analytics=False) +``` + +The SDK generates a persistent anonymous install ID and sends generation metadata such as SDK version, platform, runtime version, selected model, selected voice, generation type, and asset source. Analytics runs in the background with a short timeout and never blocks or fails TTS generation. 
+ ### Using with GPU ``` diff --git a/kittentts/__init__.py b/kittentts/__init__.py index 9cf1a2d..2ac6276 100644 --- a/kittentts/__init__.py +++ b/kittentts/__init__.py @@ -1,7 +1,13 @@ -from kittentts.get_model import get_model, KittenTTS - __version__ = "0.1.0" __author__ = "KittenML" __description__ = "Ultra-lightweight text-to-speech model with just 15 million parameters" __all__ = ["get_model", "KittenTTS"] + + +def __getattr__(name): + if name in {"get_model", "KittenTTS"}: + from kittentts.get_model import KittenTTS, get_model + + return {"get_model": get_model, "KittenTTS": KittenTTS}[name] + raise AttributeError(f"module 'kittentts' has no attribute {name!r}") diff --git a/kittentts/analytics.py b/kittentts/analytics.py new file mode 100644 index 0000000..93e1380 --- /dev/null +++ b/kittentts/analytics.py @@ -0,0 +1,175 @@ +"""Small, dependency-free analytics client for KittenTTS SDK events.""" + +import json +import os +import platform as platform_module +import re +import sys +import threading +import uuid +from datetime import datetime, timezone +from pathlib import Path +from typing import Callable, Dict, Optional +from urllib import request + +ANALYTICS_ENDPOINT = "https://kittentts-analytics.dewana-sl.workers.dev/v1/track" +SDK_TYPE = "python" +DEFAULT_TIMEOUT_SECONDS = 3.0 + +_MODEL_VERSION_RE = re.compile(r"^(?P<model>.+?)-(?P<version>\d+(?:\.\d+)*(?:-[A-Za-z0-9]+)*)$") + + +def analytics_enabled(value=True) -> bool: + if value is False: + return False + env_value = os.environ.get("KITTENTTS_ANALYTICS") + if env_value and env_value.strip().lower() in {"0", "false", "off", "no"}: + return False + return True + + +def current_platform() -> str: + system = platform_module.system().lower() + if system == "darwin": + return "macos" + if system == "windows": + return "windows" + if system == "linux": + return "linux" + return "unknown" + + +def runtime_version() -> str: + return f"python {sys.version_info.major}.{sys.version_info.minor}" + + +def
parse_model_name(model_name: str) -> Dict[str, str]: + repo_name = str(model_name).rstrip("/").split("/")[-1] or str(model_name) + match = _MODEL_VERSION_RE.match(repo_name) + if not match: + return {"selected_model": repo_name, "model_version": "unknown"} + return { + "selected_model": match.group("model"), + "model_version": match.group("version"), + } + + +def error_code(error: BaseException) -> str: + name = error.__class__.__name__ + words = re.sub(r"(? str: + if not self._anonymous_id: + self._anonymous_id = load_or_create_anonymous_id(self._anonymous_id_path) + return self._anonymous_id + + def track_generation( + self, + selected_voice: str, + generation: str, + sdk_error_code: Optional[str] = None, + ) -> None: + if not self.enabled: + return + + payload = { + "anonymous_id": self.anonymous_id, + "client_event_id": str(uuid.uuid4()), + "timestamp": datetime.now(timezone.utc).isoformat(), + "sdk_version": self.sdk_version, + "sdk_type": SDK_TYPE, + "platform": current_platform(), + "runtime_version": runtime_version(), + "selected_model": self.selected_model, + "model_version": self.model_version, + "selected_voice": str(selected_voice), + "generation": generation, + "asset_source": self.asset_source, + } + if sdk_error_code: + payload["sdk_error_code"] = sdk_error_code + + if self._async_delivery: + thread = threading.Thread(target=self._send, args=(payload,), daemon=True) + thread.start() + else: + self._send(payload) + + def _send(self, payload: Dict[str, str]) -> None: + try: + self._post_json(self.endpoint, payload, self.timeout_seconds) + except Exception: + return + + +def post_json_request(endpoint: str, payload: Dict[str, str], timeout_seconds: float) -> None: + body = json.dumps(payload).encode("utf-8") + req = request.Request( + endpoint, + data=body, + headers={"Content-Type": "application/json"}, + method="POST", + ) + with request.urlopen(req, timeout=timeout_seconds) as response: + response.read() + + +def default_anonymous_id_path() -> Path: 
+ configured_home = os.environ.get("KITTENTTS_ANALYTICS_HOME") + if configured_home: + return Path(configured_home).expanduser() / "anonymous_id" + return Path.home() / ".kittentts" / "analytics_id" + + +def load_or_create_anonymous_id(path: Path) -> str: + try: + existing = path.read_text(encoding="utf-8").strip() + if is_uuid(existing): + return existing + except OSError: + pass + + anonymous_id = str(uuid.uuid4()) + try: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(anonymous_id, encoding="utf-8") + except OSError: + pass + return anonymous_id + + +def is_uuid(value: str) -> bool: + try: + uuid.UUID(value) + except (TypeError, ValueError): + return False + return True diff --git a/kittentts/get_model.py b/kittentts/get_model.py index 8af5561..74578a2 100644 --- a/kittentts/get_model.py +++ b/kittentts/get_model.py @@ -1,18 +1,20 @@ import json import os -from huggingface_hub import hf_hub_download -from .onnx_model import KittenTTS_1_Onnx +from importlib import metadata + +from .analytics import AnalyticsClient, error_code, parse_model_name class KittenTTS: """Main KittenTTS class for text-to-speech synthesis.""" - def __init__(self, model_name="KittenML/kitten-tts-nano-0.8", cache_dir=None, backend=None): + def __init__(self, model_name="KittenML/kitten-tts-nano-0.8", cache_dir=None, backend=None, analytics=True): """Initialize KittenTTS with a model from Hugging Face. 
Args: model_name: Hugging Face repository ID or model name cache_dir: Directory to cache downloaded files + analytics: Set to False to disable anonymous generation analytics """ # Handle different model name formats if "/" not in model_name: @@ -22,6 +24,14 @@ def __init__(self, model_name="KittenML/kitten-tts-nano-0.8", cache_dir=None, ba repo_id = model_name self.model = download_from_huggingface(repo_id=repo_id, cache_dir=cache_dir, backend=backend) + model_info = parse_model_name(repo_id) + self.analytics = AnalyticsClient( + sdk_version=_sdk_version(), + selected_model=model_info["selected_model"], + model_version=model_info["model_version"], + asset_source=getattr(self.model, "analytics_asset_source", "runtime-download"), + enabled=analytics, + ) def generate(self, text, voice="expr-voice-5-m", speed=1.0, clean_text=False): """Generate audio from text. @@ -35,7 +45,13 @@ def generate(self, text, voice="expr-voice-5-m", speed=1.0, clean_text=False): Audio data as numpy array """ print(f"Generating audio for text: {text}") - return self.model.generate(text, voice=voice, speed=speed, clean_text=clean_text) + try: + audio = self.model.generate(text, voice=voice, speed=speed, clean_text=clean_text) + except Exception as exc: + self._track_generation(voice, generation="wav", sdk_error_code=error_code(exc)) + raise + self._track_generation(voice, generation="wav") + return audio def generate_stream(self, text, voice="expr-voice-5-m", speed=1.0, clean_text=False): """Generate audio as a stream of chunks. @@ -43,7 +59,12 @@ def generate_stream(self, text, voice="expr-voice-5-m", speed=1.0, clean_text=Fa Yields: numpy.ndarray: Audio data for each text chunk. 
""" - yield from self.model.generate_stream(text, voice=voice, speed=speed, clean_text=clean_text) + try: + yield from self.model.generate_stream(text, voice=voice, speed=speed, clean_text=clean_text) + except Exception as exc: + self._track_generation(voice, generation="speak", sdk_error_code=error_code(exc)) + raise + self._track_generation(voice, generation="speak") def generate_to_file(self, text, output_path, voice="expr-voice-5-m", speed=1.0, sample_rate=24000): """Generate audio from text and save to file. @@ -55,13 +76,29 @@ def generate_to_file(self, text, output_path, voice="expr-voice-5-m", speed=1.0, speed: Speech speed (1.0 = normal) sample_rate: Audio sample rate """ - return self.model.generate_to_file(text, output_path, voice=voice, speed=speed, sample_rate=sample_rate) + try: + result = self.model.generate_to_file(text, output_path, voice=voice, speed=speed, sample_rate=sample_rate) + except Exception as exc: + self._track_generation(voice, generation="wav", sdk_error_code=error_code(exc)) + raise + self._track_generation(voice, generation="wav") + return result @property def available_voices(self): """Get list of available voices.""" return self.model.all_voice_names + def _track_generation(self, voice, generation, sdk_error_code=None): + try: + self.analytics.track_generation( + selected_voice=voice, + generation=generation, + sdk_error_code=sdk_error_code, + ) + except Exception: + return + def download_from_huggingface(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=None, backend=None): """Download model files from Hugging Face repository. 
@@ -73,7 +110,10 @@ def download_from_huggingface(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir= Returns: KittenTTS_1_Onnx: Instantiated model ready for use """ + from huggingface_hub import hf_hub_download + # Download config file first + config_was_cached = _is_cached(repo_id, "config.json", cache_dir) config_path = hf_hub_download( repo_id=repo_id, filename="config.json", @@ -87,6 +127,9 @@ def download_from_huggingface(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir= if config.get("type") not in ["ONNX1", "ONNX2"]: raise ValueError("Unsupported model type.") + model_was_cached = _is_cached(repo_id, config["model_file"], cache_dir) + voices_were_cached = _is_cached(repo_id, config["voices"], cache_dir) + # Download model and voices files based on config model_path = hf_hub_download( repo_id=repo_id, @@ -100,12 +143,34 @@ def download_from_huggingface(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir= cache_dir=cache_dir ) + from .onnx_model import KittenTTS_1_Onnx + # Instantiate and return model model = KittenTTS_1_Onnx(model_path=model_path, voices_path=voices_path, speed_priors=config.get("speed_priors", {}) , voice_aliases=config.get("voice_aliases", {}), backend=backend) + model.analytics_asset_source = "cache" if config_was_cached and model_was_cached and voices_were_cached else "runtime-download" return model -def get_model(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=None, backend=None): +def get_model(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=None, backend=None, analytics=True): """Get a KittenTTS model (legacy function for backward compatibility).""" - return KittenTTS(repo_id, cache_dir, backend=backend) + return KittenTTS(repo_id, cache_dir, backend=backend, analytics=analytics) + + +def _is_cached(repo_id, filename, cache_dir): + try: + from huggingface_hub import try_to_load_from_cache + except ImportError: + return False + try: + cached_path = try_to_load_from_cache(repo_id=repo_id, filename=filename, cache_dir=cache_dir) + 
except Exception: + return False + return isinstance(cached_path, str) and os.path.exists(cached_path) + + +def _sdk_version(): + try: + return metadata.version("kittentts") + except metadata.PackageNotFoundError: + return "unknown" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/test_analytics.py b/tests/test_analytics.py new file mode 100644 index 0000000..eb5e66f --- /dev/null +++ b/tests/test_analytics.py @@ -0,0 +1,149 @@ +import tempfile +import unittest +from pathlib import Path + +from kittentts.analytics import AnalyticsClient, error_code, parse_model_name +from kittentts.get_model import KittenTTS + + +class AnalyticsTests(unittest.TestCase): + def make_client(self, post_json, enabled=True, anonymous_id_path=None): + return AnalyticsClient( + sdk_version="0.8.1", + selected_model="kitten-tts-nano", + model_version="0.8", + asset_source="cache", + enabled=enabled, + anonymous_id_path=anonymous_id_path, + post_json=post_json, + async_delivery=False, + ) + + def test_disabled_analytics_sends_no_request(self): + calls = [] + client = self.make_client(lambda *args: calls.append(args), enabled=False) + + client.track_generation(selected_voice="Jasper", generation="wav") + + self.assertEqual(calls, []) + + def test_success_event_contains_required_fields(self): + calls = [] + client = self.make_client(lambda endpoint, payload, timeout: calls.append((endpoint, payload, timeout))) + + client.track_generation(selected_voice="Jasper", generation="wav") + + self.assertEqual(len(calls), 1) + endpoint, payload, timeout = calls[0] + self.assertEqual(endpoint, "https://kittentts-analytics.dewana-sl.workers.dev/v1/track") + self.assertEqual(timeout, 3.0) + for key in [ + "anonymous_id", + "client_event_id", + "timestamp", + "sdk_version", + "sdk_type", + "platform", + "runtime_version", + "selected_model", + "model_version", + "selected_voice", + 
"generation", + "asset_source", + ]: + self.assertIn(key, payload) + self.assertTrue(payload[key]) + self.assertNotIn("sdk_error_code", payload) + self.assertNotIn("ip_address", payload) + self.assertNotIn("ip_location", payload) + + def test_failure_event_includes_error_code(self): + calls = [] + client = self.make_client(lambda endpoint, payload, timeout: calls.append(payload)) + + client.track_generation( + selected_voice="Jasper", + generation="wav", + sdk_error_code=error_code(ValueError("bad voice")), + ) + + self.assertEqual(calls[0]["sdk_error_code"], "VALUE_ERROR") + + def test_network_error_does_not_raise(self): + def failing_post(endpoint, payload, timeout): + raise TimeoutError("timed out") + + client = self.make_client(failing_post) + client.track_generation(selected_voice="Jasper", generation="wav") + + def test_model_metadata_parses_variant_version(self): + self.assertEqual( + parse_model_name("KittenML/kitten-tts-nano-0.8-int8"), + {"selected_model": "kitten-tts-nano", "model_version": "0.8-int8"}, + ) + + def test_generate_tracks_success(self): + model = KittenTTS.__new__(KittenTTS) + model.model = DummyModel() + model.analytics = RecordingAnalytics() + + self.assertEqual(model.generate("hello", voice="Jasper"), "audio") + self.assertEqual( + model.analytics.events, + [{"selected_voice": "Jasper", "generation": "wav", "sdk_error_code": None}], + ) + + def test_generate_tracks_failure_and_reraises(self): + model = KittenTTS.__new__(KittenTTS) + model.model = FailingModel() + model.analytics = RecordingAnalytics() + + with self.assertRaises(ValueError): + model.generate("hello", voice="Jasper") + + self.assertEqual( + model.analytics.events, + [{"selected_voice": "Jasper", "generation": "wav", "sdk_error_code": "VALUE_ERROR"}], + ) + + def test_anonymous_id_is_stable_across_clients(self): + with tempfile.TemporaryDirectory() as tmpdir: + path = Path(tmpdir) / "analytics_id" + first = self.make_client(lambda *args: None, anonymous_id_path=path) + 
second = self.make_client(lambda *args: None, anonymous_id_path=path) + + self.assertEqual(first.anonymous_id, second.anonymous_id) + self.assertEqual(first.anonymous_id, path.read_text(encoding="utf-8")) + + def test_sdk_does_not_import_or_reference_posthog(self): + package_root = Path(__file__).resolve().parents[1] / "kittentts" + + for source_path in package_root.rglob("*.py"): + with self.subTest(path=source_path): + source = source_path.read_text(encoding="utf-8").lower() + self.assertNotIn("posthog", source) + +class DummyModel: + def generate(self, text, voice="expr-voice-5-m", speed=1.0, clean_text=False): + return "audio" + + +class FailingModel: + def generate(self, text, voice="expr-voice-5-m", speed=1.0, clean_text=False): + raise ValueError("bad voice") + + +class RecordingAnalytics: + def __init__(self): + self.events = [] + + def track_generation(self, selected_voice, generation, sdk_error_code=None): + self.events.append({ + "selected_voice": selected_voice, + "generation": generation, + "sdk_error_code": sdk_error_code, + }) + + +if __name__ == "__main__": + unittest.main() From eb63723405d4ef3dfef286a43e71ff0087aaa184 Mon Sep 17 00:00:00 2001 From: Dewan Shakil Date: Wed, 13 May 2026 01:29:53 +0530 Subject: [PATCH 2/7] Improve analytics delivery --- kittentts/analytics.py | 9 +++++++-- tests/test_analytics.py | 45 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/kittentts/analytics.py b/kittentts/analytics.py index 93e1380..7804725 100644 --- a/kittentts/analytics.py +++ b/kittentts/analytics.py @@ -119,7 +119,7 @@ def track_generation( payload["sdk_error_code"] = sdk_error_code if self._async_delivery: - thread = threading.Thread(target=self._send, args=(payload,), daemon=True) + thread = threading.Thread(target=self._send, args=(payload,), daemon=False) thread.start() else: self._send(payload) @@ -133,10 +133,15 @@ def _send(self, payload: Dict[str, str]) -> None: def 
post_json_request(endpoint: str, payload: Dict[str, str], timeout_seconds: float) -> None: body = json.dumps(payload).encode("utf-8") + sdk_version = str(payload.get("sdk_version") or "unknown").replace("\n", " ").replace("\r", " ") req = request.Request( endpoint, data=body, - headers={"Content-Type": "application/json"}, + headers={ + "Content-Type": "application/json", + "Accept": "application/json", + "User-Agent": f"KittenTTS-Python/{sdk_version}", + }, method="POST", ) with request.urlopen(req, timeout=timeout_seconds) as response: diff --git a/tests/test_analytics.py b/tests/test_analytics.py index eb5e66f..9db992a 100644 --- a/tests/test_analytics.py +++ b/tests/test_analytics.py @@ -1,8 +1,9 @@ import tempfile import unittest from pathlib import Path +from unittest.mock import patch -from kittentts.analytics import AnalyticsClient, error_code, parse_model_name +from kittentts.analytics import AnalyticsClient, error_code, parse_model_name, post_json_request from kittentts.get_model import KittenTTS @@ -76,6 +77,48 @@ def failing_post(endpoint, payload, timeout): client = self.make_client(failing_post) client.track_generation(selected_voice="Jasper", generation="wav") + def test_async_delivery_uses_non_daemon_thread(self): + client = AnalyticsClient( + sdk_version="0.8.1", + selected_model="kitten-tts-nano", + model_version="0.8", + asset_source="cache", + post_json=lambda *args: None, + async_delivery=True, + ) + + with patch("kittentts.analytics.threading.Thread") as thread_class: + client.track_generation(selected_voice="Jasper", generation="wav") + + self.assertFalse(thread_class.call_args.kwargs["daemon"]) + thread_class.return_value.start.assert_called_once() + + def test_post_request_uses_sdk_user_agent(self): + captured = [] + + class DummyResponse: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, traceback): + return False + + def read(self): + return b"{}" + + def fake_urlopen(req, timeout): + captured.append((req, 
timeout)) + return DummyResponse() + + payload = {"sdk_version": "0.8.1"} + + with patch("kittentts.analytics.request.urlopen", fake_urlopen): + post_json_request("https://example.com/v1/track", payload, 3.0) + + req, timeout = captured[0] + self.assertEqual(timeout, 3.0) + self.assertEqual(req.get_header("User-agent"), "KittenTTS-Python/0.8.1") + def test_model_metadata_parses_variant_version(self): self.assertEqual( parse_model_name("KittenML/kitten-tts-nano-0.8-int8"), From fb4de4515bc23cf4b7bded822daf85340da8f6d0 Mon Sep 17 00:00:00 2001 From: Dewan Shakil Date: Wed, 13 May 2026 13:06:42 +0530 Subject: [PATCH 3/7] Use analytics custom domain --- README.md | 2 +- kittentts/analytics.py | 2 +- tests/test_analytics.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3541eff..c3b64a0 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ print(model.available_voices) ### Analytics -Kitten TTS sends anonymous generation analytics to the KittenTTS ingest API. The SDK does not include PostHog or any analytics-provider SDK. +Kitten TTS sends anonymous generation analytics to the KittenTTS ingest API at `https://kittenmlanalytics.com/v1/track`. The SDK does not include PostHog or any analytics-provider SDK. 
Analytics can be disabled at model creation: diff --git a/kittentts/analytics.py b/kittentts/analytics.py index 7804725..8cd8994 100644 --- a/kittentts/analytics.py +++ b/kittentts/analytics.py @@ -12,7 +12,7 @@ from typing import Callable, Dict, Optional from urllib import request -ANALYTICS_ENDPOINT = "https://kittentts-analytics.dewana-sl.workers.dev/v1/track" +ANALYTICS_ENDPOINT = "https://kittenmlanalytics.com/v1/track" SDK_TYPE = "python" DEFAULT_TIMEOUT_SECONDS = 3.0 diff --git a/tests/test_analytics.py b/tests/test_analytics.py index 9db992a..af1ed5c 100644 --- a/tests/test_analytics.py +++ b/tests/test_analytics.py @@ -36,7 +36,7 @@ def test_success_event_contains_required_fields(self): self.assertEqual(len(calls), 1) endpoint, payload, timeout = calls[0] - self.assertEqual(endpoint, "https://kittentts-analytics.dewana-sl.workers.dev/v1/track") + self.assertEqual(endpoint, "https://kittenmlanalytics.com/v1/track") self.assertEqual(timeout, 3.0) for key in [ "anonymous_id", From a7f4a3b17f473ee9ac239594dd821971f20cef00 Mon Sep 17 00:00:00 2001 From: Dewan Shakil Date: Fri, 15 May 2026 00:28:31 +0530 Subject: [PATCH 4/7] Harden analytics failure handling --- kittentts/analytics.py | 16 +++++++++++++++- tests/test_analytics.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/kittentts/analytics.py b/kittentts/analytics.py index 8cd8994..93da51a 100644 --- a/kittentts/analytics.py +++ b/kittentts/analytics.py @@ -97,6 +97,17 @@ def track_generation( selected_voice: str, generation: str, sdk_error_code: Optional[str] = None, + ) -> None: + try: + self._track_generation(selected_voice, generation, sdk_error_code=sdk_error_code) + except Exception: + return + + def _track_generation( + self, + selected_voice: str, + generation: str, + sdk_error_code: Optional[str] = None, ) -> None: if not self.enabled: return @@ -152,7 +163,10 @@ def default_anonymous_id_path() -> Path: configured_home = 
os.environ.get("KITTENTTS_ANALYTICS_HOME") if configured_home: return Path(configured_home).expanduser() / "anonymous_id" - return Path.home() / ".kittentts" / "analytics_id" + try: + return Path.home() / ".kittentts" / "analytics_id" + except RuntimeError: + return Path(os.environ.get("TMPDIR", "/tmp")) / "kittentts" / "analytics_id" def load_or_create_anonymous_id(path: Path) -> str: diff --git a/tests/test_analytics.py b/tests/test_analytics.py index af1ed5c..fdc5f93 100644 --- a/tests/test_analytics.py +++ b/tests/test_analytics.py @@ -77,6 +77,26 @@ def failing_post(endpoint, payload, timeout): client = self.make_client(failing_post) client.track_generation(selected_voice="Jasper", generation="wav") + def test_payload_error_does_not_raise(self): + client = self.make_client(lambda *args: None) + + with patch("kittentts.analytics.uuid.uuid4", side_effect=RuntimeError("uuid failed")): + client.track_generation(selected_voice="Jasper", generation="wav") + + def test_thread_start_error_does_not_raise(self): + client = AnalyticsClient( + sdk_version="0.8.1", + selected_model="kitten-tts-nano", + model_version="0.8", + asset_source="cache", + post_json=lambda *args: None, + async_delivery=True, + ) + + with patch("kittentts.analytics.threading.Thread") as thread_class: + thread_class.return_value.start.side_effect = RuntimeError("thread failed") + client.track_generation(selected_voice="Jasper", generation="wav") + def test_async_delivery_uses_non_daemon_thread(self): client = AnalyticsClient( sdk_version="0.8.1", @@ -149,6 +169,13 @@ def test_generate_tracks_failure_and_reraises(self): [{"selected_voice": "Jasper", "generation": "wav", "sdk_error_code": "VALUE_ERROR"}], ) + def test_generate_ignores_analytics_failure(self): + model = KittenTTS.__new__(KittenTTS) + model.model = DummyModel() + model.analytics = FailingAnalytics() + + self.assertEqual(model.generate("hello", voice="Jasper"), "audio") + def test_anonymous_id_is_stable_across_clients(self): with 
tempfile.TemporaryDirectory() as tmpdir: path = Path(tmpdir) / "analytics_id" @@ -188,5 +215,10 @@ def track_generation(self, selected_voice, generation, sdk_error_code=None): }) +class FailingAnalytics: + def track_generation(self, selected_voice, generation, sdk_error_code=None): + raise RuntimeError("analytics failed") + + if __name__ == "__main__": unittest.main() From 9f7bfc34a005a9483c1e4a0db04802120e22142d Mon Sep 17 00:00:00 2001 From: Dewan Shakil Date: Sat, 16 May 2026 17:11:41 +0530 Subject: [PATCH 5/7] Move analytics docs out of README --- README.md | 12 +----------- docs/analytics.md | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 11 deletions(-) create mode 100644 docs/analytics.md diff --git a/README.md b/README.md index 41a1e0e..34a65cb 100644 --- a/README.md +++ b/README.md @@ -99,17 +99,7 @@ print(model.available_voices) # ['Bella', 'Jasper', 'Luna', 'Bruno', 'Rosie', 'Hugo', 'Kiki', 'Leo'] ``` -### Analytics - -Kitten TTS sends anonymous generation analytics to the KittenTTS ingest API at `https://kittenmlanalytics.com/v1/track`. The SDK does not include PostHog or any analytics-provider SDK. - -Analytics can be disabled at model creation: - -```python -model = KittenTTS("KittenML/kitten-tts-mini-0.8", analytics=False) -``` - -The SDK generates a persistent anonymous install ID and sends generation metadata such as SDK version, platform, runtime version, selected model, selected voice, generation type, and asset source. Analytics runs in the background with a short timeout and never blocks or fails TTS generation. +Kitten TTS sends anonymous generation analytics; see [`docs/analytics.md`](docs/analytics.md) for details and opt-out. ### Using with GPU diff --git a/docs/analytics.md b/docs/analytics.md new file mode 100644 index 0000000..d4691df --- /dev/null +++ b/docs/analytics.md @@ -0,0 +1,19 @@ +# Analytics + +Kitten TTS sends anonymous generation analytics to the KittenTTS ingest API at +`https://kittenmlanalytics.com/v1/track`. 
The SDK does not include PostHog or +any analytics-provider SDK, and it does not send input text or generated audio. + +Events include SDK version, SDK type, platform, runtime version, selected model, +model version, selected/default voice, generation type, asset source, and SDK +error code for failed calls. IP address and location are added server-side by +Cloudflare. + +Disable analytics at model creation: + +```python +model = KittenTTS("KittenML/kitten-tts-mini-0.8", analytics=False) +``` + +Analytics runs in the background with a short timeout. Network failures are +swallowed and do not block or fail TTS generation. From cf202b50edea4239289bc375e8f1705004eb854c Mon Sep 17 00:00:00 2001 From: Dewan Shakil Date: Thu, 21 May 2026 03:26:08 +0530 Subject: [PATCH 6/7] Track streaming analytics as stream events --- docs/analytics.md | 9 ++++++--- kittentts/get_model.py | 4 ++-- tests/test_analytics.py | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/docs/analytics.md b/docs/analytics.md index d4691df..b6cd615 100644 --- a/docs/analytics.md +++ b/docs/analytics.md @@ -5,9 +5,12 @@ Kitten TTS sends anonymous generation analytics to the KittenTTS ingest API at any analytics-provider SDK, and it does not send input text or generated audio. Events include SDK version, SDK type, platform, runtime version, selected model, -model version, selected/default voice, generation type, asset source, and SDK -error code for failed calls. IP address and location are added server-side by -Cloudflare. +model version, selected/default voice, generation type (`wav`, `speak`, or +`stream`), asset source, and SDK error code for failed calls. IP address and +location are added server-side by Cloudflare. + +Streaming calls send one `stream` analytics event per stream invocation, not one +event per generated chunk. 
Disable analytics at model creation: diff --git a/kittentts/get_model.py b/kittentts/get_model.py index 529d38e..113e84b 100644 --- a/kittentts/get_model.py +++ b/kittentts/get_model.py @@ -67,9 +67,9 @@ def generate_stream(self, text, voice="expr-voice-5-m", speed=1.0, clean_text=Fa try: yield from self.model.generate_stream(text, voice=voice, speed=speed, clean_text=clean_text) except Exception as exc: - self._track_generation(voice, generation="speak", sdk_error_code=error_code(exc)) + self._track_generation(voice, generation="stream", sdk_error_code=error_code(exc)) raise - self._track_generation(voice, generation="speak") + self._track_generation(voice, generation="stream") def generate_to_file(self, text, output_path, voice="expr-voice-5-m", speed=1.0, sample_rate=24000): """Generate audio from text and save to file. diff --git a/tests/test_analytics.py b/tests/test_analytics.py index fdc5f93..4d8ea9d 100644 --- a/tests/test_analytics.py +++ b/tests/test_analytics.py @@ -176,6 +176,30 @@ def test_generate_ignores_analytics_failure(self): self.assertEqual(model.generate("hello", voice="Jasper"), "audio") + def test_generate_stream_tracks_one_success_event(self): + model = KittenTTS.__new__(KittenTTS) + model.model = DummyStreamModel() + model.analytics = RecordingAnalytics() + + self.assertEqual(list(model.generate_stream("hello", voice="Jasper")), ["chunk-1", "chunk-2"]) + self.assertEqual( + model.analytics.events, + [{"selected_voice": "Jasper", "generation": "stream", "sdk_error_code": None}], + ) + + def test_generate_stream_tracks_failure_and_reraises(self): + model = KittenTTS.__new__(KittenTTS) + model.model = FailingStreamModel() + model.analytics = RecordingAnalytics() + + with self.assertRaises(ValueError): + list(model.generate_stream("hello", voice="Jasper")) + + self.assertEqual( + model.analytics.events, + [{"selected_voice": "Jasper", "generation": "stream", "sdk_error_code": "VALUE_ERROR"}], + ) + def 
test_anonymous_id_is_stable_across_clients(self): with tempfile.TemporaryDirectory() as tmpdir: path = Path(tmpdir) / "analytics_id" @@ -198,11 +222,23 @@ def generate(self, text, voice="expr-voice-5-m", speed=1.0, clean_text=False): return "audio" +class DummyStreamModel: + def generate_stream(self, text, voice="expr-voice-5-m", speed=1.0, clean_text=False): + yield "chunk-1" + yield "chunk-2" + + class FailingModel: def generate(self, text, voice="expr-voice-5-m", speed=1.0, clean_text=False): raise ValueError("bad voice") +class FailingStreamModel: + def generate_stream(self, text, voice="expr-voice-5-m", speed=1.0, clean_text=False): + raise ValueError("bad stream") + yield + + class RecordingAnalytics: def __init__(self): self.events = [] From b18c4a175943998b5d455d98c208902ca5e1c8d4 Mon Sep 17 00:00:00 2001 From: Dewan Shakil Date: Thu, 21 May 2026 17:39:22 +0530 Subject: [PATCH 7/7] Use daemon analytics delivery --- kittentts/analytics.py | 2 +- tests/test_analytics.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kittentts/analytics.py b/kittentts/analytics.py index 93da51a..044e944 100644 --- a/kittentts/analytics.py +++ b/kittentts/analytics.py @@ -130,7 +130,7 @@ def _track_generation( payload["sdk_error_code"] = sdk_error_code if self._async_delivery: - thread = threading.Thread(target=self._send, args=(payload,), daemon=False) + thread = threading.Thread(target=self._send, args=(payload,), daemon=True) thread.start() else: self._send(payload) diff --git a/tests/test_analytics.py b/tests/test_analytics.py index 4d8ea9d..110e346 100644 --- a/tests/test_analytics.py +++ b/tests/test_analytics.py @@ -97,7 +97,7 @@ def test_thread_start_error_does_not_raise(self): thread_class.return_value.start.side_effect = RuntimeError("thread failed") client.track_generation(selected_voice="Jasper", generation="wav") - def test_async_delivery_uses_non_daemon_thread(self): + def test_async_delivery_uses_daemon_thread(self): client = 
AnalyticsClient( sdk_version="0.8.1", selected_model="kitten-tts-nano", @@ -110,7 +110,7 @@ def test_async_delivery_uses_non_daemon_thread(self): with patch("kittentts.analytics.threading.Thread") as thread_class: client.track_generation(selected_voice="Jasper", generation="wav") - self.assertFalse(thread_class.call_args.kwargs["daemon"]) + self.assertTrue(thread_class.call_args.kwargs["daemon"]) thread_class.return_value.start.assert_called_once() def test_post_request_uses_sdk_user_agent(self):