From 455379675edc5d29637253ce0a4fafa5982f72a6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 4 Jun 2026 11:09:08 +0000 Subject: [PATCH] docs: update for pipecat PR #4588 - Add deprecation warning to AICFilter.create_vad_analyzer() - Update all examples to use AIC_SDK_LICENSE instead of AIC_LICENSE_KEY - Add new AICQuailVADAnalyzer documentation page - Update migration guide with environment variable change - Replace deprecated VAD method usage with AICQuailVADAnalyzer in examples --- .../server/utilities/audio/aic-filter.mdx | 50 ++-- .../audio/aic-quail-vad-analyzer.mdx | 224 ++++++++++++++++++ docs.json | 5 + 3 files changed, 260 insertions(+), 19 deletions(-) create mode 100644 api-reference/server/utilities/audio/aic-quail-vad-analyzer.mdx diff --git a/api-reference/server/utilities/audio/aic-filter.mdx b/api-reference/server/utilities/audio/aic-filter.mdx index 31cc9f0c..1be7af74 100644 --- a/api-reference/server/utilities/audio/aic-filter.mdx +++ b/api-reference/server/utilities/audio/aic-filter.mdx @@ -60,6 +60,13 @@ Examples: `"quail-vf-2.0-l-16khz"`, `"quail-vf-l-16khz"`, `"quail-s-16khz"`, `"q ### create_vad_analyzer + + **Deprecated in 1.4.0**: This method will be removed in Pipecat 1.6.0. Use + [`AICQuailVADAnalyzer`](/api-reference/server/utilities/audio/aic-quail-vad-analyzer) + instead, which provides a standalone Quail VAD 2.0 model that works + independently of the enhancement filter. + + Creates an `AICVADAnalyzer` that uses the AIC model's built-in voice activity detection. 
```python @@ -120,25 +127,30 @@ await task.queue_frame(FilterEnableFrame(True)) ## Usage Examples -### Basic Usage with AIC VAD +### Basic Usage with Quail VAD 2.0 -The recommended approach is to use `AICFilter` with its built-in VAD analyzer: +The recommended approach is to use `AICFilter` for enhancement and `AICQuailVADAnalyzer` for voice activity detection: ```python from pipecat.audio.filters.aic_filter import AICFilter +from pipecat.audio.vad.aic_quail_vad import AICQuailVADAnalyzer from pipecat.processors.aggregators.llm_response_universal import ( LLMContextAggregatorPair, LLMUserAggregatorParams, ) from pipecat.transports.services.daily import DailyTransport, DailyParams -# Create the AIC filter +# Create the AIC filter for enhancement aic_filter = AICFilter( license_key=os.environ["AIC_SDK_LICENSE"], model_id="quail-vf-2.0-l-16khz", ) -# Use AIC's integrated VAD +# Create standalone Quail VAD 2.0 analyzer +aic_vad = AICQuailVADAnalyzer( + license_key=os.environ["AIC_SDK_LICENSE"], +) + transport = DailyTransport( room_url, token, @@ -153,11 +165,7 @@ transport = DailyTransport( user_aggregator, assistant_aggregator = LLMContextAggregatorPair( context, user_params=LLMUserAggregatorParams( - vad_analyzer=aic_filter.create_vad_analyzer( - speech_hold_duration=0.05, - minimum_speech_duration=0.0, - sensitivity=6.0, - ), + vad_analyzer=aic_vad, ), ) ``` @@ -216,6 +224,7 @@ The AIC filter works with any Pipecat transport: ```python from pipecat.audio.filters.aic_filter import AICFilter +from pipecat.audio.vad.aic_quail_vad import AICQuailVADAnalyzer from pipecat.processors.aggregators.llm_response_universal import ( LLMContextAggregatorPair, LLMUserAggregatorParams, @@ -227,6 +236,10 @@ aic_filter = AICFilter( model_id="quail-vf-2.0-l-16khz", ) +aic_vad = AICQuailVADAnalyzer( + license_key=os.environ["AIC_SDK_LICENSE"], +) + transport = FastAPIWebsocketTransport( params=FastAPIWebsocketParams( audio_in_enabled=True, @@ -238,10 +251,7 @@ transport = 
FastAPIWebsocketTransport( user_aggregator, assistant_aggregator = LLMContextAggregatorPair( context, user_params=LLMUserAggregatorParams( - vad_analyzer=aic_filter.create_vad_analyzer( - speech_hold_duration=0.05, - sensitivity=6.0, - ), + vad_analyzer=aic_vad, ), ) ``` @@ -277,11 +287,12 @@ The AIC filter enhances audio before it reaches the VAD and STT stages, improvin ### Migration Steps -1. Update Pipecat to the latest version (aic-sdk v2.x is included automatically). -2. Remove deprecated constructor parameters (`model_type`, `voice_gain`, `noise_gate_enable`). -3. Add `model_id` parameter with an appropriate model (e.g., `"quail-vf-l-16khz"`). -4. Update any runtime VAD adjustments to use the new VAD context API. -5. We recommend to use `aic_filter.create_vad_analyzer()` for improved accuracy. +1. Update Pipecat to the latest version (aic-sdk v2.3.0+ is included automatically). +2. **Update environment variable**: Change `AIC_LICENSE_KEY` to `AIC_SDK_LICENSE` in your `.env` file. +3. Remove deprecated constructor parameters (`model_type`, `voice_gain`, `noise_gate_enable`). +4. Add `model_id` parameter with an appropriate model (e.g., `"quail-vf-2.0-l-16khz"`). +5. **For VAD**: Replace `aic_filter.create_vad_analyzer()` with `AICQuailVADAnalyzer` for improved accuracy and independence from the enhancement filter. +6. Update any runtime VAD adjustments to use the new VAD context API. 
### Breaking Changes @@ -295,11 +306,12 @@ The AIC filter enhances audio before it reaches the VAD and STT stages, improvin ## Notes - Requires ai-coustics license key (get one at [developers.ai-coustics.io](https://developers.ai-coustics.io)) +- **Environment variable**: Use `AIC_SDK_LICENSE` (not `AIC_LICENSE_KEY`) for authentication - Voice Focus 2.0 models are supported with aic-sdk 2.1.0+ (included in pipecat-ai[aic]) - Models are automatically downloaded and cached on first use - Supports real-time audio processing with low latency - Handles PCM_16 audio format (int16 samples) - Thread-safe for pipeline processing - Can be dynamically enabled/disabled via `FilterEnableFrame` -- Integrated VAD provides better accuracy than standalone VAD when using enhancement +- **For VAD**: Use [`AICQuailVADAnalyzer`](/api-reference/server/utilities/audio/aic-quail-vad-analyzer) instead of the deprecated `create_vad_analyzer()` method - For available models, visit [artifacts.ai-coustics.io](https://artifacts.ai-coustics.io/) diff --git a/api-reference/server/utilities/audio/aic-quail-vad-analyzer.mdx b/api-reference/server/utilities/audio/aic-quail-vad-analyzer.mdx new file mode 100644 index 00000000..75c1be49 --- /dev/null +++ b/api-reference/server/utilities/audio/aic-quail-vad-analyzer.mdx @@ -0,0 +1,224 @@ +--- +title: "AICQuailVADAnalyzer" +description: "Standalone Quail VAD 2.0 voice activity detection analyzer" +--- + +## Overview + +`AICQuailVADAnalyzer` is a standalone voice activity detection (VAD) analyzer powered by ai-coustics' Quail VAD 2.0 model. Unlike the deprecated `AICVADAnalyzer`, which relies on `AICFilter`'s internal VAD, this analyzer has its own dedicated processor and can be placed anywhere in the pipeline, working independently of audio enhancement. + +The analyzer provides noise-robust speech detection using a specialized Quail VAD-only model, making it ideal for detecting speech in challenging acoustic environments. 
+ +To use AIC, you need a license key. Get started at [ai-coustics.com](https://ai-coustics.com/pipecat). + +## Installation + +The AIC Quail VAD analyzer requires additional dependencies: + +```bash +uv add "pipecat-ai[aic]" +``` + +## Constructor Parameters + + + ai-coustics SDK license key for authentication. Get your key at + [developers.ai-coustics.io](https://developers.ai-coustics.io). + + + + Quail VAD model identifier. Defaults to the published standalone VAD model + `"quail-vad-2.0-xxs-16khz"`. See + [artifacts.ai-coustics.io](https://artifacts.ai-coustics.io/) for the + catalogue. Ignored if `model_path` is provided. + + + + Optional path to a local `.aicmodel` file. Overrides `model_id` when set. + Useful for offline deployments or custom models. + + + + Directory for downloaded models. Defaults to `~/.cache/pipecat/aic-models`. + + + + Seconds the VAD continues reporting speech after the signal stops containing + speech. Range: `0.0` to 300 times the model window length. Default (SDK): `0.03s`. + + + + Seconds of speech required before the VAD reports speech detected. Range: + `0.0` to `1.0`. Default (SDK): `0.0s`. + + + + Speech-probability threshold for dedicated Quail VAD models. Range: `0.0` to + `1.0`. Values above this threshold are considered speech. Default is + model-specific. + +Note: This differs from the deprecated `AICVADAnalyzer`, which used an +energy-based threshold in the range `1.0` to `15.0`. + + + + + Initial sample rate; the pipeline will set this via `set_sample_rate` once the + transport rate is known. + + + + Optional `VADParams` for the base VAD state machine configuration. 
+ + +## Usage Examples + +### Basic Usage + +The recommended approach for AIC-powered voice detection: + +```python +import os +from pipecat.audio.filters.aic_filter import AICFilter +from pipecat.audio.vad.aic_quail_vad import AICQuailVADAnalyzer +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.transports.services.daily import DailyTransport, DailyParams + +# Create the AIC filter for audio enhancement +aic_filter = AICFilter( + license_key=os.environ["AIC_SDK_LICENSE"], + model_id="quail-vf-2.0-l-16khz", +) + +# Create standalone Quail VAD 2.0 analyzer +aic_vad = AICQuailVADAnalyzer( + license_key=os.environ["AIC_SDK_LICENSE"], +) + +transport = DailyTransport( + room_url, + token, + "Bot", + DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + audio_in_filter=aic_filter, + ), +) + +user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams( + vad_analyzer=aic_vad, + ), +) +``` + +### With Custom VAD Parameters + +Fine-tune the VAD behavior for your specific use case: + +```python +from pipecat.audio.vad.aic_quail_vad import AICQuailVADAnalyzer + +aic_vad = AICQuailVADAnalyzer( + license_key=os.environ["AIC_SDK_LICENSE"], + speech_hold_duration=0.05, # Hold speech detection for 50ms after speech ends + minimum_speech_duration=0.1, # Require 100ms of speech before triggering + sensitivity=0.5, # Speech probability threshold (0.0-1.0) +) +``` + +### VAD-Only (Without Enhancement) + +Use Quail VAD without audio enhancement: + +```python +from pipecat.audio.vad.aic_quail_vad import AICQuailVADAnalyzer +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.transports.services.daily import DailyTransport, DailyParams + +# Just VAD, no enhancement filter +aic_vad = AICQuailVADAnalyzer( + 
license_key=os.environ["AIC_SDK_LICENSE"], +) + +transport = DailyTransport( + room_url, + token, + "Bot", + DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + # No audio_in_filter - raw audio goes directly to VAD + ), +) + +user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams( + vad_analyzer=aic_vad, + ), +) +``` + +### Using a Local Model + +For offline deployments or custom Quail VAD models: + +```python +from pathlib import Path +from pipecat.audio.vad.aic_quail_vad import AICQuailVADAnalyzer + +aic_vad = AICQuailVADAnalyzer( + license_key=os.environ["AIC_SDK_LICENSE"], + model_path=Path("/path/to/your/quail-vad-model.aicmodel"), +) +``` + + + See the [AIC Quail VAD + example](https://github.com/pipecat-ai/pipecat/blob/main/examples/voice/voice-aicoustics-vad-only.py) + for a complete working example with detailed logging. + + +## Comparison to Deprecated AICVADAnalyzer + +| Feature | AICQuailVADAnalyzer (Recommended) | AICVADAnalyzer (Deprecated) | +| -------------------- | ------------------------------------------------ | --------------------------------------- | +| **Model** | Standalone Quail VAD 2.0 | Enhancement model's internal VAD | +| **Independence** | Owns its own processor | Bound to `AICFilter` instance | +| **Audio path** | Processes whatever the pipeline feeds it | Reads post-enhancement VAD state | +| **Sensitivity** | Probability threshold (0.0-1.0) | Energy threshold (1.0-15.0) | +| **Placement** | Can be placed anywhere in pipeline | Must follow `AICFilter` | +| **Use case** | Noise-robust VAD as primary differentiator | Legacy coupling to enhancement pipeline | +| **Removal timeline** | N/A (current recommended approach) | Will be removed in Pipecat 1.6.0 | + +## Audio Flow + +```mermaid +graph TD + A[AudioRawFrame] --> B[AICFilter
optional enhancement] + B --> C[AICQuailVADAnalyzer
standalone VAD] + C --> D[STT] +``` + +The Quail VAD analyzer can work with or without the AIC enhancement filter, providing flexibility in your pipeline architecture. + +## Notes + +- Requires ai-coustics license key (get one at [developers.ai-coustics.io](https://developers.ai-coustics.io)) +- **Environment variable**: Use `AIC_SDK_LICENSE` for authentication +- Default model is `quail-vad-2.0-xxs-16khz`, optimized for 16kHz audio +- Model is downloaded and cached on first use +- Works independently of `AICFilter` - can be used with or without audio enhancement +- Provides noise-robust speech detection in challenging acoustic environments +- Handles PCM_16 audio format (int16 samples) +- Thread-safe for pipeline processing +- For available models, visit [artifacts.ai-coustics.io](https://artifacts.ai-coustics.io/) diff --git a/docs.json b/docs.json index d53c2f88..890a18bc 100644 --- a/docs.json +++ b/docs.json @@ -477,6 +477,7 @@ "group": "Audio Processing", "pages": [ "api-reference/server/utilities/audio/aic-filter", + "api-reference/server/utilities/audio/aic-quail-vad-analyzer", "api-reference/server/utilities/audio/audio-buffer-processor", "api-reference/server/utilities/audio/koala-filter", "api-reference/server/utilities/audio/krisp-viva-filter", @@ -1926,6 +1927,10 @@ "source": "/server/utilities/audio/aic-filter", "destination": "/api-reference/server/utilities/audio/aic-filter" }, + { + "source": "/server/utilities/audio/aic-quail-vad-analyzer", + "destination": "/api-reference/server/utilities/audio/aic-quail-vad-analyzer" + }, { "source": "/server/utilities/audio/audio-buffer-processor", "destination": "/api-reference/server/utilities/audio/audio-buffer-processor"