diff --git a/.gitattributes b/.gitattributes index e27f70f..8b5d28d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.pbxproj -text # specific for windows script files *.bat text eol=crlf +android/src/main/jniLibs/arm64-v8a/libcactus.a filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 1a7dd08..e42d6af 100644 --- a/README.md +++ b/README.md @@ -649,18 +649,18 @@ console.log('Language:', result.language); // e.g. 'en' console.log('Confidence:', result.confidence); ``` -## Voice Activity Detection (VAD) +## Audio Processing -The `CactusVAD` class detects speech segments in audio, returning timestamped intervals where speech is present. +The `CactusAudio` class provides voice activity detection (VAD), speaker diarization, and speaker embedding extraction. -### Class +### Voice Activity Detection ```typescript -import { CactusVAD } from 'cactus-react-native'; +import { CactusAudio } from 'cactus-react-native'; -const cactusVAD = new CactusVAD({ model: 'silero-vad' }); +const cactusAudio = new CactusAudio({ model: 'silero-vad' }); -const result = await cactusVAD.vad({ +const result = await cactusAudio.vad({ audio: 'path/to/audio.wav', options: { threshold: 0.5, @@ -674,22 +674,68 @@ console.log('Speech segments:', result.segments); console.log('Total time (ms):', result.totalTime); ``` +### Speaker Diarization + +```typescript +import { CactusAudio } from 'cactus-react-native'; + +const cactusAudio = new CactusAudio({ model: 'silero-vad' }); + +const result = await cactusAudio.diarize({ + audio: 'path/to/audio.wav', + options: { + numSpeakers: 2, + minSpeakers: 1, + maxSpeakers: 4, + } +}); + +console.log('Number of speakers:', result.numSpeakers); +console.log('Scores:', result.scores); +``` + +### Speaker Embedding + +```typescript +import { CactusAudio } from 'cactus-react-native'; + +const cactusAudio = new CactusAudio({ model: 'silero-vad' }); + +const result = await cactusAudio.embedSpeaker({ + audio: 'path/to/audio.wav', +}); + +console.log('Speaker embedding:', result.embedding); +``` + ### Hook ```tsx -import { useCactusVAD } from 'cactus-react-native'; +import { useCactusAudio } from 'cactus-react-native'; const App = () => { - const cactusVAD = useCactusVAD({ model: 'silero-vad' }); + const cactusAudio = useCactusAudio({ model: 'silero-vad' }); const handleVAD = async () => { - const result = await cactusVAD.vad({ + const result = await cactusAudio.vad({ audio: 'path/to/audio.wav', }); console.log('Speech segments:', result.segments); }; - return