From e784c2d353da1d6607c9b24e730fc12f24834c4e Mon Sep 17 00:00:00 2001 From: jakmro Date: Wed, 8 Apr 2026 15:42:50 +0200 Subject: [PATCH] v1.12.0 --- .gitattributes | 1 + README.md | 239 +++- .../src/main/jniLibs/arm64-v8a/libcactus.a | Bin 84261994 -> 134 bytes cpp/HybridCactus.cpp | 133 ++- cpp/HybridCactus.hpp | 15 + cpp/cactus_ffi.h | 242 +++- example/ios/Podfile.lock | 4 +- .../cactus.framework/Headers/cactus_ffi.h | 242 +++- .../cactus.framework/Headers/cactus_utils.h | 1049 +++++++++++++++-- .../cactus.framework/Headers/engine.h | 200 +++- .../cactus.framework/Headers/gemma_tools.h | 69 +- .../cactus.framework/Headers/graph.h | 86 +- .../cactus.framework/Headers/kernel.h | 131 +- .../cactus.framework/Headers/kernel_utils.h | 193 ++- .../cactus.framework/cactus | Bin 3552640 -> 4341312 bytes .../cactus.framework/Headers/cactus_ffi.h | 242 +++- .../cactus.framework/Headers/cactus_utils.h | 1049 +++++++++++++++-- .../cactus.framework/Headers/engine.h | 200 +++- .../cactus.framework/Headers/gemma_tools.h | 69 +- .../cactus.framework/Headers/graph.h | 86 +- .../cactus.framework/Headers/kernel.h | 131 +- .../cactus.framework/Headers/kernel_utils.h | 193 ++- .../ios-arm64/cactus.framework/cactus | Bin 3511128 -> 4281880 bytes .../generated/shared/c++/HybridCactusSpec.cpp | 3 + .../generated/shared/c++/HybridCactusSpec.hpp | 3 + package.json | 2 +- src/classes/{CactusVAD.ts => CactusAudio.ts} | 45 +- src/classes/CactusLM.ts | 36 + .../{useCactusVAD.ts => useCactusAudio.ts} | 93 +- src/index.tsx | 25 +- src/modelRegistry.ts | 2 +- src/native/Cactus.ts | 121 +- src/specs/Cactus.nitro.ts | 16 + src/types/CactusAudio.ts | 73 ++ src/types/CactusLM.ts | 17 + src/types/CactusSTT.ts | 1 + src/types/CactusVAD.ts | 39 - 37 files changed, 4560 insertions(+), 490 deletions(-) rename src/classes/{CactusVAD.ts => CactusAudio.ts} (72%) rename src/hooks/{useCactusVAD.ts => useCactusAudio.ts} (67%) create mode 100644 src/types/CactusAudio.ts delete mode 100644 src/types/CactusVAD.ts diff --git a/.gitattributes b/.gitattributes index e27f70f..8b5d28d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.pbxproj -text # specific for windows script files *.bat text eol=crlf +android/src/main/jniLibs/arm64-v8a/libcactus.a filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 1a7dd08..e42d6af 100644 --- a/README.md +++ b/README.md @@ -649,18 +649,18 @@ console.log('Language:', result.language); // e.g. 'en' console.log('Confidence:', result.confidence); ``` -## Voice Activity Detection (VAD) +## Audio Processing -The `CactusVAD` class detects speech segments in audio, returning timestamped intervals where speech is present. +The `CactusAudio` class provides voice activity detection (VAD), speaker diarization, and speaker embedding extraction. -### Class +### Voice Activity Detection ```typescript -import { CactusVAD } from 'cactus-react-native'; +import { CactusAudio } from 'cactus-react-native'; -const cactusVAD = new CactusVAD({ model: 'silero-vad' }); +const cactusAudio = new CactusAudio({ model: 'silero-vad' }); -const result = await cactusVAD.vad({ +const result = await cactusAudio.vad({ audio: 'path/to/audio.wav', options: { threshold: 0.5, @@ -674,22 +674,68 @@ console.log('Speech segments:', result.segments); console.log('Total time (ms):', result.totalTime); ``` +### Speaker Diarization + +```typescript +import { CactusAudio } from 'cactus-react-native'; + +const cactusAudio = new CactusAudio({ model: 'silero-vad' }); + +const result = await cactusAudio.diarize({ + audio: 'path/to/audio.wav', + options: { + numSpeakers: 2, + minSpeakers: 1, + maxSpeakers: 4, + } +}); + +console.log('Number of speakers:', result.numSpeakers); +console.log('Scores:', result.scores); +``` + +### Speaker Embedding + +```typescript +import { CactusAudio } from 'cactus-react-native'; + +const cactusAudio = new CactusAudio({ model: 'silero-vad' }); + +const result = await cactusAudio.embedSpeaker({ + audio: 'path/to/audio.wav', +}); + +console.log('Speaker embedding:', result.embedding); +``` + ### Hook ```tsx -import { useCactusVAD } from 'cactus-react-native'; +import { useCactusAudio } from 'cactus-react-native'; const App = () => { - const cactusVAD = useCactusVAD({ model: 'silero-vad' }); + const cactusAudio = useCactusAudio({ model: 'silero-vad' }); const handleVAD = async () => { - const result = await cactusVAD.vad({ + const result = await cactusAudio.vad({ audio: 'path/to/audio.wav', }); console.log('Speech segments:', result.segments); }; - return