diff --git a/.gitattributes b/.gitattributes
index e27f70f..8b5d28d 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,4 @@
*.pbxproj -text
# specific for windows script files
*.bat text eol=crlf
+android/src/main/jniLibs/arm64-v8a/libcactus.a filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
index 1a7dd08..e42d6af 100644
--- a/README.md
+++ b/README.md
@@ -649,18 +649,18 @@ console.log('Language:', result.language); // e.g. 'en'
console.log('Confidence:', result.confidence);
```
-## Voice Activity Detection (VAD)
+## Audio Processing
-The `CactusVAD` class detects speech segments in audio, returning timestamped intervals where speech is present.
+The `CactusAudio` class provides voice activity detection (VAD), speaker diarization, and speaker embedding extraction.
-### Class
+### Voice Activity Detection
```typescript
-import { CactusVAD } from 'cactus-react-native';
+import { CactusAudio } from 'cactus-react-native';
-const cactusVAD = new CactusVAD({ model: 'silero-vad' });
+const cactusAudio = new CactusAudio({ model: 'silero-vad' });
-const result = await cactusVAD.vad({
+const result = await cactusAudio.vad({
audio: 'path/to/audio.wav',
options: {
threshold: 0.5,
@@ -674,22 +674,68 @@ console.log('Speech segments:', result.segments);
console.log('Total time (ms):', result.totalTime);
```
+### Speaker Diarization
+
+```typescript
+import { CactusAudio } from 'cactus-react-native';
+
+const cactusAudio = new CactusAudio({ model: 'silero-vad' });
+
+const result = await cactusAudio.diarize({
+ audio: 'path/to/audio.wav',
+ options: {
+ numSpeakers: 2,
+ minSpeakers: 1,
+ maxSpeakers: 4,
+ }
+});
+
+console.log('Number of speakers:', result.numSpeakers);
+console.log('Scores:', result.scores);
+```
+
+### Speaker Embedding
+
+```typescript
+import { CactusAudio } from 'cactus-react-native';
+
+const cactusAudio = new CactusAudio({ model: 'silero-vad' });
+
+const result = await cactusAudio.embedSpeaker({
+ audio: 'path/to/audio.wav',
+});
+
+console.log('Speaker embedding:', result.embedding);
+```
+
### Hook
```tsx
-import { useCactusVAD } from 'cactus-react-native';
+import { useCactusAudio } from 'cactus-react-native';
const App = () => {
- const cactusVAD = useCactusVAD({ model: 'silero-vad' });
+ const cactusAudio = useCactusAudio({ model: 'silero-vad' });
const handleVAD = async () => {
- const result = await cactusVAD.vad({
+ const result = await cactusAudio.vad({
audio: 'path/to/audio.wav',
});
console.log('Speech segments:', result.segments);
};
- return ;
+ const handleDiarize = async () => {
+ const result = await cactusAudio.diarize({
+ audio: 'path/to/audio.wav',
+ });
+ console.log('Speakers:', result.numSpeakers);
+ };
+
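+ return (
+ <>
+ <Button title="Detect Speech" onPress={handleVAD} />
+ <Button title="Diarize" onPress={handleDiarize} />
+ </>
+ );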
};
```
@@ -985,9 +1031,19 @@ Performs text completion with optional streaming and tool support. Automatically
- `toolRagTopK` - Number of tools to select via RAG when tool list is large (default: `2`).
- `includeStopSequences` - Whether to include stop sequences in the response (default: `false`).
- `useVad` - Whether to use VAD preprocessing (default: `true`).
+ - `enableThinking` - Whether to enable thinking/reasoning output if supported by the model (default: unset).
- `tools` - Array of `CactusLMTool` objects for function calling.
- `onToken` - Callback for streaming tokens.
+**`prefill(params: CactusLMPrefillParams): Promise<CactusLMPrefillResult>`**
+
+Runs prompt prefill without generating any output tokens. Useful for measuring prefill performance or warming up the model's KV cache. Automatically calls `init()` if not already initialized. Throws an error if a generation is already in progress.
+
+**Parameters:**
+- `messages` - Array of `CactusLMMessage` objects.
+- `options` - Same options as `complete`.
+- `tools` - Array of `CactusLMTool` objects.
+
**`tokenize(params: CactusLMTokenizeParams): Promise`**
Converts text into tokens using the model's tokenizer.
@@ -1038,7 +1094,7 @@ Returns available models.
**`getModelName(): string`**
-Returns the model slug or path the instance was created with.
+Returns the computed model identifier including quantization and pro suffix (e.g., `'qwen3-0.6b-int8'`, `'lfm2-vl-450m-int4-pro'`).
### useCactusLM Hook
@@ -1121,6 +1177,7 @@ Starts a streaming transcription session. Automatically calls `init()` if not al
- `confirmationThreshold` - Fuzzy match ratio required to confirm a transcription segment (default: `0.99`).
- `minChunkSize` - Minimum number of audio samples before processing (default: `32000`).
- `telemetryEnabled` - Enable telemetry for this session (default: `true`).
+- `language` - Language code for transcription (e.g., `'en'`, `'es'`, `'fr'`). If not set, language is auto-detected.
**`streamTranscribeProcess(params: CactusSTTStreamTranscribeProcessParams): Promise`**
@@ -1167,7 +1224,7 @@ Returns available speech-to-text models.
**`getModelName(): string`**
-Returns the model slug or path the instance was created with.
+Returns the computed model identifier including quantization and pro suffix (e.g., `'whisper-small-int8'`).
### useCactusSTT Hook
@@ -1200,32 +1257,32 @@ The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When
- `destroy(): Promise` - Releases all resources associated with the model. Clears the `transcription`, `streamTranscribeConfirmed`, and `streamTranscribePending` state. Automatically called when the component unmounts.
- `getModels(): Promise` - Returns available speech-to-text models.
-### CactusVAD Class
+### CactusAudio Class
#### Constructor
-**`new CactusVAD(params?: CactusVADParams)`**
+**`new CactusAudio(params?: CactusAudioParams)`**
**Parameters:**
-- `model` - Model slug or absolute path to a VAD model file (default: `'silero-vad'`).
+- `model` - Model slug or absolute path to an audio model file (default: `'silero-vad'`).
- `options` - Model options:
- `quantization` - Quantization type: `'int4'` | `'int8'` (default: `'int8'`).
- `pro` - Enable NPU-accelerated models (default: `false`).
#### Methods
-**`download(params?: CactusVADDownloadParams): Promise`**
+**`download(params?: CactusAudioDownloadParams): Promise`**
-Downloads the VAD model. If the model is already downloaded, returns immediately with progress `1`. Throws an error if a download is already in progress.
+Downloads the audio model. If the model is already downloaded, returns immediately with progress `1`. Throws an error if a download is already in progress.
**Parameters:**
- `onProgress` - Callback for download progress (0-1).
**`init(): Promise`**
-Initializes the VAD model. Safe to call multiple times (idempotent). Throws an error if the model is not downloaded yet.
+Initializes the audio model. Safe to call multiple times (idempotent). Throws an error if the model is not downloaded yet.
-**`vad(params: CactusVADVadParams): Promise`**
+**`vad(params: CactusAudioVADParams): Promise<CactusAudioVADResult>`**
Runs voice activity detection on the given audio. Automatically calls `init()` if not already initialized.
@@ -1243,21 +1300,41 @@ Runs voice activity detection on the given audio. Automatically calls `init()` i
- `minSilenceAtMaxSpeech` - Minimum silence at max speech duration.
- `useMaxPossSilAtMaxSpeech` - Whether to use maximum possible silence at max speech.
+**`diarize(params: CactusAudioDiarizeParams): Promise<CactusAudioDiarizeResult>`**
+
+Runs speaker diarization on the given audio. Automatically calls `init()` if not already initialized.
+
+**Parameters:**
+- `audio` - Path to the audio file or raw PCM samples as a byte array.
+- `options` - Diarize options:
+ - `stepMs` - Step size in milliseconds.
+ - `threshold` - Diarization threshold.
+ - `numSpeakers` - Expected number of speakers.
+ - `minSpeakers` - Minimum number of speakers.
+ - `maxSpeakers` - Maximum number of speakers.
+
+**`embedSpeaker(params: CactusAudioEmbedSpeakerParams): Promise<CactusAudioEmbedSpeakerResult>`**
+
+Extracts a speaker embedding vector from the given audio. Automatically calls `init()` if not already initialized.
+
+**Parameters:**
+- `audio` - Path to the audio file or raw PCM samples as a byte array.
+
**`destroy(): Promise`**
Releases all resources associated with the model. Safe to call even if the model is not initialized.
**`getModels(): Promise`**
-Returns available VAD models.
+Returns available audio models.
**`getModelName(): string`**
-Returns the model slug or path the instance was created with.
+Returns the computed model identifier including quantization and pro suffix (e.g., `'silero-vad-int8'`).
-### useCactusVAD Hook
+### useCactusAudio Hook
-The `useCactusVAD` hook manages a `CactusVAD` instance with reactive state. When model parameters (`model`, `options`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
+The `useCactusAudio` hook manages a `CactusAudio` instance with reactive state. When model parameters (`model`, `options`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
#### State
@@ -1269,11 +1346,13 @@ The `useCactusVAD` hook manages a `CactusVAD` instance with reactive state. When
#### Methods
-- `download(params?: CactusVADDownloadParams): Promise` - Downloads the model. Updates `isDownloading` and `downloadProgress` state during download. Sets `isDownloaded` to `true` on success.
+- `download(params?: CactusAudioDownloadParams): Promise` - Downloads the model. Updates `isDownloading` and `downloadProgress` state during download. Sets `isDownloaded` to `true` on success.
- `init(): Promise` - Initializes the model.
-- `vad(params: CactusVADVadParams): Promise` - Runs voice activity detection.
+- `vad(params: CactusAudioVADParams): Promise<CactusAudioVADResult>` - Runs voice activity detection.
+- `diarize(params: CactusAudioDiarizeParams): Promise<CactusAudioDiarizeResult>` - Runs speaker diarization.
+- `embedSpeaker(params: CactusAudioEmbedSpeakerParams): Promise<CactusAudioEmbedSpeakerResult>` - Extracts a speaker embedding.
- `destroy(): Promise` - Releases all resources. Automatically called when the component unmounts.
-- `getModels(): Promise` - Returns available VAD models.
+- `getModels(): Promise` - Returns available audio models.
### CactusIndex Class
@@ -1413,6 +1492,7 @@ interface CactusLMCompleteOptions {
toolRagTopK?: number;
includeStopSequences?: boolean;
useVad?: boolean;
+ enableThinking?: boolean;
}
```
@@ -1446,12 +1526,36 @@ interface CactusLMCompleteParams {
}
```
+### CactusLMPrefillParams
+
+```typescript
+interface CactusLMPrefillParams {
+ messages: CactusLMMessage[];
+ options?: CactusLMCompleteOptions;
+ tools?: CactusLMTool[];
+}
+```
+
+### CactusLMPrefillResult
+
+```typescript
+interface CactusLMPrefillResult {
+ success: boolean;
+ error: string | null;
+ prefillTokens: number;
+ prefillTps: number;
+ totalTimeMs: number;
+ ramUsageMb: number;
+}
+```
+
### CactusLMCompleteResult
```typescript
interface CactusLMCompleteResult {
success: boolean;
response: string;
+ thinking?: string;
functionCalls?: {
name: string;
arguments: { [key: string]: any };
@@ -1658,6 +1762,7 @@ interface CactusSTTStreamTranscribeStartOptions {
confirmationThreshold?: number;
minChunkSize?: number;
telemetryEnabled?: boolean;
+ language?: string;
}
```
@@ -1728,27 +1833,27 @@ interface CactusSTTDetectLanguageResult {
}
```
-### CactusVADParams
+### CactusAudioParams
```typescript
-interface CactusVADParams {
+interface CactusAudioParams {
model?: string;
options?: CactusModelOptions;
}
```
-### CactusVADDownloadParams
+### CactusAudioDownloadParams
```typescript
-interface CactusVADDownloadParams {
+interface CactusAudioDownloadParams {
onProgress?: (progress: number) => void;
}
```
-### CactusVADOptions
+### CactusAudioVADOptions
```typescript
-interface CactusVADOptions {
+interface CactusAudioVADOptions {
threshold?: number;
negThreshold?: number;
minSpeechDurationMs?: number;
@@ -1762,31 +1867,85 @@ interface CactusVADOptions {
}
```
-### CactusVADSegment
+### CactusAudioVADSegment
```typescript
-interface CactusVADSegment {
+interface CactusAudioVADSegment {
start: number;
end: number;
}
```
-### CactusVADResult
+### CactusAudioVADResult
```typescript
-interface CactusVADResult {
- segments: CactusVADSegment[];
+interface CactusAudioVADResult {
+ segments: CactusAudioVADSegment[];
totalTime: number;
ramUsage: number;
}
```
-### CactusVADVadParams
+### CactusAudioVADParams
```typescript
-interface CactusVADVadParams {
+interface CactusAudioVADParams {
audio: string | number[];
- options?: CactusVADOptions;
+ options?: CactusAudioVADOptions;
+}
+```
+
+### CactusAudioDiarizeOptions
+
+```typescript
+interface CactusAudioDiarizeOptions {
+ stepMs?: number;
+ threshold?: number;
+ numSpeakers?: number;
+ minSpeakers?: number;
+ maxSpeakers?: number;
+}
+```
+
+### CactusAudioDiarizeParams
+
+```typescript
+interface CactusAudioDiarizeParams {
+ audio: string | number[];
+ options?: CactusAudioDiarizeOptions;
+}
+```
+
+### CactusAudioDiarizeResult
+
+```typescript
+interface CactusAudioDiarizeResult {
+ success: boolean;
+ error: string | null;
+ numSpeakers: number;
+ scores: number[];
+ totalTimeMs: number;
+ ramUsageMb: number;
+}
+```
+
+### CactusAudioEmbedSpeakerParams
+
+```typescript
+interface CactusAudioEmbedSpeakerParams {
+ audio: string | number[];
+}
+```
+
+### CactusAudioEmbedSpeakerResult
+
+```typescript
+interface CactusAudioEmbedSpeakerResult {
+ success: boolean;
+ error: string | null;
+ embedding: number[];
+ totalTimeMs: number;
+ ramUsageMb: number;
}
```
diff --git a/android/src/main/jniLibs/arm64-v8a/libcactus.a b/android/src/main/jniLibs/arm64-v8a/libcactus.a
index a51d8b0..c3d6138 100644
Binary files a/android/src/main/jniLibs/arm64-v8a/libcactus.a and b/android/src/main/jniLibs/arm64-v8a/libcactus.a differ
diff --git a/cpp/HybridCactus.cpp b/cpp/HybridCactus.cpp
index 6a35bb2..156c99b 100644
--- a/cpp/HybridCactus.cpp
+++ b/cpp/HybridCactus.cpp
@@ -65,7 +65,8 @@ std::shared_ptr> HybridCactus::complete(
responseBuffer.data(), responseBufferSize,
optionsJson ? optionsJson->c_str() : nullptr,
toolsJson ? toolsJson->c_str() : nullptr,
- cactusTokenCallback, &callbackCtx);
+ cactusTokenCallback, &callbackCtx,
+ nullptr, 0);
if (result < 0) {
throw std::runtime_error("Cactus complete failed: " +
@@ -79,6 +80,38 @@ std::shared_ptr> HybridCactus::complete(
});
}
+std::shared_ptr> HybridCactus::prefill( // Runs cactus_prefill on the loaded model in an async task and resolves with the native response text.
+ const std::string &messagesJson, double responseBufferSize,
+ const std::optional &optionsJson,
+ const std::optional &toolsJson) {
+ return Promise::async([this, messagesJson, responseBufferSize, // inputs are copied into the task
+ optionsJson,
+ toolsJson]() -> std::string {
+ std::lock_guard lock(this->_modelMutex); // held until the task returns
+
+ if (!this->_model) {
+ throw std::runtime_error("Cactus model is not initialized");
+ }
+
+ std::string responseBuffer;
+ responseBuffer.resize(responseBufferSize); // caller-chosen capacity for the native output
+
+ int result = cactus_prefill(this->_model, messagesJson.c_str(),
+ responseBuffer.data(), responseBufferSize,
+ optionsJson ? optionsJson->c_str() : nullptr, // absent optionals are forwarded as nullptr
+ toolsJson ? toolsJson->c_str() : nullptr,
+ nullptr, 0); // no PCM buffer is supplied on this path
+
+ if (result < 0) { // a negative return is treated as failure
+ throw std::runtime_error("Cactus prefill failed: " +
+ std::string(cactus_get_last_error()));
+ }
+
+ responseBuffer.resize(strlen(responseBuffer.c_str())); // shrink to the first NUL, dropping unused capacity
+ return responseBuffer;
+ });
+}
+
std::shared_ptr>>
HybridCactus::tokenize(const std::string &text) {
return Promise>::async([this,
@@ -488,6 +521,102 @@ HybridCactus::audioEmbed(const std::string &audioPath,
});
}
+std::shared_ptr> HybridCactus::diarize( // Runs cactus_diarize on a file path or on in-memory samples and resolves with the native response text.
+ const std::variant, std::string> &audio,
+ double responseBufferSize,
+ const std::optional &optionsJson) {
+ return Promise::async(
+ [this, audio, responseBufferSize, optionsJson]() -> std::string { // inputs are copied into the task
+ std::lock_guard lock(this->_modelMutex); // held until the task returns
+
+ if (!this->_model) {
+ throw std::runtime_error("Cactus model is not initialized");
+ }
+
+ std::string responseBuffer;
+ responseBuffer.resize(responseBufferSize); // caller-chosen capacity for the native output
+
+ int result;
+ if (std::holds_alternative(audio)) { // string alternative: used as the audio file path below
+ result = cactus_diarize(
+ this->_model, std::get(audio).c_str(),
+ responseBuffer.data(), responseBufferSize,
+ optionsJson ? optionsJson->c_str() : nullptr, nullptr, 0); // path given, so no PCM buffer
+ } else {
+ const auto &audioDoubles = std::get>(audio);
+
+ std::vector audioBytes; // later passed to the native call as pcm_buffer
+ audioBytes.reserve(audioDoubles.size());
+ for (double d : audioDoubles) {
+ d = std::clamp(d, 0.0, 255.0); // NOTE(review): NaN is not handled here; confirm callers only pass finite values
+ audioBytes.emplace_back(static_cast(d)); // one output element per clamped input value
+ }
+
+ result = cactus_diarize(
+ this->_model, nullptr, // no file path when samples are supplied
+ responseBuffer.data(), responseBufferSize,
+ optionsJson ? optionsJson->c_str() : nullptr,
+ audioBytes.data(), audioBytes.size());
+ }
+
+ if (result < 0) { // a negative return is treated as failure
+ throw std::runtime_error("Cactus diarize failed: " +
+ std::string(cactus_get_last_error()));
+ }
+
+ responseBuffer.resize(strlen(responseBuffer.c_str())); // shrink to the first NUL, dropping unused capacity
+ return responseBuffer;
+ });
+}
+
+std::shared_ptr> HybridCactus::embedSpeaker( // Runs cactus_embed_speaker on a file path or on in-memory samples and resolves with the native response text.
+ const std::variant, std::string> &audio,
+ double responseBufferSize,
+ const std::optional &optionsJson) {
+ return Promise::async(
+ [this, audio, responseBufferSize, optionsJson]() -> std::string { // inputs are copied into the task
+ std::lock_guard lock(this->_modelMutex); // held until the task returns
+
+ if (!this->_model) {
+ throw std::runtime_error("Cactus model is not initialized");
+ }
+
+ std::string responseBuffer;
+ responseBuffer.resize(responseBufferSize); // caller-chosen capacity for the native output
+
+ int result;
+ if (std::holds_alternative(audio)) { // string alternative: used as the audio file path below
+ result = cactus_embed_speaker(
+ this->_model, std::get(audio).c_str(),
+ responseBuffer.data(), responseBufferSize,
+ optionsJson ? optionsJson->c_str() : nullptr, nullptr, 0); // path given, so no PCM buffer
+ } else {
+ const auto &audioDoubles = std::get>(audio);
+
+ std::vector audioBytes; // later passed to the native call as pcm_buffer
+ audioBytes.reserve(audioDoubles.size());
+ for (double d : audioDoubles) {
+ d = std::clamp(d, 0.0, 255.0); // NOTE(review): NaN is not handled here; confirm callers only pass finite values
+ audioBytes.emplace_back(static_cast(d)); // one output element per clamped input value
+ }
+
+ result = cactus_embed_speaker(
+ this->_model, nullptr, // no file path when samples are supplied
+ responseBuffer.data(), responseBufferSize,
+ optionsJson ? optionsJson->c_str() : nullptr,
+ audioBytes.data(), audioBytes.size());
+ }
+
+ if (result < 0) { // a negative return is treated as failure
+ throw std::runtime_error("Cactus embed speaker failed: " +
+ std::string(cactus_get_last_error()));
+ }
+
+ responseBuffer.resize(strlen(responseBuffer.c_str())); // shrink to the first NUL, dropping unused capacity
+ return responseBuffer;
+ });
+}
+
std::shared_ptr> HybridCactus::reset() {
return Promise::async([this]() -> void {
std::lock_guard lock(this->_modelMutex);
@@ -525,7 +654,7 @@ std::shared_ptr> HybridCactus::destroy() {
std::shared_ptr>
HybridCactus::setTelemetryEnvironment(const std::string &cacheDir) {
return Promise::async([cacheDir]() -> void {
- cactus_set_telemetry_environment("react-native", cacheDir.c_str(), "1.10.0");
+ cactus_set_telemetry_environment("react-native", cacheDir.c_str(), "1.12.0");
});
}
diff --git a/cpp/HybridCactus.hpp b/cpp/HybridCactus.hpp
index 2c5db1d..16d57e9 100644
--- a/cpp/HybridCactus.hpp
+++ b/cpp/HybridCactus.hpp
@@ -24,6 +24,11 @@ class HybridCactus : public HybridCactusSpec {
double /* tokenId */)>> &callback)
override;
+ std::shared_ptr> prefill( // async wrapper over cactus_prefill; see HybridCactus.cpp
+ const std::string &messagesJson, double responseBufferSize,
+ const std::optional &optionsJson,
+ const std::optional &toolsJson) override; // options and tools may be absent
+
std::shared_ptr>>
tokenize(const std::string &text) override;
@@ -67,6 +72,16 @@ class HybridCactus : public HybridCactusSpec {
std::shared_ptr>>
audioEmbed(const std::string &audioPath, double embeddingBufferSize) override;
+ std::shared_ptr>
+ diarize(const std::variant, std::string> &audio, // audio is either in-memory samples or a file path
+ double responseBufferSize,
+ const std::optional &optionsJson) override; // async wrapper over cactus_diarize; options may be absent
+
+ std::shared_ptr>
+ embedSpeaker(const std::variant, std::string> &audio, // audio is either in-memory samples or a file path
+ double responseBufferSize,
+ const std::optional &optionsJson) override; // async wrapper over cactus_embed_speaker; options may be absent
+
std::shared_ptr> reset() override;
std::shared_ptr> stop() override;
diff --git a/cpp/cactus_ffi.h b/cpp/cactus_ffi.h
index aa72986..6e35847 100644
--- a/cpp/cactus_ffi.h
+++ b/cpp/cactus_ffi.h
@@ -41,7 +41,20 @@ CACTUS_FFI_EXPORT int cactus_complete(
const char* options_json, // optional
const char* tools_json, // optional
cactus_token_callback callback, // optional
- void* user_data // optional
+ void* user_data, // optional
+ const uint8_t* pcm_buffer, // optional: NULL when not used
+ size_t pcm_buffer_size // optional: 0 when not used
+);
+
+CACTUS_FFI_EXPORT int cactus_prefill( // HybridCactus.cpp treats a negative return as failure and then reads cactus_get_last_error()
+ cactus_model_t model,
+ const char* messages_json,
+ char* response_buffer, // HybridCactus.cpp reads the result from here as a NUL-terminated string
+ size_t buffer_size,
+ const char* options_json, // optional
+ const char* tools_json, // optional
+ const uint8_t* pcm_buffer, // optional: NULL when not used
+ size_t pcm_buffer_size // optional: 0 when not used
);
CACTUS_FFI_EXPORT int cactus_tokenize(
@@ -140,6 +153,26 @@ CACTUS_FFI_EXPORT int cactus_vad(
size_t pcm_buffer_size
);
+CACTUS_FFI_EXPORT int cactus_diarize( // HybridCactus.cpp treats a negative return as failure and then reads cactus_get_last_error()
+ cactus_model_t model,
+ const char* audio_file_path, // HybridCactus.cpp passes NULL here when it supplies pcm_buffer instead
+ char* response_buffer, // HybridCactus.cpp reads the result from here as a NUL-terminated string
+ size_t buffer_size,
+ const char* options_json, // HybridCactus.cpp passes NULL when no options are given
+ const uint8_t* pcm_buffer, // HybridCactus.cpp passes NULL when audio_file_path is used
+ size_t pcm_buffer_size // HybridCactus.cpp passes 0 when audio_file_path is used
+);
+
+CACTUS_FFI_EXPORT int cactus_embed_speaker( // HybridCactus.cpp treats a negative return as failure and then reads cactus_get_last_error()
+ cactus_model_t model,
+ const char* audio_file_path, // HybridCactus.cpp passes NULL here when it supplies pcm_buffer instead
+ char* response_buffer, // HybridCactus.cpp reads the result from here as a NUL-terminated string
+ size_t buffer_size,
+ const char* options_json, // HybridCactus.cpp passes NULL when no options are given
+ const uint8_t* pcm_buffer, // HybridCactus.cpp passes NULL when audio_file_path is used
+ size_t pcm_buffer_size // HybridCactus.cpp passes 0 when audio_file_path is used
+);
+
CACTUS_FFI_EXPORT int cactus_rag_query(
cactus_model_t model,
const char* query,
@@ -148,7 +181,6 @@ CACTUS_FFI_EXPORT int cactus_rag_query(
size_t top_k
);
-
CACTUS_FFI_EXPORT cactus_index_t cactus_index_init(
const char* index_dir,
size_t embedding_dim
@@ -199,11 +231,217 @@ CACTUS_FFI_EXPORT void cactus_index_destroy(cactus_index_t index);
CACTUS_FFI_EXPORT const char* cactus_get_last_error(void);
+// level: 0=DEBUG, 1=INFO, 2=WARN (default), 3=ERROR, 4=NONE
+CACTUS_FFI_EXPORT void cactus_log_set_level(int level);
+
+typedef void (*cactus_log_callback_t)(int level, const char* component, const char* message, void* user_data);
+CACTUS_FFI_EXPORT void cactus_log_set_callback(cactus_log_callback_t callback, void* user_data);
+
CACTUS_FFI_EXPORT void cactus_set_telemetry_environment(const char* framework, const char* cache_location, const char* version);
CACTUS_FFI_EXPORT void cactus_set_app_id(const char* app_id);
CACTUS_FFI_EXPORT void cactus_telemetry_flush(void);
CACTUS_FFI_EXPORT void cactus_telemetry_shutdown(void);
+// cactus graph export
+typedef void* cactus_graph_t;
+typedef uint64_t cactus_node_t;
+
+typedef struct { // plain-data tensor descriptor for the graph export API
+ int32_t precision; // NOTE(review): the precision code values are not declared in this excerpt
+ size_t rank;
+ size_t shape[8]; // fixed capacity of 8; presumably only the first `rank` entries are meaningful (TODO confirm)
+ size_t num_elements;
+ size_t byte_size;
+} cactus_tensor_info_t;
+
+CACTUS_FFI_EXPORT cactus_graph_t cactus_graph_create(void);
+CACTUS_FFI_EXPORT void cactus_graph_destroy(cactus_graph_t graph);
+CACTUS_FFI_EXPORT int cactus_graph_hard_reset(cactus_graph_t graph);
+
+CACTUS_FFI_EXPORT int cactus_graph_input(
+ cactus_graph_t graph, const size_t* shape, size_t rank, int32_t precision,
+cactus_node_t* out_node);
+
+CACTUS_FFI_EXPORT int cactus_graph_set_input(
+ cactus_graph_t graph, cactus_node_t node, const void* data, int32_t
+precision);
+CACTUS_FFI_EXPORT int cactus_graph_set_external_input(
+ cactus_graph_t graph, cactus_node_t node, void* data, int32_t precision);
+
+CACTUS_FFI_EXPORT int cactus_graph_precision_cast(
+ cactus_graph_t graph, cactus_node_t input, int32_t target_precision, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_quantize_activations(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_add(cactus_graph_t graph, cactus_node_t a,
+cactus_node_t b, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_add_clipped(cactus_graph_t graph, cactus_node_t a,
+cactus_node_t b, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_subtract(cactus_graph_t graph, cactus_node_t
+a, cactus_node_t b, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_multiply(cactus_graph_t graph, cactus_node_t
+a, cactus_node_t b, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_divide(cactus_graph_t graph, cactus_node_t
+a, cactus_node_t b, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_scalar_add(cactus_graph_t graph, cactus_node_t x, float value, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_subtract(cactus_graph_t graph, cactus_node_t x, float value, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_multiply(cactus_graph_t graph, cactus_node_t x, float value, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_divide(cactus_graph_t graph, cactus_node_t x, float value, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_exp(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_sqrt(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_cos(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_sin(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_log(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_abs(cactus_graph_t graph, cactus_node_t x,
+cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_pow(cactus_graph_t graph, cactus_node_t x,
+float exponent, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_view(
+ cactus_graph_t graph, cactus_node_t x, const size_t* shape, size_t rank,
+cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_flatten(
+ cactus_graph_t graph, cactus_node_t x, int32_t start_dim, int32_t end_dim,
+cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_reshape(
+ cactus_graph_t graph, cactus_node_t x, const size_t* shape, size_t rank, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_transpose(
+ cactus_graph_t graph, cactus_node_t x, int32_t backend, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_transpose_n(
+ cactus_graph_t graph, cactus_node_t x, const size_t* permutation, size_t rank, int32_t backend, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_slice(
+ cactus_graph_t graph, cactus_node_t x, int32_t axis, size_t start, size_t length, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_index(
+ cactus_graph_t graph, cactus_node_t x, size_t index_value, int32_t dim, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_sum(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_mean(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_variance(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_min(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_max(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_concat(
+ cactus_graph_t graph, cactus_node_t a, cactus_node_t b, int32_t axis,
+cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_cat(
+ cactus_graph_t graph, const cactus_node_t* nodes, size_t count, int32_t
+axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_matmul(
+ cactus_graph_t graph, cactus_node_t a, cactus_node_t b, bool pretransposed_rhs, int32_t backend, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gather(
+ cactus_graph_t graph, cactus_node_t tensor, cactus_node_t indices, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_embedding_from_tensor(
+ cactus_graph_t graph, cactus_node_t embedding_tensor, cactus_node_t indices, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_embedding_from_file(
+ cactus_graph_t graph, const char* filename, cactus_node_t indices, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_mmap_embeddings(
+ cactus_graph_t graph, const char* filename, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_mmap_weights(
+ cactus_graph_t graph, const char* filename, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_bilinear_interpolation(
+ cactus_graph_t graph, cactus_node_t pos_embeds, size_t dst_height, size_t dst_width, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_set_grouped_scales(
+ cactus_graph_t graph, cactus_node_t node, size_t group_size, size_t num_groups, void* scales_ptr);
+CACTUS_FFI_EXPORT int cactus_graph_set_interleaved(
+ cactus_graph_t graph, cactus_node_t node, bool interleaved, size_t original_n);
+CACTUS_FFI_EXPORT int cactus_graph_release_weight_pages(cactus_graph_t graph, cactus_node_t node);
+CACTUS_FFI_EXPORT int cactus_graph_prefetch_weight_pages(cactus_graph_t graph, cactus_node_t node);
+CACTUS_FFI_EXPORT int cactus_graph_release_all_weight_pages(cactus_graph_t graph);
+
+CACTUS_FFI_EXPORT int cactus_graph_relu(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_silu(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gelu(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gelu_erf(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_sigmoid(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_tanh(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_glu(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_layernorm(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, cactus_node_t bias, float epsilon, bool has_bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_groupnorm(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, cactus_node_t bias, size_t num_groups, float epsilon, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_batchnorm(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, cactus_node_t bias, cactus_node_t running_mean, cactus_node_t running_var, int32_t axis, float epsilon, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_topk(cactus_graph_t graph, cactus_node_t input, size_t k, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_rms_norm(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, float epsilon, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_rope(
+ cactus_graph_t graph, cactus_node_t input, float theta, size_t position_offset, int32_t backend, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_rope_gptj(
+ cactus_graph_t graph, cactus_node_t input, float theta, size_t position_offset, size_t rot_dim, int32_t backend, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_softmax(cactus_graph_t graph, cactus_node_t input, int32_t axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_attention(
+ cactus_graph_t graph, cactus_node_t query, cactus_node_t key, cactus_node_t value, float scale, bool is_causal, size_t position_offset, size_t window_size, int32_t backend, bool use_mask, cactus_node_t mask, bool additive_mask, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_rel_pos_bias(
+ cactus_graph_t graph, cactus_node_t query, cactus_node_t relative_key, float scale, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_attention_int8_hybrid(
+ cactus_graph_t graph, cactus_node_t query, cactus_node_t key_new, cactus_node_t value_new, float scale, size_t position_offset,
+ const int8_t* cached_keys, const int8_t* cached_values, const float* k_scales, const float* v_scales,
+ size_t cache_len, size_t num_kv_heads, size_t head_dim, size_t window_size, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_conv1d_causal(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, size_t kernel_size, size_t dilation, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv1d_k3(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, size_t stride, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv1d_k7s3(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, cactus_node_t bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv1d(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, size_t stride, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv1d_same_depthwise_k9(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv1d_pointwise(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv2d_k3s2p1(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv2d_depthwise_k3s2p1(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv2d_pointwise_1x1(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_lstm_cell(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t h_prev, cactus_node_t c_prev, cactus_node_t weight_ih, cactus_node_t weight_hh, cactus_node_t bias_ih, cactus_node_t bias_hh, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gated_deltanet_decode(
+ cactus_graph_t graph, cactus_node_t query, cactus_node_t key, cactus_node_t value, cactus_node_t gate_log, cactus_node_t beta, cactus_node_t initial_state, float scale, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gated_deltanet_prefill(
+ cactus_graph_t graph, cactus_node_t query, cactus_node_t key, cactus_node_t value, cactus_node_t gate_log, cactus_node_t beta, cactus_node_t initial_state, size_t chunk_size, float scale, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_stft(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, size_t stride, size_t num_fft_bins, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_altup_predict(
+ cactus_graph_t graph, cactus_node_t coefs, const cactus_node_t* streams, size_t num_streams, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_altup_correct(
+ cactus_graph_t graph, cactus_node_t coefs, cactus_node_t innovation, const cactus_node_t* predictions, size_t num_predictions, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gaussian_topk(
+ cactus_graph_t graph, cactus_node_t input, float ppf, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_moe_layer_gated(
+ cactus_graph_t graph, cactus_node_t hidden, cactus_node_t routing_probs, cactus_node_t topk_indices,
+ const cactus_node_t* w1_weights, const cactus_node_t* w3_weights, const cactus_node_t* w2_weights,
+ size_t num_experts, size_t num_experts_per_tok, bool normalize_routing, float epsilon, float routed_scaling_factor, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_moe_layer_ungated(
+ cactus_graph_t graph, cactus_node_t hidden, cactus_node_t routing_probs, cactus_node_t topk_indices,
+ const cactus_node_t* w1_weights, const cactus_node_t* w2_weights,
+ size_t num_experts, size_t num_experts_per_tok, bool normalize_routing, float epsilon, float routed_scaling_factor, int32_t activation, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_sample(
+ cactus_graph_t graph, cactus_node_t logits, float temperature, float top_p, size_t top_k, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scatter_topk(
+ cactus_graph_t graph, cactus_node_t indices, cactus_node_t values, size_t num_classes, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_persistent(
+ cactus_graph_t graph, cactus_node_t source_node, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_is_populated(
+ cactus_graph_t graph, cactus_node_t persistent_node, int32_t* out_is_populated);
+CACTUS_FFI_EXPORT int cactus_graph_invalidate_persistent(
+ cactus_graph_t graph, cactus_node_t persistent_node);
+
+CACTUS_FFI_EXPORT int cactus_graph_execute(cactus_graph_t graph);
+CACTUS_FFI_EXPORT int cactus_graph_get_output_ptr(cactus_graph_t graph,
+cactus_node_t node, void** out_ptr);
+CACTUS_FFI_EXPORT int cactus_graph_get_output_info(cactus_graph_t graph,
+cactus_node_t node, cactus_tensor_info_t* out_info);
+
#ifdef __cplusplus
}
#endif
diff --git a/example/ios/Podfile.lock b/example/ios/Podfile.lock
index 8c6e001..a6880d1 100644
--- a/example/ios/Podfile.lock
+++ b/example/ios/Podfile.lock
@@ -1,6 +1,6 @@
PODS:
- boost (1.84.0)
- - Cactus (1.10.4):
+ - Cactus (1.12.0):
- boost
- DoubleConversion
- fast_float
@@ -2643,7 +2643,7 @@ EXTERNAL SOURCES:
SPEC CHECKSUMS:
boost: 7e761d76ca2ce687f7cc98e698152abd03a18f90
- Cactus: effc2b16da1131e7bcf3f101d7ad09abd7231a2c
+ Cactus: ce2107540aec408af1ad4e4a7502a9ab87f5a7eb
DoubleConversion: cb417026b2400c8f53ae97020b2be961b59470cb
fast_float: b32c788ed9c6a8c584d114d0047beda9664e7cc6
FBLazyVector: b8f1312d48447cca7b4abc21ed155db14742bd03
diff --git a/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h
index aa72986..6e35847 100644
--- a/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h
+++ b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h
@@ -41,7 +41,20 @@ CACTUS_FFI_EXPORT int cactus_complete(
const char* options_json, // optional
const char* tools_json, // optional
cactus_token_callback callback, // optional
- void* user_data // optional
+ void* user_data, // optional
+ const uint8_t* pcm_buffer, // optional: NULL when not used
+ size_t pcm_buffer_size // optional: 0 when not used
+);
+
+CACTUS_FFI_EXPORT int cactus_prefill(
+ cactus_model_t model,
+ const char* messages_json,
+ char* response_buffer,
+ size_t buffer_size,
+ const char* options_json, // optional
+ const char* tools_json, // optional
+ const uint8_t* pcm_buffer, // optional: NULL when not used
+ size_t pcm_buffer_size // optional: 0 when not used
);
CACTUS_FFI_EXPORT int cactus_tokenize(
@@ -140,6 +153,26 @@ CACTUS_FFI_EXPORT int cactus_vad(
size_t pcm_buffer_size
);
+CACTUS_FFI_EXPORT int cactus_diarize(
+ cactus_model_t model,
+ const char* audio_file_path,
+ char* response_buffer,
+ size_t buffer_size,
+ const char* options_json,
+ const uint8_t* pcm_buffer,
+ size_t pcm_buffer_size
+);
+
+CACTUS_FFI_EXPORT int cactus_embed_speaker(
+ cactus_model_t model,
+ const char* audio_file_path,
+ char* response_buffer,
+ size_t buffer_size,
+ const char* options_json,
+ const uint8_t* pcm_buffer,
+ size_t pcm_buffer_size
+);
+
CACTUS_FFI_EXPORT int cactus_rag_query(
cactus_model_t model,
const char* query,
@@ -148,7 +181,6 @@ CACTUS_FFI_EXPORT int cactus_rag_query(
size_t top_k
);
-
CACTUS_FFI_EXPORT cactus_index_t cactus_index_init(
const char* index_dir,
size_t embedding_dim
@@ -199,11 +231,217 @@ CACTUS_FFI_EXPORT void cactus_index_destroy(cactus_index_t index);
CACTUS_FFI_EXPORT const char* cactus_get_last_error(void);
+// level: 0=DEBUG, 1=INFO, 2=WARN (default), 3=ERROR, 4=NONE
+CACTUS_FFI_EXPORT void cactus_log_set_level(int level);
+
+typedef void (*cactus_log_callback_t)(int level, const char* component, const char* message, void* user_data);
+CACTUS_FFI_EXPORT void cactus_log_set_callback(cactus_log_callback_t callback, void* user_data);
+
CACTUS_FFI_EXPORT void cactus_set_telemetry_environment(const char* framework, const char* cache_location, const char* version);
CACTUS_FFI_EXPORT void cactus_set_app_id(const char* app_id);
CACTUS_FFI_EXPORT void cactus_telemetry_flush(void);
CACTUS_FFI_EXPORT void cactus_telemetry_shutdown(void);
+// cactus graph export
+typedef void* cactus_graph_t;
+typedef uint64_t cactus_node_t;
+
+typedef struct {
+ int32_t precision;
+ size_t rank;
+ size_t shape[8];
+ size_t num_elements;
+ size_t byte_size;
+} cactus_tensor_info_t;
+
+CACTUS_FFI_EXPORT cactus_graph_t cactus_graph_create(void);
+CACTUS_FFI_EXPORT void cactus_graph_destroy(cactus_graph_t graph);
+CACTUS_FFI_EXPORT int cactus_graph_hard_reset(cactus_graph_t graph);
+
+CACTUS_FFI_EXPORT int cactus_graph_input(
+ cactus_graph_t graph, const size_t* shape, size_t rank, int32_t precision,
+cactus_node_t* out_node);
+
+CACTUS_FFI_EXPORT int cactus_graph_set_input(
+ cactus_graph_t graph, cactus_node_t node, const void* data, int32_t
+precision);
+CACTUS_FFI_EXPORT int cactus_graph_set_external_input(
+ cactus_graph_t graph, cactus_node_t node, void* data, int32_t precision);
+
+CACTUS_FFI_EXPORT int cactus_graph_precision_cast(
+ cactus_graph_t graph, cactus_node_t input, int32_t target_precision, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_quantize_activations(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_add(cactus_graph_t graph, cactus_node_t a,
+cactus_node_t b, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_add_clipped(cactus_graph_t graph, cactus_node_t a,
+cactus_node_t b, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_subtract(cactus_graph_t graph, cactus_node_t
+a, cactus_node_t b, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_multiply(cactus_graph_t graph, cactus_node_t
+a, cactus_node_t b, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_divide(cactus_graph_t graph, cactus_node_t
+a, cactus_node_t b, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_scalar_add(cactus_graph_t graph, cactus_node_t x, float value, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_subtract(cactus_graph_t graph, cactus_node_t x, float value, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_multiply(cactus_graph_t graph, cactus_node_t x, float value, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_divide(cactus_graph_t graph, cactus_node_t x, float value, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_exp(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_sqrt(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_cos(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_sin(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scalar_log(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_abs(cactus_graph_t graph, cactus_node_t x,
+cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_pow(cactus_graph_t graph, cactus_node_t x,
+float exponent, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_view(
+ cactus_graph_t graph, cactus_node_t x, const size_t* shape, size_t rank,
+cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_flatten(
+ cactus_graph_t graph, cactus_node_t x, int32_t start_dim, int32_t end_dim,
+cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_reshape(
+ cactus_graph_t graph, cactus_node_t x, const size_t* shape, size_t rank, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_transpose(
+ cactus_graph_t graph, cactus_node_t x, int32_t backend, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_transpose_n(
+ cactus_graph_t graph, cactus_node_t x, const size_t* permutation, size_t rank, int32_t backend, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_slice(
+ cactus_graph_t graph, cactus_node_t x, int32_t axis, size_t start, size_t length, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_index(
+ cactus_graph_t graph, cactus_node_t x, size_t index_value, int32_t dim, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_sum(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_mean(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_variance(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_min(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_max(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_concat(
+ cactus_graph_t graph, cactus_node_t a, cactus_node_t b, int32_t axis,
+cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_cat(
+ cactus_graph_t graph, const cactus_node_t* nodes, size_t count, int32_t
+axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_matmul(
+ cactus_graph_t graph, cactus_node_t a, cactus_node_t b, bool pretransposed_rhs, int32_t backend, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gather(
+ cactus_graph_t graph, cactus_node_t tensor, cactus_node_t indices, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_embedding_from_tensor(
+ cactus_graph_t graph, cactus_node_t embedding_tensor, cactus_node_t indices, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_embedding_from_file(
+ cactus_graph_t graph, const char* filename, cactus_node_t indices, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_mmap_embeddings(
+ cactus_graph_t graph, const char* filename, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_mmap_weights(
+ cactus_graph_t graph, const char* filename, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_bilinear_interpolation(
+ cactus_graph_t graph, cactus_node_t pos_embeds, size_t dst_height, size_t dst_width, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_set_grouped_scales(
+ cactus_graph_t graph, cactus_node_t node, size_t group_size, size_t num_groups, void* scales_ptr);
+CACTUS_FFI_EXPORT int cactus_graph_set_interleaved(
+ cactus_graph_t graph, cactus_node_t node, bool interleaved, size_t original_n);
+CACTUS_FFI_EXPORT int cactus_graph_release_weight_pages(cactus_graph_t graph, cactus_node_t node);
+CACTUS_FFI_EXPORT int cactus_graph_prefetch_weight_pages(cactus_graph_t graph, cactus_node_t node);
+CACTUS_FFI_EXPORT int cactus_graph_release_all_weight_pages(cactus_graph_t graph);
+
+CACTUS_FFI_EXPORT int cactus_graph_relu(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_silu(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gelu(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gelu_erf(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_sigmoid(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_tanh(cactus_graph_t graph, cactus_node_t x, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_glu(cactus_graph_t graph, cactus_node_t x, int32_t axis, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_layernorm(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, cactus_node_t bias, float epsilon, bool has_bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_groupnorm(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, cactus_node_t bias, size_t num_groups, float epsilon, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_batchnorm(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, cactus_node_t bias, cactus_node_t running_mean, cactus_node_t running_var, int32_t axis, float epsilon, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_topk(cactus_graph_t graph, cactus_node_t input, size_t k, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_rms_norm(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, float epsilon, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_rope(
+ cactus_graph_t graph, cactus_node_t input, float theta, size_t position_offset, int32_t backend, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_rope_gptj(
+ cactus_graph_t graph, cactus_node_t input, float theta, size_t position_offset, size_t rot_dim, int32_t backend, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_softmax(cactus_graph_t graph, cactus_node_t input, int32_t axis, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_attention(
+ cactus_graph_t graph, cactus_node_t query, cactus_node_t key, cactus_node_t value, float scale, bool is_causal, size_t position_offset, size_t window_size, int32_t backend, bool use_mask, cactus_node_t mask, bool additive_mask, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_rel_pos_bias(
+ cactus_graph_t graph, cactus_node_t query, cactus_node_t relative_key, float scale, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_attention_int8_hybrid(
+ cactus_graph_t graph, cactus_node_t query, cactus_node_t key_new, cactus_node_t value_new, float scale, size_t position_offset,
+ const int8_t* cached_keys, const int8_t* cached_values, const float* k_scales, const float* v_scales,
+ size_t cache_len, size_t num_kv_heads, size_t head_dim, size_t window_size, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_conv1d_causal(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, size_t kernel_size, size_t dilation, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv1d_k3(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, size_t stride, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv1d_k7s3(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, cactus_node_t bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv1d(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, size_t stride, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv1d_same_depthwise_k9(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv1d_pointwise(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv2d_k3s2p1(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv2d_depthwise_k3s2p1(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_conv2d_pointwise_1x1(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, bool has_bias, cactus_node_t bias, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_lstm_cell(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t h_prev, cactus_node_t c_prev, cactus_node_t weight_ih, cactus_node_t weight_hh, cactus_node_t bias_ih, cactus_node_t bias_hh, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gated_deltanet_decode(
+ cactus_graph_t graph, cactus_node_t query, cactus_node_t key, cactus_node_t value, cactus_node_t gate_log, cactus_node_t beta, cactus_node_t initial_state, float scale, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gated_deltanet_prefill(
+ cactus_graph_t graph, cactus_node_t query, cactus_node_t key, cactus_node_t value, cactus_node_t gate_log, cactus_node_t beta, cactus_node_t initial_state, size_t chunk_size, float scale, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_stft(
+ cactus_graph_t graph, cactus_node_t input, cactus_node_t weight, size_t stride, size_t num_fft_bins, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_altup_predict(
+ cactus_graph_t graph, cactus_node_t coefs, const cactus_node_t* streams, size_t num_streams, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_altup_correct(
+ cactus_graph_t graph, cactus_node_t coefs, cactus_node_t innovation, const cactus_node_t* predictions, size_t num_predictions, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_gaussian_topk(
+ cactus_graph_t graph, cactus_node_t input, float ppf, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_moe_layer_gated(
+ cactus_graph_t graph, cactus_node_t hidden, cactus_node_t routing_probs, cactus_node_t topk_indices,
+ const cactus_node_t* w1_weights, const cactus_node_t* w3_weights, const cactus_node_t* w2_weights,
+ size_t num_experts, size_t num_experts_per_tok, bool normalize_routing, float epsilon, float routed_scaling_factor, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_moe_layer_ungated(
+ cactus_graph_t graph, cactus_node_t hidden, cactus_node_t routing_probs, cactus_node_t topk_indices,
+ const cactus_node_t* w1_weights, const cactus_node_t* w2_weights,
+ size_t num_experts, size_t num_experts_per_tok, bool normalize_routing, float epsilon, float routed_scaling_factor, int32_t activation, cactus_node_t* out);
+
+CACTUS_FFI_EXPORT int cactus_graph_sample(
+ cactus_graph_t graph, cactus_node_t logits, float temperature, float top_p, size_t top_k, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_scatter_topk(
+ cactus_graph_t graph, cactus_node_t indices, cactus_node_t values, size_t num_classes, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_persistent(
+ cactus_graph_t graph, cactus_node_t source_node, cactus_node_t* out);
+CACTUS_FFI_EXPORT int cactus_graph_is_populated(
+ cactus_graph_t graph, cactus_node_t persistent_node, int32_t* out_is_populated);
+CACTUS_FFI_EXPORT int cactus_graph_invalidate_persistent(
+ cactus_graph_t graph, cactus_node_t persistent_node);
+
+CACTUS_FFI_EXPORT int cactus_graph_execute(cactus_graph_t graph);
+CACTUS_FFI_EXPORT int cactus_graph_get_output_ptr(cactus_graph_t graph,
+cactus_node_t node, void** out_ptr);
+CACTUS_FFI_EXPORT int cactus_graph_get_output_info(cactus_graph_t graph,
+cactus_node_t node, cactus_tensor_info_t* out_info);
+
#ifdef __cplusplus
}
#endif
diff --git a/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h
index 3b5d97f..6570f09 100644
--- a/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h
+++ b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h
@@ -6,6 +6,7 @@
#include
#include
#include
+#include