diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 00000000..f4d6927d --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,28 @@ +# clang-tidy for the C++ FFI wrappers (llama-cpp-bindings-sys/*.cpp). Every check +# is enabled and every warning is an error, except: +# - Other-project convention groups this codebase is not and cannot satisfy +# (some of which contradict each other and the modernize checks, e.g. +# fuchsia-trailing-return vs modernize-use-trailing-return-type): +# abseil/altera/android/boost/darwin/fuchsia/linuxkernel/llvm/llvmlibc/mpi/openmp/zircon. +# - bugprone-easily-swappable-parameters: the wrapper signatures mirror the +# llama.cpp C API shape (adjacent llama_pos p0, p1, ...) and cannot be reshaped. +# Headers are out of scope here: they are C-ABI (bindgen parses them as C), so C++ +# modernizations would break them. cppcheck lints the headers instead. +Checks: > + *, + -abseil-*, + -altera-*, + -android-*, + -boost-*, + -darwin-*, + -fuchsia-*, + -linuxkernel-*, + -llvm-*, + -llvmlibc-*, + -mpi-*, + -openmp-*, + -zircon-*, + -bugprone-easily-swappable-parameters +WarningsAsErrors: '*' +HeaderFilterRegex: '$^' +FormatStyle: none diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 95deb1c8..48caacdf 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -46,3 +46,29 @@ jobs: - uses: ./.github/actions/install-rust-toolchain - run: make test.unit + + cppcheck: + name: cppcheck + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - run: sudo apt-get update && sudo apt-get install -y cppcheck + + - run: make lint.cpp.cppcheck + + clang-tidy: + name: clang-tidy + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - run: sudo apt-get update && sudo apt-get install -y clang-tidy + + - run: make lint.cpp.clang-tidy diff --git a/.gitmodules b/.gitmodules index f3ceca93..8bd5c673 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "llama-cpp-bindings-sys/llama.cpp"] path = llama-cpp-bindings-sys/llama.cpp url = https://github.com/ggml-org/llama.cpp +[submodule "llama-cpp-bindings-sys/GSL"] + path = llama-cpp-bindings-sys/GSL + url = https://github.com/microsoft/GSL.git diff --git a/Makefile b/Makefile index e2061b61..53f9e117 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,25 @@ fmt: fmt.check: cargo fmt --all --check +.PHONY: lint.cpp +lint.cpp: lint.cpp.clang-tidy lint.cpp.cppcheck + +.PHONY: lint.cpp.clang-tidy +lint.cpp.clang-tidy: + cd llama-cpp-bindings-sys && clang-tidy wrapper_*.cpp -- \ + -std=c++17 -I. -IGSL/include -Illama.cpp -Illama.cpp/common \ + -Illama.cpp/include -Illama.cpp/ggml/include -Illama.cpp/vendor + +.PHONY: lint.cpp.cppcheck +lint.cpp.cppcheck: + cd llama-cpp-bindings-sys && cppcheck --enable=all --inconclusive \ + --check-level=exhaustive --std=c++17 --error-exitcode=1 \ + -I. -IGSL/include -Illama.cpp -Illama.cpp/common -Illama.cpp/include \ + -Illama.cpp/ggml/include -Illama.cpp/vendor \ + --suppress='*:llama.cpp/*' --suppress='*:GSL/*' \ + --suppress=missingIncludeSystem --suppress=unusedFunction \ + --suppress=checkersReport --suppress=toomanyconfigs wrapper_*.cpp + .PHONY: test test: test.unit test.llms diff --git a/llama-cpp-bindings-build/src/cmake_config.rs b/llama-cpp-bindings-build/src/cmake_config.rs index 7d306df2..12faa145 100644 --- a/llama-cpp-bindings-build/src/cmake_config.rs +++ b/llama-cpp-bindings-build/src/cmake_config.rs @@ -70,6 +70,7 @@ fn configure_base_defines(config: &mut Config) { config.define("LLAMA_BUILD_EXAMPLES", "OFF"); config.define("LLAMA_BUILD_SERVER", "OFF"); config.define("LLAMA_BUILD_TOOLS", "OFF"); + config.define("LLAMA_BUILD_APP", "OFF"); config.define("LLAMA_BUILD_COMMON", "ON"); config.define("LLAMA_CURL", "OFF"); config.cflag("-w"); @@ -231,16 +232,8 @@ fn configure_msvc_release_workaround(config: &mut Config, profile: &str) { } fn configure_android_cmake(config: &mut Config, ndk: &AndroidNdk, _target_triple: &str) { - #[expect( - clippy::assertions_on_constants, - reason = "the assertion enforces a feature flag invariant at build time" - )] - { - assert!( - !(cfg!(feature = "shared-stdcxx") && cfg!(feature = "static-stdcxx")), - "Features 'shared-stdcxx' and 'static-stdcxx' are mutually exclusive" - ); - } + #[cfg(all(feature = "shared-stdcxx", feature = "static-stdcxx"))] + compile_error!("Features 'shared-stdcxx' and 'static-stdcxx' are mutually exclusive"); println!("cargo:rerun-if-env-changed=ANDROID_NDK"); println!("cargo:rerun-if-env-changed=NDK_ROOT"); diff --git a/llama-cpp-bindings-build/src/cpp_wrapper.rs b/llama-cpp-bindings-build/src/cpp_wrapper.rs index fdd8ab37..c4a896f5 100644 --- a/llama-cpp-bindings-build/src/cpp_wrapper.rs +++ b/llama-cpp-bindings-build/src/cpp_wrapper.rs @@ -3,7 +3,7 @@ use std::path::Path; use crate::glob_paths; use crate::target_os::TargetOs; -const WRAPPER_SOURCE_PATTERNS: &[&str] = &["wrapper_*.cpp", "marker_probes/**/*.cpp"]; +const WRAPPER_SOURCE_PATTERNS: &[&str] = &["wrapper_*.cpp"]; pub fn compile_cpp_wrappers(llama_src: &Path, target_os: &TargetOs) { let mut build = cc::Build::new(); @@ -12,6 +12,7 @@ pub fn compile_cpp_wrappers(llama_src: &Path, target_os: &TargetOs) { .cpp(true) .warnings(false) .include(".") + .include("GSL/include") .include(llama_src) .include(llama_src.join("common")) .include(llama_src.join("include")) diff --git a/llama-cpp-bindings-build/src/rebuild_tracking.rs b/llama-cpp-bindings-build/src/rebuild_tracking.rs index 4ee08d6c..6a5c6f77 100644 --- a/llama-cpp-bindings-build/src/rebuild_tracking.rs +++ b/llama-cpp-bindings-build/src/rebuild_tracking.rs @@ -4,12 +4,7 @@ use walkdir::DirEntry; use crate::glob_paths; -const WRAPPER_TRACKING_PATTERNS: &[&str] = &[ - "wrapper*.h", - "wrapper_*.cpp", - "marker_probes/**/*.h", - "marker_probes/**/*.cpp", -]; +const WRAPPER_TRACKING_PATTERNS: &[&str] = &["wrapper*.h", "wrapper_*.cpp"]; fn is_hidden(entry: &DirEntry) -> bool { entry diff --git a/llama-cpp-bindings-sys/GSL b/llama-cpp-bindings-sys/GSL new file mode 160000 index 00000000..152d6eb9 --- /dev/null +++ b/llama-cpp-bindings-sys/GSL @@ -0,0 +1 @@ +Subproject commit 152d6eb989a1ecd23fe9c9cfb2fb8cfc7c0cd0c1 diff --git a/llama-cpp-bindings-sys/llama.cpp b/llama-cpp-bindings-sys/llama.cpp index 59778f01..d73cd076 160000 --- a/llama-cpp-bindings-sys/llama.cpp +++ b/llama-cpp-bindings-sys/llama.cpp @@ -1 +1 @@ -Subproject commit 59778f0196a82db32580bb649d5d839355d6d7bf +Subproject commit d73cd076740db9c111d0e58ddd4486904469e75e diff --git a/llama-cpp-bindings-sys/marker_probes/chunked_thinking.cpp b/llama-cpp-bindings-sys/marker_probes/chunked_thinking.cpp deleted file mode 100644 index d29e49ae..00000000 --- a/llama-cpp-bindings-sys/marker_probes/chunked_thinking.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#include "chunked_thinking.h" - -#include "llama.cpp/common/chat-auto-parser.h" -#include "llama.cpp/common/chat.h" - -#include -#include -#include -#include -#include - -namespace marker_probes { - -namespace { - -constexpr std::string_view REASON_PROBE = "__PADDLER_REASON_PROBE_3F4A8C__"; -constexpr std::string_view RESPONSE_PROBE = "__PADDLER_RESPONSE_PROBE_3F4A8C__"; - -std::string trim_copy(std::string_view input) { - auto first = input.find_first_not_of(" \t\r\n"); - if (first == std::string_view::npos) { - return {}; - } - auto last = input.find_last_not_of(" \t\r\n"); - return std::string(input.substr(first, last - first + 1)); -} - -bool render_template(const common_chat_template & tmpl, - const autoparser::generation_params & params, - std::string & out) { - try { - out = common_chat_template_direct_apply(tmpl, params); - return true; - } catch (const std::exception &) { - return false; - } catch (...) { - return false; - } -} - -autoparser::generation_params plain_text_params() { - autoparser::generation_params params; - params.add_generation_prompt = false; - params.enable_thinking = true; - params.is_inference = false; - params.add_inference = false; - params.mark_input = false; - params.messages = nlohmann::ordered_json::array({ - nlohmann::ordered_json{ { "role", "user" }, { "content", "U" } }, - nlohmann::ordered_json{ { "role", "assistant" }, { "content", std::string(RESPONSE_PROBE) } }, - }); - return params; -} - -autoparser::generation_params chunked_thinking_params() { - autoparser::generation_params params; - params.add_generation_prompt = false; - params.enable_thinking = true; - params.is_inference = false; - params.add_inference = false; - params.mark_input = false; - params.messages = nlohmann::ordered_json::array({ - nlohmann::ordered_json{ { "role", "user" }, { "content", "U" } }, - nlohmann::ordered_json{ - { "role", "assistant" }, - { "content", nlohmann::ordered_json::array({ - nlohmann::ordered_json{ { "type", "thinking" }, { "thinking", std::string(REASON_PROBE) } }, - nlohmann::ordered_json{ { "type", "text" }, { "text", std::string(RESPONSE_PROBE) } }, - }) }, - }, - }); - return params; -} - -bool contains(std::string_view haystack, std::string_view needle) { - return haystack.find(needle) != std::string_view::npos; -} - -} // namespace - -probe_result chunked_thinking(const common_chat_template & tmpl) { - probe_result result; - - std::string render_plain; - if (!render_template(tmpl, plain_text_params(), render_plain)) { - return result; - } - - std::string render_chunked; - if (!render_template(tmpl, chunked_thinking_params(), render_chunked)) { - return result; - } - - if (!contains(render_chunked, REASON_PROBE) || !contains(render_chunked, RESPONSE_PROBE)) { - return result; - } - - const std::size_t plain_size = render_plain.size(); - const std::size_t chunked_size = render_chunked.size(); - const std::size_t min_size = std::min(plain_size, chunked_size); - - std::size_t common_prefix = 0; - while (common_prefix < min_size && render_plain[common_prefix] == render_chunked[common_prefix]) { - ++common_prefix; - } - - std::size_t common_suffix = 0; - while (common_suffix < min_size - common_prefix - && render_plain[plain_size - 1 - common_suffix] == render_chunked[chunked_size - 1 - common_suffix]) { - ++common_suffix; - } - - if (common_prefix + common_suffix > chunked_size) { - return result; - } - - std::string_view diff_slice(render_chunked); - diff_slice = diff_slice.substr(common_prefix, chunked_size - common_prefix - common_suffix); - - auto reason_pos = diff_slice.find(REASON_PROBE); - if (reason_pos == std::string_view::npos) { - return result; - } - - std::string start = trim_copy(diff_slice.substr(0, reason_pos)); - std::string end = trim_copy(diff_slice.substr(reason_pos + REASON_PROBE.size())); - - if (start.empty() || end.empty()) { - return result; - } - if (contains(start, REASON_PROBE) || contains(start, RESPONSE_PROBE)) { - return result; - } - if (contains(end, REASON_PROBE) || contains(end, RESPONSE_PROBE)) { - return result; - } - - result.start = std::move(start); - result.end = std::move(end); - result.found = true; - return result; -} - -} // namespace marker_probes diff --git a/llama-cpp-bindings-sys/marker_probes/chunked_thinking.h b/llama-cpp-bindings-sys/marker_probes/chunked_thinking.h deleted file mode 100644 index 9128f68b..00000000 --- a/llama-cpp-bindings-sys/marker_probes/chunked_thinking.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#include "marker_probe.h" - -namespace marker_probes { - -probe_result chunked_thinking(const common_chat_template & tmpl); - -} // namespace marker_probes diff --git a/llama-cpp-bindings-sys/marker_probes/marker_probe.h b/llama-cpp-bindings-sys/marker_probes/marker_probe.h deleted file mode 100644 index 3df72c39..00000000 --- a/llama-cpp-bindings-sys/marker_probes/marker_probe.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include "llama.cpp/common/chat.h" - -#include -#include - -namespace marker_probes { - -struct probe_result { - std::string start; - std::string end; - bool found = false; -}; - -using probe_fn = probe_result (*)(const common_chat_template &); - -const std::vector & registered(); - -} // namespace marker_probes diff --git a/llama-cpp-bindings-sys/marker_probes/registry.cpp b/llama-cpp-bindings-sys/marker_probes/registry.cpp deleted file mode 100644 index 315bc56c..00000000 --- a/llama-cpp-bindings-sys/marker_probes/registry.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include "marker_probe.h" - -#include "chunked_thinking.h" - -#include - -namespace marker_probes { - -const std::vector & registered() { - static const std::vector probes = { - chunked_thinking, - }; - return probes; -} - -} // namespace marker_probes diff --git a/llama-cpp-bindings-sys/wrapper_chat_apply.cpp b/llama-cpp-bindings-sys/wrapper_chat_apply.cpp index 96b93b70..9a6c81c3 100644 --- a/llama-cpp-bindings-sys/wrapper_chat_apply.cpp +++ b/llama-cpp-bindings-sys/wrapper_chat_apply.cpp @@ -1,64 +1,72 @@ #include "wrapper_chat_apply.h" +#include "nlohmann/json_fwd.hpp" #include "wrapper_token_text.h" #include "llama.cpp/common/chat-auto-parser.h" #include "llama.cpp/common/chat.h" #include "llama.cpp/include/llama.h" +#include "wrapper_utils.h" +#include #include +#include #include #include #include +#include using wrapper_helpers::token_text_or_empty; -extern "C" llama_rs_apply_chat_template_status llama_rs_apply_chat_template( +extern "C" auto llama_rs_apply_chat_template( const struct llama_model * model, const char * template_src, const char * const * roles, const char * const * contents, size_t n_messages, int add_generation_prompt, + int enable_thinking, char ** out_string, - char ** out_error) { - if (out_string) { + char ** out_error) -> llama_rs_apply_chat_template_status { + if (out_string != nullptr) { *out_string = nullptr; } - if (out_error) { + if (out_error != nullptr) { *out_error = nullptr; } - if (!model) { + if (model == nullptr) { return LLAMA_RS_APPLY_CHAT_TEMPLATE_NULL_MODEL_ARG; } - if (!template_src) { + if (template_src == nullptr) { return LLAMA_RS_APPLY_CHAT_TEMPLATE_NULL_TEMPLATE_ARG; } - if (n_messages > 0 && (!roles || !contents)) { + if (n_messages > 0 && ((roles == nullptr) || (contents == nullptr))) { return LLAMA_RS_APPLY_CHAT_TEMPLATE_NULL_MESSAGES_ARG; } - if (!out_string) { + if (out_string == nullptr) { return LLAMA_RS_APPLY_CHAT_TEMPLATE_NULL_OUT_STRING_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_APPLY_CHAT_TEMPLATE_NULL_OUT_ERROR_ARG; } try { const llama_vocab * vocab = llama_model_get_vocab(model); - if (!vocab) { + if (vocab == nullptr) { return LLAMA_RS_APPLY_CHAT_TEMPLATE_MODEL_HAS_NO_VOCAB; } - std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); - std::string eos_token = token_text_or_empty(vocab, llama_vocab_eos(vocab)); + std::string const bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); + std::string const eos_token = token_text_or_empty(vocab, llama_vocab_eos(vocab)); - common_chat_template tmpl(template_src, bos_token, eos_token); + common_chat_template const tmpl(template_src, bos_token, eos_token); nlohmann::ordered_json messages = nlohmann::ordered_json::array(); + const gsl::span role_span(roles, n_messages); + const gsl::span content_span(contents, n_messages); for (size_t index = 0; index < n_messages; index++) { messages.push_back({ - { "role", roles[index] ? roles[index] : "" }, - { "content", contents[index] ? contents[index] : "" }, + { "role", (role_span[index] != nullptr) ? role_span[index] : "" }, + { "content", (content_span[index] != nullptr) ? content_span[index] : "" }, }); } @@ -66,14 +74,15 @@ extern "C" llama_rs_apply_chat_template_status llama_rs_apply_chat_template( inputs.messages = std::move(messages); inputs.tools = nlohmann::ordered_json::array(); inputs.add_generation_prompt = add_generation_prompt != 0; + inputs.enable_thinking = enable_thinking != 0; - std::string rendered = common_chat_template_direct_apply(tmpl, inputs); + std::string const rendered = common_chat_template_direct_apply(tmpl, inputs); if (rendered.empty()) { return LLAMA_RS_APPLY_CHAT_TEMPLATE_TEMPLATE_APPLICATION_FAILED; } *out_string = llama_rs_dup_string(rendered); - if (!*out_string) { + if (*out_string == nullptr) { return LLAMA_RS_APPLY_CHAT_TEMPLATE_ERROR_STRING_ALLOCATION_FAILED; } @@ -82,13 +91,13 @@ extern "C" llama_rs_apply_chat_template_status llama_rs_apply_chat_template( return LLAMA_RS_APPLY_CHAT_TEMPLATE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_APPLY_CHAT_TEMPLATE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_APPLY_CHAT_TEMPLATE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_APPLY_CHAT_TEMPLATE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_APPLY_CHAT_TEMPLATE_VENDORED_THREW_CXX_EXCEPTION; diff --git a/llama-cpp-bindings-sys/wrapper_chat_apply.h b/llama-cpp-bindings-sys/wrapper_chat_apply.h index 9d124bdd..62dc3f65 100644 --- a/llama-cpp-bindings-sys/wrapper_chat_apply.h +++ b/llama-cpp-bindings-sys/wrapper_chat_apply.h @@ -29,6 +29,7 @@ llama_rs_apply_chat_template_status llama_rs_apply_chat_template( const char * const * contents, size_t n_messages, int add_generation_prompt, + int enable_thinking, char ** out_string, char ** out_error); diff --git a/llama-cpp-bindings-sys/wrapper_chat_parse.cpp b/llama-cpp-bindings-sys/wrapper_chat_parse.cpp index 0bf59aee..3adc9396 100644 --- a/llama-cpp-bindings-sys/wrapper_chat_parse.cpp +++ b/llama-cpp-bindings-sys/wrapper_chat_parse.cpp @@ -1,15 +1,20 @@ #include "wrapper_chat_parse.h" +#include // IWYU pragma: keep +#include +#include "peg-parser.h" #include "wrapper_token_text.h" #include "llama.cpp/common/chat-auto-parser.h" #include "llama.cpp/common/chat.h" #include "llama.cpp/include/llama.h" -#include "marker_probes/marker_probe.h" +#include "wrapper_utils.h" +#include #include +#include #include -#include #include +#include using wrapper_helpers::token_text_or_empty; @@ -17,139 +22,205 @@ struct llama_rs_parsed_chat { common_chat_msg message; }; -static char * dup_or_set_alloc_flag(const std::string & source, bool * out_alloc_failed) { - *out_alloc_failed = false; - char * dup = llama_rs_dup_string(source); - if (!dup) { - *out_alloc_failed = true; - } - return dup; +struct llama_rs_chat_parser { + autoparser::autoparser parser; +}; + +namespace { +void dup_or_set_alloc_flag(const std::string & source, char ** out_dup, bool * out_alloc_failed) { + *out_dup = llama_rs_dup_string(source); + *out_alloc_failed = (*out_dup == nullptr); } +} // namespace -extern "C" llama_rs_parse_chat_message_status llama_rs_parse_chat_message( +extern "C" auto llama_rs_chat_parser_create( const struct llama_model * model, - const char * tools_json, - const char * input, - int is_partial, - llama_rs_parsed_chat_handle * out_handle, - char ** out_error) { - if (out_handle) { - *out_handle = nullptr; - } - if (out_error) { + const char * reasoning_open, + const char * reasoning_close, + llama_rs_chat_parser_handle * out_parser, + char ** out_error) -> llama_rs_chat_parser_create_status { + if (out_parser != nullptr) { + *out_parser = nullptr; + } + if (out_error != nullptr) { *out_error = nullptr; } - if (!model) { - return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_MODEL_ARG; + if (model == nullptr) { + return LLAMA_RS_CHAT_PARSER_CREATE_NULL_MODEL_ARG; } - if (!input) { - return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG; + if (out_parser == nullptr) { + return LLAMA_RS_CHAT_PARSER_CREATE_NULL_OUT_PARSER_ARG; } - if (!out_handle) { - return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG; - } - if (!out_error) { - return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG; + if (out_error == nullptr) { + return LLAMA_RS_CHAT_PARSER_CREATE_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); - if (!tmpl_src) { - return LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE; + if (tmpl_src == nullptr) { + return LLAMA_RS_CHAT_PARSER_CREATE_MODEL_HAS_NO_CHAT_TEMPLATE; } const llama_vocab * vocab = llama_model_get_vocab(model); - if (!vocab) { - return LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB; + if (vocab == nullptr) { + return LLAMA_RS_CHAT_PARSER_CREATE_MODEL_HAS_NO_VOCAB; } - std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); - std::string eos_token = token_text_or_empty(vocab, llama_vocab_eos(vocab)); + std::string const bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); + std::string const eos_token = token_text_or_empty(vocab, llama_vocab_eos(vocab)); + + common_chat_template const tmpl(tmpl_src, bos_token, eos_token); - common_chat_template tmpl(tmpl_src, bos_token, eos_token); + auto parser_handle = std::make_unique(); + parser_handle->parser.analyze_template(tmpl); - autoparser::autoparser parser; - parser.analyze_template(tmpl); + if (parser_handle->parser.reasoning.mode == autoparser::reasoning_mode::NONE + && reasoning_open != nullptr && reasoning_close != nullptr + && *reasoning_open != '\0' && *reasoning_close != '\0') { + parser_handle->parser.reasoning.mode = autoparser::reasoning_mode::TAG_BASED; + parser_handle->parser.reasoning.start = reasoning_open; + parser_handle->parser.reasoning.end = reasoning_close; + } + + *out_parser = parser_handle.release(); + + return LLAMA_RS_CHAT_PARSER_CREATE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_CHAT_PARSER_CREATE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & ex) { + *out_error = llama_rs_dup_string(std::string(ex.what())); + if (*out_error == nullptr) { + return LLAMA_RS_CHAT_PARSER_CREATE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_CHAT_PARSER_CREATE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); + if (*out_error == nullptr) { + return LLAMA_RS_CHAT_PARSER_CREATE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_CHAT_PARSER_CREATE_VENDORED_THREW_CXX_EXCEPTION; + } +} - if (parser.reasoning.mode == autoparser::reasoning_mode::NONE) { - for (auto probe : marker_probes::registered()) { - auto fallback = probe(tmpl); - if (fallback.found) { - parser.reasoning.mode = autoparser::reasoning_mode::TAG_BASED; - parser.reasoning.start = std::move(fallback.start); - parser.reasoning.end = std::move(fallback.end); - break; - } +extern "C" auto llama_rs_chat_parser_free( + llama_rs_chat_parser_handle parser, + char ** out_error) -> llama_rs_chat_parser_free_status { + if (out_error != nullptr) { + *out_error = nullptr; + } + try { + const std::unique_ptr reclaimed(parser); + return LLAMA_RS_CHAT_PARSER_FREE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_CHAT_PARSER_FREE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error != nullptr) { + *out_error = llama_rs_dup_string(err.what()); + if (*out_error == nullptr) { + return LLAMA_RS_CHAT_PARSER_FREE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_CHAT_PARSER_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error != nullptr) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (*out_error == nullptr) { + return LLAMA_RS_CHAT_PARSER_FREE_ERROR_STRING_ALLOCATION_FAILED; } } + return LLAMA_RS_CHAT_PARSER_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION; + } +} +extern "C" auto llama_rs_parse_chat_message( + llama_rs_chat_parser_handle parser, + const char * tools_json, + const char * input, + int is_partial, + llama_rs_parsed_chat_handle * out_handle, + char ** out_error) -> llama_rs_parse_chat_message_status { + if (out_handle != nullptr) { + *out_handle = nullptr; + } + if (out_error != nullptr) { + *out_error = nullptr; + } + if (parser == nullptr) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_PARSER_ARG; + } + if (input == nullptr) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG; + } + if (out_handle == nullptr) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG; + } + if (out_error == nullptr) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG; + } + + try { autoparser::generation_params inputs; - inputs.add_generation_prompt = true; - inputs.enable_thinking = true; - inputs.messages = nlohmann::ordered_json::array({ - { { "role", "user" }, { "content", "ping" } } - }); - if (tools_json && tools_json[0] != '\0') { + if ((tools_json != nullptr) && *tools_json != '\0') { inputs.tools = nlohmann::ordered_json::parse(tools_json); } else { inputs.tools = nlohmann::ordered_json::array(); } - common_chat_params chat_params = - autoparser::peg_generator::generate_parser(tmpl, inputs, parser); + common_peg_arena const chat_parser = parser->parser.build_parser(inputs, std::string()); - common_chat_parser_params parser_params(chat_params); - parser_params.parser.load(chat_params.parser); + common_chat_parser_params parser_params; + parser_params.format = COMMON_CHAT_FORMAT_PEG_NATIVE; - common_chat_msg parsed = common_chat_parse(input, is_partial != 0, parser_params); + common_chat_msg parsed = + common_chat_peg_parse(chat_parser, input, is_partial != 0, parser_params); - auto * handle = new llama_rs_parsed_chat{}; + auto handle = std::make_unique(); handle->message = std::move(parsed); - *out_handle = handle; + *out_handle = handle.release(); return LLAMA_RS_PARSE_CHAT_MESSAGE_OK; } catch (const std::bad_alloc &) { return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_parsed_chat_free_status llama_rs_parsed_chat_free( +extern "C" auto llama_rs_parsed_chat_free( llama_rs_parsed_chat_handle handle, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_parsed_chat_free_status { + if (out_error != nullptr) { *out_error = nullptr; } try { - delete handle; + const std::unique_ptr reclaimed(handle); return LLAMA_RS_PARSED_CHAT_FREE_OK; } catch (const std::bad_alloc &) { return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED; } } @@ -157,20 +228,20 @@ extern "C" llama_rs_parsed_chat_free_status llama_rs_parsed_chat_free( } } -extern "C" llama_rs_parsed_chat_tool_call_count_status llama_rs_parsed_chat_tool_call_count( +extern "C" auto llama_rs_parsed_chat_tool_call_count( llama_rs_parsed_chat_handle handle, size_t * out_count, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_parsed_chat_tool_call_count_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_count) { + if (out_count != nullptr) { *out_count = 0; } - if (!handle) { + if (handle == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_HANDLE_ARG; } - if (!out_count) { + if (out_count == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_OUT_COUNT_ARG; } try { @@ -179,17 +250,17 @@ extern "C" llama_rs_parsed_chat_tool_call_count_status llama_rs_parsed_chat_tool } catch (const std::bad_alloc &) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED; } } @@ -197,21 +268,21 @@ extern "C" llama_rs_parsed_chat_tool_call_count_status llama_rs_parsed_chat_tool } } -extern "C" llama_rs_parsed_chat_tool_call_id_status llama_rs_parsed_chat_tool_call_id( +extern "C" auto llama_rs_parsed_chat_tool_call_id( llama_rs_parsed_chat_handle handle, size_t index, char ** out_string, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_parsed_chat_tool_call_id_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_string) { + if (out_string != nullptr) { *out_string = nullptr; } - if (!handle) { + if (handle == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_HANDLE_ARG; } - if (!out_string) { + if (out_string == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_OUT_STRING_ARG; } try { @@ -219,7 +290,7 @@ extern "C" llama_rs_parsed_chat_tool_call_id_status llama_rs_parsed_chat_tool_ca return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS; } bool alloc_failed = false; - *out_string = dup_or_set_alloc_flag(handle->message.tool_calls[index].id, &alloc_failed); + dup_or_set_alloc_flag(handle->message.tool_calls[index].id, out_string, &alloc_failed); if (alloc_failed) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; } @@ -227,17 +298,17 @@ extern "C" llama_rs_parsed_chat_tool_call_id_status llama_rs_parsed_chat_tool_ca } catch (const std::bad_alloc &) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; } } @@ -245,21 +316,21 @@ extern "C" llama_rs_parsed_chat_tool_call_id_status llama_rs_parsed_chat_tool_ca } } -extern "C" llama_rs_parsed_chat_tool_call_name_status llama_rs_parsed_chat_tool_call_name( +extern "C" auto llama_rs_parsed_chat_tool_call_name( llama_rs_parsed_chat_handle handle, size_t index, char ** out_string, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_parsed_chat_tool_call_name_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_string) { + if (out_string != nullptr) { *out_string = nullptr; } - if (!handle) { + if (handle == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_HANDLE_ARG; } - if (!out_string) { + if (out_string == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_OUT_STRING_ARG; } try { @@ -267,7 +338,7 @@ extern "C" llama_rs_parsed_chat_tool_call_name_status llama_rs_parsed_chat_tool_ return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS; } bool alloc_failed = false; - *out_string = dup_or_set_alloc_flag(handle->message.tool_calls[index].name, &alloc_failed); + dup_or_set_alloc_flag(handle->message.tool_calls[index].name, out_string, &alloc_failed); if (alloc_failed) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; } @@ -275,17 +346,17 @@ extern "C" llama_rs_parsed_chat_tool_call_name_status llama_rs_parsed_chat_tool_ } catch (const std::bad_alloc &) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; } } @@ -293,21 +364,21 @@ extern "C" llama_rs_parsed_chat_tool_call_name_status llama_rs_parsed_chat_tool_ } } -extern "C" llama_rs_parsed_chat_tool_call_arguments_status llama_rs_parsed_chat_tool_call_arguments( +extern "C" auto llama_rs_parsed_chat_tool_call_arguments( llama_rs_parsed_chat_handle handle, size_t index, char ** out_string, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_parsed_chat_tool_call_arguments_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_string) { + if (out_string != nullptr) { *out_string = nullptr; } - if (!handle) { + if (handle == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_HANDLE_ARG; } - if (!out_string) { + if (out_string == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_OUT_STRING_ARG; } try { @@ -315,8 +386,8 @@ extern "C" llama_rs_parsed_chat_tool_call_arguments_status llama_rs_parsed_chat_ return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS; } bool alloc_failed = false; - *out_string = dup_or_set_alloc_flag( - handle->message.tool_calls[index].arguments, &alloc_failed); + dup_or_set_alloc_flag( + handle->message.tool_calls[index].arguments, out_string, &alloc_failed); if (alloc_failed) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; } @@ -324,17 +395,17 @@ extern "C" llama_rs_parsed_chat_tool_call_arguments_status llama_rs_parsed_chat_ } catch (const std::bad_alloc &) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; } } @@ -342,25 +413,25 @@ extern "C" llama_rs_parsed_chat_tool_call_arguments_status llama_rs_parsed_chat_ } } -extern "C" llama_rs_parsed_chat_content_status llama_rs_parsed_chat_content( +extern "C" auto llama_rs_parsed_chat_content( llama_rs_parsed_chat_handle handle, char ** out_string, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_parsed_chat_content_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_string) { + if (out_string != nullptr) { *out_string = nullptr; } - if (!handle) { + if (handle == nullptr) { return LLAMA_RS_PARSED_CHAT_CONTENT_NULL_HANDLE_ARG; } - if (!out_string) { + if (out_string == nullptr) { return LLAMA_RS_PARSED_CHAT_CONTENT_NULL_OUT_STRING_ARG; } try { bool alloc_failed = false; - *out_string = dup_or_set_alloc_flag(handle->message.content, &alloc_failed); + dup_or_set_alloc_flag(handle->message.content, out_string, &alloc_failed); if (alloc_failed) { return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; } @@ -368,17 +439,17 @@ extern "C" llama_rs_parsed_chat_content_status llama_rs_parsed_chat_content( } catch (const std::bad_alloc &) { return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; } } @@ -386,25 +457,25 @@ extern "C" llama_rs_parsed_chat_content_status llama_rs_parsed_chat_content( } } -extern "C" llama_rs_parsed_chat_reasoning_content_status llama_rs_parsed_chat_reasoning_content( +extern "C" auto llama_rs_parsed_chat_reasoning_content( llama_rs_parsed_chat_handle handle, char ** out_string, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_parsed_chat_reasoning_content_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_string) { + if (out_string != nullptr) { *out_string = nullptr; } - if (!handle) { + if (handle == nullptr) { return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_HANDLE_ARG; } - if (!out_string) { + if (out_string == nullptr) { return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_OUT_STRING_ARG; } try { bool alloc_failed = false; - *out_string = dup_or_set_alloc_flag(handle->message.reasoning_content, &alloc_failed); + dup_or_set_alloc_flag(handle->message.reasoning_content, out_string, &alloc_failed); if (alloc_failed) { return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; } @@ -412,17 +483,17 @@ extern "C" llama_rs_parsed_chat_reasoning_content_status llama_rs_parsed_chat_re } catch (const std::bad_alloc &) { return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; } } diff --git a/llama-cpp-bindings-sys/wrapper_chat_parse.h b/llama-cpp-bindings-sys/wrapper_chat_parse.h index e235673c..d13d2c07 100644 --- a/llama-cpp-bindings-sys/wrapper_chat_parse.h +++ b/llama-cpp-bindings-sys/wrapper_chat_parse.h @@ -12,20 +12,49 @@ extern "C" { struct llama_rs_parsed_chat; typedef struct llama_rs_parsed_chat * llama_rs_parsed_chat_handle; +struct llama_rs_chat_parser; +typedef struct llama_rs_chat_parser * llama_rs_chat_parser_handle; + +typedef enum llama_rs_chat_parser_create_status { + LLAMA_RS_CHAT_PARSER_CREATE_OK = 0, + LLAMA_RS_CHAT_PARSER_CREATE_NULL_MODEL_ARG, + LLAMA_RS_CHAT_PARSER_CREATE_NULL_OUT_PARSER_ARG, + LLAMA_RS_CHAT_PARSER_CREATE_NULL_OUT_ERROR_ARG, + LLAMA_RS_CHAT_PARSER_CREATE_MODEL_HAS_NO_CHAT_TEMPLATE, + LLAMA_RS_CHAT_PARSER_CREATE_MODEL_HAS_NO_VOCAB, + LLAMA_RS_CHAT_PARSER_CREATE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_CHAT_PARSER_CREATE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_chat_parser_create_status; + +llama_rs_chat_parser_create_status llama_rs_chat_parser_create( + const struct llama_model * model, + const char * reasoning_open, + const char * reasoning_close, + llama_rs_chat_parser_handle * out_parser, + char ** out_error); + +typedef enum llama_rs_chat_parser_free_status { + LLAMA_RS_CHAT_PARSER_FREE_OK = 0, + LLAMA_RS_CHAT_PARSER_FREE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_CHAT_PARSER_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION, +} llama_rs_chat_parser_free_status; + +llama_rs_chat_parser_free_status llama_rs_chat_parser_free( + llama_rs_chat_parser_handle parser, + char ** out_error); + typedef enum llama_rs_parse_chat_message_status { LLAMA_RS_PARSE_CHAT_MESSAGE_OK = 0, - LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_MODEL_ARG, + LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_PARSER_ARG, LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG, LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG, LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG, - LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE, - LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB, LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED, LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION, } llama_rs_parse_chat_message_status; llama_rs_parse_chat_message_status llama_rs_parse_chat_message( - const struct llama_model * model, + llama_rs_chat_parser_handle parser, const char * tools_json, const char * input, int is_partial, diff --git a/llama-cpp-bindings-sys/wrapper_common.cpp b/llama-cpp-bindings-sys/wrapper_common.cpp index 50f8f5e8..cc20c9af 100644 --- a/llama-cpp-bindings-sys/wrapper_common.cpp +++ b/llama-cpp-bindings-sys/wrapper_common.cpp @@ -1,39 +1,44 @@ #include "wrapper_common.h" +#include #include #include #include +#include +#include #include #include #include #include -#include +#include #include "llama.cpp/common/common.h" #include "llama.cpp/common/json-schema-to-grammar.h" #include "llama.cpp/include/llama.h" +#include // IWYU pragma: keep +#include #include "wrapper_utils.h" -#include +#include -extern "C" llama_rs_json_schema_to_grammar_status llama_rs_json_schema_to_grammar( +extern "C" auto llama_rs_json_schema_to_grammar( const char * schema_json, bool force_gbnf, char ** out_grammar, - char ** out_error) { - if (out_grammar) { + char ** out_error) -> llama_rs_json_schema_to_grammar_status { + if (out_grammar != nullptr) { *out_grammar = nullptr; } - if (out_error) { + if (out_error != nullptr) { *out_error = nullptr; } - if (!schema_json) { + if (schema_json == nullptr) { return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_SCHEMA_JSON_ARG; } - if (!out_grammar) { + if (out_grammar == nullptr) { return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_GRAMMAR_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_ERROR_ARG; } @@ -41,7 +46,7 @@ extern "C" llama_rs_json_schema_to_grammar_status llama_rs_json_schema_to_gramma const auto schema = nlohmann::ordered_json::parse(schema_json); const auto grammar = json_schema_to_grammar(schema, force_gbnf); *out_grammar = llama_rs_dup_string(grammar); - if (!*out_grammar) { + if (*out_grammar == nullptr) { return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_OK; @@ -49,19 +54,19 @@ extern "C" llama_rs_json_schema_to_grammar_status llama_rs_json_schema_to_gramma return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::invalid_argument & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_INVALID_SCHEMA; } catch (const std::exception & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; @@ -69,32 +74,30 @@ extern "C" llama_rs_json_schema_to_grammar_status llama_rs_json_schema_to_gramma } extern "C" void llama_rs_string_free(char * ptr) { - if (ptr) { - std::free(ptr); - } + const std::unique_ptr reclaimed(ptr); } -extern "C" llama_rs_sampler_init_grammar_status llama_rs_sampler_init_grammar( +extern "C" auto llama_rs_sampler_init_grammar( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, struct llama_sampler ** out_sampler, - char ** out_error) { - if (out_sampler) { + char ** out_error) -> llama_rs_sampler_init_grammar_status { + if (out_sampler != nullptr) { *out_sampler = nullptr; } - if (out_error) { + if (out_error != nullptr) { *out_error = nullptr; } - if (!out_sampler) { + if (out_sampler == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_SAMPLER_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_ERROR_ARG; } try { *out_sampler = llama_sampler_init_grammar(vocab, grammar_str, grammar_root); - if (!*out_sampler) { + if (*out_sampler == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_RETURNED_NULL; } return LLAMA_RS_SAMPLER_INIT_GRAMMAR_OK; @@ -102,20 +105,20 @@ extern "C" llama_rs_sampler_init_grammar_status llama_rs_sampler_init_grammar( return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_sampler_init_grammar_lazy_status llama_rs_sampler_init_grammar_lazy( +extern "C" auto llama_rs_sampler_init_grammar_lazy( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, @@ -124,33 +127,35 @@ extern "C" llama_rs_sampler_init_grammar_lazy_status llama_rs_sampler_init_gramm const llama_token * trigger_tokens, size_t num_trigger_tokens, struct llama_sampler ** out_sampler, - char ** out_error) { - if (out_sampler) { + char ** out_error) -> llama_rs_sampler_init_grammar_lazy_status { + if (out_sampler != nullptr) { *out_sampler = nullptr; } - if (out_error) { + if (out_error != nullptr) { *out_error = nullptr; } - if (!out_sampler) { + if (out_sampler == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_SAMPLER_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_ERROR_ARG; } try { std::vector trigger_patterns; trigger_patterns.reserve(num_trigger_words); - for (size_t i = 0; i < num_trigger_words; ++i) { - const char * word = trigger_words ? trigger_words[i] : nullptr; - if (word && word[0] != '\0') { + const gsl::span words( + trigger_words, trigger_words != nullptr ? num_trigger_words : 0); + for (const char * const word : words) { + if ((word != nullptr) && *word != '\0') { trigger_patterns.push_back(regex_escape(word)); } } - std::vector trigger_patterns_c; - trigger_patterns_c.reserve(trigger_patterns.size()); - for (const auto & pattern : trigger_patterns) { - trigger_patterns_c.push_back(pattern.c_str()); - } + std::vector trigger_patterns_c(trigger_patterns.size()); + std::transform( + trigger_patterns.begin(), + trigger_patterns.end(), + trigger_patterns_c.begin(), + [](const std::string & pattern) -> const char * { return pattern.c_str(); }); *out_sampler = llama_sampler_init_grammar_lazy_patterns( vocab, @@ -160,7 +165,7 @@ extern "C" llama_rs_sampler_init_grammar_lazy_status llama_rs_sampler_init_gramm trigger_patterns_c.size(), trigger_tokens, num_trigger_tokens); - if (!*out_sampler) { + if (*out_sampler == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_RETURNED_NULL; } return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_OK; @@ -168,20 +173,20 @@ extern "C" llama_rs_sampler_init_grammar_lazy_status llama_rs_sampler_init_gramm return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_sampler_init_grammar_lazy_patterns_status llama_rs_sampler_init_grammar_lazy_patterns( +extern "C" auto llama_rs_sampler_init_grammar_lazy_patterns( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, @@ -190,17 +195,17 @@ extern "C" llama_rs_sampler_init_grammar_lazy_patterns_status llama_rs_sampler_i const llama_token * trigger_tokens, size_t num_trigger_tokens, struct llama_sampler ** out_sampler, - char ** out_error) { - if (out_sampler) { + char ** out_error) -> llama_rs_sampler_init_grammar_lazy_patterns_status { + if (out_sampler != nullptr) { *out_sampler = nullptr; } - if (out_error) { + if (out_error != nullptr) { *out_error = nullptr; } - if (!out_sampler) { + if (out_sampler == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_SAMPLER_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_ERROR_ARG; } try { @@ -212,7 +217,7 @@ extern "C" llama_rs_sampler_init_grammar_lazy_patterns_status llama_rs_sampler_i num_trigger_patterns, trigger_tokens, num_trigger_tokens); - if (!*out_sampler) { + if (*out_sampler == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_RETURNED_NULL; } return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_OK; @@ -220,37 +225,37 @@ extern "C" llama_rs_sampler_init_grammar_lazy_patterns_status llama_rs_sampler_i return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::regex_error & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_INVALID_TRIGGER_PATTERN; } catch (const std::exception & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_pos llama_rs_memory_seq_pos_max( - struct llama_context * ctx, - llama_seq_id seq_id) { - if (!ctx) { +extern "C" auto llama_rs_memory_seq_pos_max( + const struct llama_context * ctx, + llama_seq_id seq_id) -> llama_pos { + if (ctx == nullptr) { return -1; } try { auto * mem = llama_get_memory(ctx); - if (!mem) { + if (mem == nullptr) { return -1; } - uint32_t n_seq_max = llama_n_seq_max(ctx); + uint32_t const n_seq_max = llama_n_seq_max(ctx); if (seq_id < 0 || (uint32_t) seq_id >= n_seq_max) { return -1; } @@ -261,18 +266,18 @@ extern "C" llama_pos llama_rs_memory_seq_pos_max( } } -extern "C" llama_rs_encode_status llama_rs_encode( +extern "C" auto llama_rs_encode( struct llama_context * ctx, struct llama_batch batch, int32_t * out_vendored_return_code, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_encode_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_vendored_return_code) { + if (out_vendored_return_code != nullptr) { *out_vendored_return_code = 0; } - if (!ctx) { + if (ctx == nullptr) { return LLAMA_RS_ENCODE_NULL_CTX_ARG; } try { @@ -280,9 +285,9 @@ extern "C" llama_rs_encode_status llama_rs_encode( if (!llama_model_has_encoder(model)) { return LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER; } - int32_t result = llama_encode(ctx, batch); + int32_t const result = llama_encode(ctx, batch); if (result != 0) { - if (out_vendored_return_code) { + if (out_vendored_return_code != nullptr) { *out_vendored_return_code = result; } if (result == -2) { @@ -297,17 +302,17 @@ extern "C" llama_rs_encode_status llama_rs_encode( } catch (const std::bad_alloc &) { return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED; } } @@ -315,17 +320,17 @@ extern "C" llama_rs_encode_status llama_rs_encode( } } -extern "C" llama_rs_memory_seq_add_status llama_rs_memory_seq_add( - struct llama_context * ctx, +extern "C" auto llama_rs_memory_seq_add( + const struct llama_context * ctx, llama_seq_id seq_id, - llama_pos p0, - llama_pos p1, + llama_pos pos_start, + llama_pos pos_end, llama_pos shift, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_memory_seq_add_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (!ctx) { + if (ctx == nullptr) { return LLAMA_RS_MEMORY_SEQ_ADD_NULL_CTX_ARG; } try { @@ -335,25 +340,25 @@ extern "C" llama_rs_memory_seq_add_status llama_rs_memory_seq_add( return LLAMA_RS_MEMORY_SEQ_ADD_INCOMPATIBLE_ROPE_TYPE; } auto * mem = llama_get_memory(ctx); - if (!mem) { + if (mem == nullptr) { return LLAMA_RS_MEMORY_SEQ_ADD_NULL_MEM; } - llama_memory_seq_add(mem, seq_id, p0, p1, shift); + llama_memory_seq_add(mem, seq_id, pos_start, pos_end, shift); return LLAMA_RS_MEMORY_SEQ_ADD_OK; } catch (const std::bad_alloc &) { return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED; } } @@ -361,17 +366,17 @@ extern "C" llama_rs_memory_seq_add_status llama_rs_memory_seq_add( } } -extern "C" llama_rs_memory_seq_div_status llama_rs_memory_seq_div( - struct llama_context * ctx, +extern "C" auto llama_rs_memory_seq_div( + const struct llama_context * ctx, llama_seq_id seq_id, - llama_pos p0, - llama_pos p1, - int d, - char ** out_error) { - if (out_error) { + llama_pos pos_start, + llama_pos pos_end, + int divisor, + char ** out_error) -> llama_rs_memory_seq_div_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (!ctx) { + if (ctx == nullptr) { return LLAMA_RS_MEMORY_SEQ_DIV_NULL_CTX_ARG; } try { @@ -381,25 +386,25 @@ extern "C" llama_rs_memory_seq_div_status llama_rs_memory_seq_div( return LLAMA_RS_MEMORY_SEQ_DIV_INCOMPATIBLE_ROPE_TYPE; } auto * mem = llama_get_memory(ctx); - if (!mem) { + if (mem == nullptr) { return LLAMA_RS_MEMORY_SEQ_DIV_NULL_MEM; } - llama_memory_seq_div(mem, seq_id, p0, p1, d); + llama_memory_seq_div(mem, seq_id, pos_start, pos_end, divisor); return LLAMA_RS_MEMORY_SEQ_DIV_OK; } catch (const std::bad_alloc &) { return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED; } } @@ -407,25 +412,25 @@ extern "C" llama_rs_memory_seq_div_status llama_rs_memory_seq_div( } } -extern "C" llama_rs_sampler_sample_status llama_rs_sampler_sample( +extern "C" auto llama_rs_sampler_sample( struct llama_sampler * sampler, struct llama_context * ctx, int32_t idx, llama_token * out_token, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_sampler_sample_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (!sampler) { + if (sampler == nullptr) { return LLAMA_RS_SAMPLER_SAMPLE_NULL_SAMPLER_ARG; } - if (!ctx) { + if (ctx == nullptr) { return LLAMA_RS_SAMPLER_SAMPLE_NULL_CTX_ARG; } - if (!out_token) { + if (out_token == nullptr) { return LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_TOKEN_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_ERROR_ARG; } try { @@ -435,30 +440,30 @@ extern "C" llama_rs_sampler_sample_status llama_rs_sampler_sample( return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_sampler_accept_status llama_rs_sampler_accept( +extern "C" auto llama_rs_sampler_accept( struct llama_sampler * sampler, llama_token token, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_sampler_accept_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (!sampler) { + if (sampler == nullptr) { return LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_SAMPLER_ACCEPT_NULL_OUT_ERROR_ARG; } try { @@ -468,42 +473,42 @@ extern "C" llama_rs_sampler_accept_status llama_rs_sampler_accept( return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_load_model_from_file_status llama_rs_load_model_from_file( +extern "C" auto llama_rs_load_model_from_file( const char * path, struct llama_model_params params, struct llama_model ** out_model, - char ** out_error) { - if (out_model) { + char ** out_error) -> llama_rs_load_model_from_file_status { + if (out_model != nullptr) { *out_model = nullptr; } - if (out_error) { + if (out_error != nullptr) { *out_error = nullptr; } - if (!path) { + if (path == nullptr) { return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_PATH_ARG; } - if (!out_model) { + if (out_model == nullptr) { return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_MODEL_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_ERROR_ARG; } try { - *out_model = llama_load_model_from_file(path, params); - if (!*out_model) { + *out_model = llama_model_load_from_file(path, params); + if (*out_model == nullptr) { return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL; } return LLAMA_RS_LOAD_MODEL_FROM_FILE_OK; @@ -511,42 +516,42 @@ extern "C" llama_rs_load_model_from_file_status llama_rs_load_model_from_file( return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_new_context_with_model_status llama_rs_new_context_with_model( +extern "C" auto llama_rs_new_context_with_model( struct llama_model * model, struct llama_context_params params, struct llama_context ** out_ctx, - char ** out_error) { - if (out_ctx) { + char ** out_error) -> llama_rs_new_context_with_model_status { + if (out_ctx != nullptr) { *out_ctx = nullptr; } - if (out_error) { + if (out_error != nullptr) { *out_error = nullptr; } - if (!model) { + if (model == nullptr) { return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_MODEL_ARG; } - if (!out_ctx) { + if (out_ctx == nullptr) { return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_CTX_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_ERROR_ARG; } try { - *out_ctx = llama_new_context_with_model(model, params); - if (!*out_ctx) { + *out_ctx = llama_init_from_model(model, params); + if (*out_ctx == nullptr) { return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_RETURNED_NULL; } return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_OK; @@ -554,40 +559,40 @@ extern "C" llama_rs_new_context_with_model_status llama_rs_new_context_with_mode return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_decode_status llama_rs_decode( +extern "C" auto llama_rs_decode( struct llama_context * ctx, struct llama_batch batch, int32_t * out_vendored_return_code, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_decode_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_vendored_return_code) { + if (out_vendored_return_code != nullptr) { *out_vendored_return_code = 0; } - if (!ctx) { + if (ctx == nullptr) { return LLAMA_RS_DECODE_NULL_CTX_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_DECODE_NULL_OUT_ERROR_ARG; } try { - int32_t result = llama_decode(ctx, batch); + int32_t const result = llama_decode(ctx, batch); if (result != 0) { - if (out_vendored_return_code) { + if (out_vendored_return_code != nullptr) { *out_vendored_return_code = result; } if (result == -2) { @@ -603,20 +608,20 @@ extern "C" llama_rs_decode_status llama_rs_decode( return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_tokenize_status llama_rs_tokenize( +extern "C" auto llama_rs_tokenize( const struct llama_vocab * vocab, const char * text, int32_t text_len, @@ -625,27 +630,27 @@ extern "C" llama_rs_tokenize_status llama_rs_tokenize( bool add_special, bool parse_special, int32_t * out_returned_count, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_tokenize_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_returned_count) { + if (out_returned_count != nullptr) { *out_returned_count = 0; } - if (!vocab) { + if (vocab == nullptr) { return LLAMA_RS_TOKENIZE_NULL_VOCAB_ARG; } - if (!text) { + if (text == nullptr) { return LLAMA_RS_TOKENIZE_NULL_TEXT_ARG; } - if (!out_returned_count) { + if (out_returned_count == nullptr) { return LLAMA_RS_TOKENIZE_NULL_OUT_RETURNED_COUNT_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_TOKENIZE_NULL_OUT_ERROR_ARG; } try { - int32_t count = llama_tokenize( + int32_t const count = llama_tokenize( vocab, text, text_len, tokens, n_tokens_max, add_special, parse_special); *out_returned_count = count; return LLAMA_RS_TOKENIZE_OK; @@ -653,33 +658,33 @@ extern "C" llama_rs_tokenize_status llama_rs_tokenize( return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_sampler_apply_status llama_rs_sampler_apply( +extern "C" auto llama_rs_sampler_apply( struct llama_sampler * sampler, struct llama_token_data_array * data_array, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_sampler_apply_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (!sampler) { + if (sampler == nullptr) { return LLAMA_RS_SAMPLER_APPLY_NULL_SAMPLER_ARG; } - if (!data_array) { + if (data_array == nullptr) { return LLAMA_RS_SAMPLER_APPLY_NULL_DATA_ARRAY_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_SAMPLER_APPLY_NULL_OUT_ERROR_ARG; } try { @@ -689,13 +694,13 @@ extern "C" llama_rs_sampler_apply_status llama_rs_sampler_apply( return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION; diff --git a/llama-cpp-bindings-sys/wrapper_common.h b/llama-cpp-bindings-sys/wrapper_common.h index f790408a..7896e200 100644 --- a/llama-cpp-bindings-sys/wrapper_common.h +++ b/llama-cpp-bindings-sys/wrapper_common.h @@ -122,7 +122,7 @@ llama_rs_sampler_sample_status llama_rs_sampler_sample( void llama_rs_string_free(char * ptr); llama_pos llama_rs_memory_seq_pos_max( - struct llama_context * ctx, + const struct llama_context * ctx, llama_seq_id seq_id); typedef enum llama_rs_encode_status { @@ -152,10 +152,10 @@ typedef enum llama_rs_memory_seq_add_status { } llama_rs_memory_seq_add_status; llama_rs_memory_seq_add_status llama_rs_memory_seq_add( - struct llama_context * ctx, + const struct llama_context * ctx, llama_seq_id seq_id, - llama_pos p0, - llama_pos p1, + llama_pos pos_start, + llama_pos pos_end, llama_pos shift, char ** out_error); @@ -169,11 +169,11 @@ typedef enum llama_rs_memory_seq_div_status { } llama_rs_memory_seq_div_status; llama_rs_memory_seq_div_status llama_rs_memory_seq_div( - struct llama_context * ctx, + const struct llama_context * ctx, llama_seq_id seq_id, - llama_pos p0, - llama_pos p1, - int d, + llama_pos pos_start, + llama_pos pos_end, + int divisor, char ** out_error); typedef enum llama_rs_load_model_from_file_status { diff --git a/llama-cpp-bindings-sys/wrapper_fit.cpp b/llama-cpp-bindings-sys/wrapper_fit.cpp index 02eee839..5360b337 100644 --- a/llama-cpp-bindings-sys/wrapper_fit.cpp +++ b/llama-cpp-bindings-sys/wrapper_fit.cpp @@ -1,12 +1,16 @@ #include "wrapper_fit.h" +#include "llama.h" +#include "ggml.h" #include "wrapper_utils.h" +#include +#include #include #include #include "llama.cpp/common/fit.h" -extern "C" llama_rs_fit_params_status llama_rs_fit_params( +extern "C" auto llama_rs_fit_params( const char * path_model, struct llama_model_params * mparams, struct llama_context_params * cparams, @@ -16,11 +20,11 @@ extern "C" llama_rs_fit_params_status llama_rs_fit_params( uint32_t n_ctx_min, enum ggml_log_level log_level, int32_t * out_unrecognized_status_code, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_fit_params_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_unrecognized_status_code) { + if (out_unrecognized_status_code != nullptr) { *out_unrecognized_status_code = 0; } @@ -36,24 +40,24 @@ extern "C" llama_rs_fit_params_status llama_rs_fit_params( case COMMON_PARAMS_FIT_STATUS_ERROR: return LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR; } - if (out_unrecognized_status_code) { + if (out_unrecognized_status_code != nullptr) { *out_unrecognized_status_code = static_cast(status); } return LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE; } catch (const std::bad_alloc &) { return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED; } } diff --git a/llama-cpp-bindings-sys/wrapper_mtmd.cpp b/llama-cpp-bindings-sys/wrapper_mtmd.cpp index bff5b958..1a562664 100644 --- a/llama-cpp-bindings-sys/wrapper_mtmd.cpp +++ b/llama-cpp-bindings-sys/wrapper_mtmd.cpp @@ -1,33 +1,38 @@ #include "wrapper_mtmd.h" +#include "llama.h" +#include "tools/mtmd/mtmd.h" +#include "tools/mtmd/mtmd-helper.h" #include "wrapper_utils.h" +#include +#include #include #include #include -extern "C" llama_rs_mtmd_init_from_file_status llama_rs_mtmd_init_from_file( +extern "C" auto llama_rs_mtmd_init_from_file( const char * mmproj_path, const struct llama_model * text_model, struct mtmd_context_params ctx_params, struct mtmd_context ** out_ctx, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_mtmd_init_from_file_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (!out_ctx) { + if (out_ctx == nullptr) { return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_OUT_CTX_ARG; } *out_ctx = nullptr; - if (!mmproj_path) { + if (mmproj_path == nullptr) { return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG; } - if (!text_model) { + if (text_model == nullptr) { return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_TEXT_MODEL_ARG; } try { struct mtmd_context * ctx = mtmd_init_from_file(mmproj_path, text_model, ctx_params); - if (!ctx) { + if (ctx == nullptr) { return LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_RETURNED_NULL; } *out_ctx = ctx; @@ -35,17 +40,17 @@ extern "C" llama_rs_mtmd_init_from_file_status llama_rs_mtmd_init_from_file( } catch (const std::bad_alloc &) { return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; } } @@ -53,28 +58,30 @@ extern "C" llama_rs_mtmd_init_from_file_status llama_rs_mtmd_init_from_file( } } -extern "C" llama_rs_mtmd_bitmap_init_from_file_status llama_rs_mtmd_bitmap_init_from_file( +extern "C" auto llama_rs_mtmd_bitmap_init_from_file( struct mtmd_context * ctx, const char * fname, struct mtmd_bitmap ** out_bitmap, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_mtmd_bitmap_init_from_file_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (!out_bitmap) { + if (out_bitmap == nullptr) { return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_OUT_BITMAP_ARG; } *out_bitmap = nullptr; - if (!ctx) { + if (ctx == nullptr) { return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_CTX_ARG; } - if (!fname) { + if (fname == nullptr) { return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_FNAME_ARG; } try { - struct mtmd_bitmap * bitmap = mtmd_helper_bitmap_init_from_file(ctx, fname); - if (!bitmap) { + struct mtmd_helper_bitmap_wrapper const bitmap_wrapper = + mtmd_helper_bitmap_init_from_file(ctx, fname, false); + struct mtmd_bitmap * bitmap = bitmap_wrapper.bitmap; + if (bitmap == nullptr) { return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_RETURNED_NULL; } *out_bitmap = bitmap; @@ -82,17 +89,17 @@ extern "C" llama_rs_mtmd_bitmap_init_from_file_status llama_rs_mtmd_bitmap_init_ } catch (const std::bad_alloc &) { return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; } } @@ -100,35 +107,35 @@ extern "C" llama_rs_mtmd_bitmap_init_from_file_status llama_rs_mtmd_bitmap_init_ } } -extern "C" llama_rs_mtmd_tokenize_status llama_rs_mtmd_tokenize( +extern "C" auto llama_rs_mtmd_tokenize( struct mtmd_context * ctx, struct mtmd_input_chunks * output, const struct mtmd_input_text * text, const struct mtmd_bitmap ** bitmaps, size_t num_bitmaps, int32_t * out_undocumented_return_code, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_mtmd_tokenize_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_undocumented_return_code) { + if (out_undocumented_return_code != nullptr) { *out_undocumented_return_code = 0; } - if (!ctx) { + if (ctx == nullptr) { return LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG; } - if (!output) { + if (output == nullptr) { return LLAMA_RS_MTMD_TOKENIZE_NULL_OUTPUT_ARG; } - if (!text) { + if (text == nullptr) { return LLAMA_RS_MTMD_TOKENIZE_NULL_TEXT_ARG; } - if (num_bitmaps > 0 && !bitmaps) { + if (num_bitmaps > 0 && (bitmaps == nullptr)) { return LLAMA_RS_MTMD_TOKENIZE_NULL_BITMAPS_ARG_WHEN_NUM_BITMAPS_NONZERO; } try { - int32_t result = mtmd_tokenize(ctx, output, text, bitmaps, num_bitmaps); + int32_t const result = mtmd_tokenize(ctx, output, text, bitmaps, num_bitmaps); switch (result) { case 0: return LLAMA_RS_MTMD_TOKENIZE_OK; @@ -137,7 +144,7 @@ extern "C" llama_rs_mtmd_tokenize_status llama_rs_mtmd_tokenize( case 2: return LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR; default: - if (out_undocumented_return_code) { + if (out_undocumented_return_code != nullptr) { *out_undocumented_return_code = result; } return LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE; @@ -145,17 +152,17 @@ extern "C" llama_rs_mtmd_tokenize_status llama_rs_mtmd_tokenize( } catch (const std::bad_alloc &) { return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; } } @@ -163,28 +170,28 @@ extern "C" llama_rs_mtmd_tokenize_status llama_rs_mtmd_tokenize( } } -extern "C" llama_rs_mtmd_encode_chunk_status llama_rs_mtmd_encode_chunk( +extern "C" auto llama_rs_mtmd_encode_chunk( struct mtmd_context * ctx, const struct mtmd_input_chunk * chunk, int32_t * out_vendored_return_code, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_mtmd_encode_chunk_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_vendored_return_code) { + if (out_vendored_return_code != nullptr) { *out_vendored_return_code = 0; } - if (!ctx) { + if (ctx == nullptr) { return LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG; } - if (!chunk) { + if (chunk == nullptr) { return LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG; } try { - int32_t result = mtmd_encode_chunk(ctx, chunk); + int32_t const result = mtmd_encode_chunk(ctx, chunk); if (result != 0) { - if (out_vendored_return_code) { + if (out_vendored_return_code != nullptr) { *out_vendored_return_code = result; } return LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE; @@ -193,17 +200,17 @@ extern "C" llama_rs_mtmd_encode_chunk_status llama_rs_mtmd_encode_chunk( } catch (const std::bad_alloc &) { return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED; } } @@ -211,7 +218,7 @@ extern "C" llama_rs_mtmd_encode_chunk_status llama_rs_mtmd_encode_chunk( } } -extern "C" llama_rs_mtmd_eval_chunk_single_status llama_rs_mtmd_eval_chunk_single( +extern "C" auto llama_rs_mtmd_eval_chunk_single( struct mtmd_context * ctx, struct llama_context * lctx, const struct mtmd_input_chunk * chunk, @@ -221,31 +228,31 @@ extern "C" llama_rs_mtmd_eval_chunk_single_status llama_rs_mtmd_eval_chunk_singl bool logits_last, llama_pos * out_new_n_past, int32_t * out_vendored_return_code, - char ** out_error) { - if (out_error) { + char ** out_error) -> llama_rs_mtmd_eval_chunk_single_status { + if (out_error != nullptr) { *out_error = nullptr; } - if (out_vendored_return_code) { + if (out_vendored_return_code != nullptr) { *out_vendored_return_code = 0; } - if (!ctx) { + if (ctx == nullptr) { return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_MTMD_CTX_ARG; } - if (!lctx) { + if (lctx == nullptr) { return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_LLAMA_CTX_ARG; } - if (!chunk) { + if (chunk == nullptr) { return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_CHUNK_ARG; } - if (!out_new_n_past) { + if (out_new_n_past == nullptr) { return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_OUT_NEW_N_PAST_ARG; } try { - int32_t result = mtmd_helper_eval_chunk_single( + int32_t const result = mtmd_helper_eval_chunk_single( ctx, lctx, chunk, n_past, seq_id, n_batch, logits_last, out_new_n_past); if (result != 0) { - if (out_vendored_return_code) { + if (out_vendored_return_code != nullptr) { *out_vendored_return_code = result; } return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_RETURNED_NONZERO_CODE; @@ -254,17 +261,17 @@ extern "C" llama_rs_mtmd_eval_chunk_single_status llama_rs_mtmd_eval_chunk_singl } catch (const std::bad_alloc &) { return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string(err.what()); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED; } } return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - if (out_error) { + if (out_error != nullptr) { *out_error = llama_rs_dup_string("unknown c++ exception"); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED; } } diff --git a/llama-cpp-bindings-sys/wrapper_reasoning.cpp b/llama-cpp-bindings-sys/wrapper_reasoning.cpp index 7970b4ee..5fcf9094 100644 --- a/llama-cpp-bindings-sys/wrapper_reasoning.cpp +++ b/llama-cpp-bindings-sys/wrapper_reasoning.cpp @@ -3,155 +3,221 @@ #include "llama.cpp/common/chat-auto-parser.h" #include "llama.cpp/common/chat.h" #include "llama.cpp/include/llama.h" -#include "marker_probes/marker_probe.h" +#include // IWYU pragma: keep +#include +#include "wrapper_utils.h" #include +#include #include -#include #include +#include namespace { -std::string token_text_or_empty(const llama_vocab * vocab, llama_token token) { +auto token_text_or_empty(const llama_vocab * vocab, llama_token token) -> std::string { if (token == LLAMA_TOKEN_NULL) { return {}; } const char * text = llama_vocab_get_text(vocab, token); - if (!text) { + if (text == nullptr) { return {}; } - return std::string(text); + return {text}; +} + +auto find_reasoning_markers( + const common_chat_template & tmpl, + const char * tmpl_src, + std::string * out_start, + std::string * out_end) -> bool { + autoparser::generation_params probe_params; + probe_params.add_generation_prompt = true; + probe_params.enable_thinking = true; + probe_params.is_inference = false; + probe_params.add_inference = false; + probe_params.mark_input = false; + probe_params.messages = nlohmann::ordered_json::array({ + nlohmann::ordered_json{ { "role", "user" }, { "content", "ping" } }, + }); + + const std::string tmpl_src_str = tmpl_src; + if (auto specialized = common_chat_try_specialized_template(tmpl, tmpl_src_str, probe_params)) { + if (specialized->supports_thinking + && !specialized->thinking_start_tag.empty() + && !specialized->thinking_end_tag.empty()) { + *out_start = std::move(specialized->thinking_start_tag); + *out_end = std::move(specialized->thinking_end_tag); + return true; + } + } + + autoparser::autoparser parser; + parser.analyze_template(tmpl); + if (parser.reasoning.mode != autoparser::reasoning_mode::NONE + && !parser.reasoning.start.empty() + && !parser.reasoning.end.empty()) { + *out_start = std::move(parser.reasoning.start); + *out_end = std::move(parser.reasoning.end); + return true; + } + + return false; } } // namespace -extern "C" llama_rs_detect_reasoning_markers_status llama_rs_detect_reasoning_markers( +extern "C" auto llama_rs_detect_reasoning_markers( const struct llama_model * model, char ** out_open, char ** out_close, - char ** out_error) { - if (out_open) { + char ** out_error) -> llama_rs_detect_reasoning_markers_status { + if (out_open != nullptr) { *out_open = nullptr; } - if (out_close) { + if (out_close != nullptr) { *out_close = nullptr; } - if (out_error) { + if (out_error != nullptr) { *out_error = nullptr; } - if (!model) { + if (model == nullptr) { return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_MODEL_ARG; } - if (!out_open) { + if (out_open == nullptr) { return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_OPEN_ARG; } - if (!out_close) { + if (out_close == nullptr) { return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_CLOSE_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); - if (!tmpl_src) { + if (tmpl_src == nullptr) { return LLAMA_RS_DETECT_REASONING_MARKERS_OK; } const llama_vocab * vocab = llama_model_get_vocab(model); - if (!vocab) { + if (vocab == nullptr) { return LLAMA_RS_DETECT_REASONING_MARKERS_OK; } - std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); - std::string eos_token = token_text_or_empty(vocab, llama_vocab_eos(vocab)); + std::string const bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); + std::string const eos_token = token_text_or_empty(vocab, llama_vocab_eos(vocab)); - common_chat_template tmpl(tmpl_src, bos_token, eos_token); + common_chat_template const tmpl(tmpl_src, bos_token, eos_token); std::string detected_start; std::string detected_end; - bool detected = false; - - autoparser::generation_params probe_params; - probe_params.add_generation_prompt = true; - probe_params.enable_thinking = true; - probe_params.is_inference = false; - probe_params.add_inference = false; - probe_params.mark_input = false; - probe_params.messages = nlohmann::ordered_json::array({ - nlohmann::ordered_json{ { "role", "user" }, { "content", "ping" } }, - }); - - const std::string tmpl_src_str = tmpl_src; - if (auto specialized = common_chat_try_specialized_template(tmpl, tmpl_src_str, probe_params)) { - if (specialized->supports_thinking - && !specialized->thinking_start_tag.empty() - && !specialized->thinking_end_tag.empty()) { - detected_start = std::move(specialized->thinking_start_tag); - detected_end = std::move(specialized->thinking_end_tag); - detected = true; - } - } - - if (!detected) { - autoparser::autoparser parser; - parser.analyze_template(tmpl); - - if (parser.reasoning.mode != autoparser::reasoning_mode::NONE - && !parser.reasoning.start.empty() - && !parser.reasoning.end.empty()) { - detected_start = std::move(parser.reasoning.start); - detected_end = std::move(parser.reasoning.end); - detected = true; - } - } - - if (!detected) { - for (auto probe : marker_probes::registered()) { - auto fallback = probe(tmpl); - if (fallback.found) { - detected_start = std::move(fallback.start); - detected_end = std::move(fallback.end); - detected = true; - break; - } - } - } - - if (!detected) { + if (!find_reasoning_markers(tmpl, tmpl_src, &detected_start, &detected_end)) { return LLAMA_RS_DETECT_REASONING_MARKERS_OK; } - char * open_dup = llama_rs_dup_string(detected_start); - char * close_dup = llama_rs_dup_string(detected_end); - - if (!open_dup || !close_dup) { - std::free(open_dup); - std::free(close_dup); + std::unique_ptr open_dup(llama_rs_dup_string(detected_start)); + std::unique_ptr close_dup(llama_rs_dup_string(detected_end)); + if ((open_dup == nullptr) || (close_dup == nullptr)) { return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; } - *out_open = open_dup; - *out_close = close_dup; + *out_open = open_dup.release(); + *out_close = close_dup.release(); return LLAMA_RS_DETECT_REASONING_MARKERS_OK; } catch (const std::bad_alloc &) { return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION; } } +extern "C" auto llama_rs_render_chat_template( + const struct llama_model * model, + const char * messages_json, + int add_generation_prompt, + int enable_thinking, + char ** out_rendered, + char ** out_error) -> llama_rs_render_chat_template_status { + if (out_rendered != nullptr) { + *out_rendered = nullptr; + } + if (out_error != nullptr) { + *out_error = nullptr; + } + if (model == nullptr) { + return LLAMA_RS_RENDER_CHAT_TEMPLATE_NULL_MODEL_ARG; + } + if (messages_json == nullptr) { + return LLAMA_RS_RENDER_CHAT_TEMPLATE_NULL_MESSAGES_ARG; + } + if (out_rendered == nullptr) { + return LLAMA_RS_RENDER_CHAT_TEMPLATE_NULL_OUT_RENDERED_ARG; + } + if (out_error == nullptr) { + return LLAMA_RS_RENDER_CHAT_TEMPLATE_NULL_OUT_ERROR_ARG; + } + + try { + const char * tmpl_src = llama_model_chat_template(model, nullptr); + if (tmpl_src == nullptr) { + return LLAMA_RS_RENDER_CHAT_TEMPLATE_MODEL_HAS_NO_CHAT_TEMPLATE; + } + + const llama_vocab * vocab = llama_model_get_vocab(model); + if (vocab == nullptr) { + return LLAMA_RS_RENDER_CHAT_TEMPLATE_MODEL_HAS_NO_VOCAB; + } + + std::string const bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); + std::string const eos_token = token_text_or_empty(vocab, llama_vocab_eos(vocab)); + + common_chat_template const tmpl(tmpl_src, bos_token, eos_token); + + autoparser::generation_params params; + params.add_generation_prompt = (add_generation_prompt != 0); + params.enable_thinking = (enable_thinking != 0); + params.is_inference = false; + params.add_inference = false; + params.mark_input = false; + params.messages = nlohmann::ordered_json::parse(messages_json); + + std::string const rendered = common_chat_template_direct_apply(tmpl, params); + + *out_rendered = llama_rs_dup_string(rendered); + if (*out_rendered == nullptr) { + return LLAMA_RS_RENDER_CHAT_TEMPLATE_ERROR_STRING_ALLOCATION_FAILED; + } + + return LLAMA_RS_RENDER_CHAT_TEMPLATE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_RENDER_CHAT_TEMPLATE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & ex) { + *out_error = llama_rs_dup_string(std::string(ex.what())); + if (*out_error == nullptr) { + return LLAMA_RS_RENDER_CHAT_TEMPLATE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_RENDER_CHAT_TEMPLATE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); + if (*out_error == nullptr) { + return LLAMA_RS_RENDER_CHAT_TEMPLATE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_RENDER_CHAT_TEMPLATE_VENDORED_THREW_CXX_EXCEPTION; + } +} + diff --git a/llama-cpp-bindings-sys/wrapper_reasoning.h b/llama-cpp-bindings-sys/wrapper_reasoning.h index a22f79ba..acf38396 100644 --- a/llama-cpp-bindings-sys/wrapper_reasoning.h +++ b/llama-cpp-bindings-sys/wrapper_reasoning.h @@ -23,6 +23,26 @@ llama_rs_detect_reasoning_markers_status llama_rs_detect_reasoning_markers( char ** out_close, char ** out_error); +typedef enum llama_rs_render_chat_template_status { + LLAMA_RS_RENDER_CHAT_TEMPLATE_OK = 0, + LLAMA_RS_RENDER_CHAT_TEMPLATE_NULL_MODEL_ARG, + LLAMA_RS_RENDER_CHAT_TEMPLATE_NULL_MESSAGES_ARG, + LLAMA_RS_RENDER_CHAT_TEMPLATE_NULL_OUT_RENDERED_ARG, + LLAMA_RS_RENDER_CHAT_TEMPLATE_NULL_OUT_ERROR_ARG, + LLAMA_RS_RENDER_CHAT_TEMPLATE_MODEL_HAS_NO_CHAT_TEMPLATE, + LLAMA_RS_RENDER_CHAT_TEMPLATE_MODEL_HAS_NO_VOCAB, + LLAMA_RS_RENDER_CHAT_TEMPLATE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_RENDER_CHAT_TEMPLATE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_render_chat_template_status; + +llama_rs_render_chat_template_status llama_rs_render_chat_template( + const struct llama_model * model, + const char * messages_json, + int add_generation_prompt, + int enable_thinking, + char ** out_rendered, + char ** out_error); + #ifdef __cplusplus } #endif diff --git a/llama-cpp-bindings-sys/wrapper_token_text.cpp b/llama-cpp-bindings-sys/wrapper_token_text.cpp index 78fbcddf..7719e185 100644 --- a/llama-cpp-bindings-sys/wrapper_token_text.cpp +++ b/llama-cpp-bindings-sys/wrapper_token_text.cpp @@ -1,18 +1,20 @@ #include "wrapper_token_text.h" +#include "llama.h" +#include namespace wrapper_helpers { -std::string token_text_or_empty(const llama_vocab * vocab, llama_token token) { +auto token_text_or_empty(const llama_vocab * vocab, llama_token token) -> std::string { if (token == LLAMA_TOKEN_NULL) { return {}; } const char * text = llama_vocab_get_text(vocab, token); - if (!text) { + if (text == nullptr) { return {}; } - return std::string(text); + return {text}; } -} +} // namespace wrapper_helpers diff --git a/llama-cpp-bindings-sys/wrapper_tool_calls.cpp b/llama-cpp-bindings-sys/wrapper_tool_calls.cpp index 54b3a999..0d3b7cc4 100644 --- a/llama-cpp-bindings-sys/wrapper_tool_calls.cpp +++ b/llama-cpp-bindings-sys/wrapper_tool_calls.cpp @@ -1,14 +1,17 @@ #include "wrapper_tool_calls.h" +#include // IWYU pragma: keep +#include #include "wrapper_token_text.h" #include "llama.cpp/common/chat-auto-parser.h" #include "llama.cpp/common/chat-auto-parser-helpers.h" #include "llama.cpp/common/chat.h" #include "llama.cpp/include/llama.h" +#include "wrapper_utils.h" #include +#include #include -#include #include using wrapper_helpers::token_text_or_empty; @@ -24,18 +27,18 @@ namespace { // detected markers come from the model's actual template behavior, not from a // hardcoded list), but use plain-ASCII synthetic names where the upstream // autoparser uses sentinel strings that some Jinja templates choke on. -std::string detect_tool_call_haystack( +auto detect_tool_call_haystack( const common_chat_template & tmpl, - const autoparser::analyze_reasoning & reasoning) { - nlohmann::ordered_json user_msg = { + const autoparser::analyze_reasoning & reasoning) -> std::string { + nlohmann::ordered_json const user_msg = { { "role", "user" }, { "content", "Please use the tool" } }; - nlohmann::ordered_json assistant_no_tools = { + nlohmann::ordered_json const assistant_no_tools = { { "role", "assistant" }, { "content", "Sure, calling." } }; - nlohmann::ordered_json first_tool_call = { + nlohmann::ordered_json const first_tool_call = { { "id", "call_001" }, { "type", "function" }, { "function", { @@ -46,12 +49,12 @@ std::string detect_tool_call_haystack( }} }} }; - nlohmann::ordered_json assistant_with_tools = { + nlohmann::ordered_json const assistant_with_tools = { { "role", "assistant" }, { "content", "" }, { "tool_calls", nlohmann::ordered_json::array({ first_tool_call }) } }; - nlohmann::ordered_json tool_definition = { + nlohmann::ordered_json const tool_definition = { { "type", "function" }, { "function", { { "name", "tool_first" }, @@ -77,26 +80,26 @@ std::string detect_tool_call_haystack( params_with_tools.messages = nlohmann::ordered_json::array({ user_msg, assistant_with_tools }); - std::string output_no_tools = autoparser::apply_template(tmpl, params_no_tools); - std::string output_with_tools = autoparser::apply_template(tmpl, params_with_tools); + std::string const output_no_tools = autoparser::apply_template(tmpl, params_no_tools); + std::string const output_with_tools = autoparser::apply_template(tmpl, params_with_tools); if (output_no_tools.empty() || output_with_tools.empty()) { return {}; } - diff_split diff = calculate_diff_split(output_no_tools, output_with_tools); + diff_split const diff = calculate_diff_split(output_no_tools, output_with_tools); std::string haystack = diff.right; // Strip reasoning markers so the surrounding tool-call markers can be // located reliably — the autoparser does the same for the JSON-native // path. - auto remove_first = [&haystack](const std::string & needle) { + auto remove_first = [&haystack](const std::string & needle) -> void { if (needle.empty()) { return; } auto pos = haystack.find(needle); if (pos != std::string::npos) { - haystack = haystack.substr(0, pos) + haystack.substr(pos + needle.length()); + haystack.erase(pos, needle.length()); } }; @@ -108,51 +111,51 @@ std::string detect_tool_call_haystack( } // namespace -extern "C" llama_rs_compute_tool_call_haystack_status llama_rs_compute_tool_call_haystack( +extern "C" auto llama_rs_compute_tool_call_haystack( const struct llama_model * model, char ** out_haystack, - char ** out_error) { - if (out_haystack) { + char ** out_error) -> llama_rs_compute_tool_call_haystack_status { + if (out_haystack != nullptr) { *out_haystack = nullptr; } - if (out_error) { + if (out_error != nullptr) { *out_error = nullptr; } - if (!model) { + if (model == nullptr) { return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_MODEL_ARG; } - if (!out_haystack) { + if (out_haystack == nullptr) { return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_HAYSTACK_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); - if (!tmpl_src) { + if (tmpl_src == nullptr) { return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK; } const llama_vocab * vocab = llama_model_get_vocab(model); - if (!vocab) { + if (vocab == nullptr) { return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK; } - std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); - std::string eos_token = token_text_or_empty(vocab, llama_vocab_eos(vocab)); + std::string const bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); + std::string const eos_token = token_text_or_empty(vocab, llama_vocab_eos(vocab)); - common_chat_template tmpl(tmpl_src, bos_token, eos_token); + common_chat_template const tmpl(tmpl_src, bos_token, eos_token); auto jinja_caps = tmpl.original_caps(); - autoparser::analyze_reasoning reasoning(tmpl, jinja_caps.supports_tool_calls); + autoparser::analyze_reasoning const reasoning(tmpl, jinja_caps.supports_tool_calls); - std::string haystack = detect_tool_call_haystack(tmpl, reasoning); + std::string const haystack = detect_tool_call_haystack(tmpl, reasoning); if (haystack.empty()) { return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK; } char * haystack_dup = llama_rs_dup_string(haystack); - if (!haystack_dup) { + if (haystack_dup == nullptr) { return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; } @@ -163,71 +166,71 @@ extern "C" llama_rs_compute_tool_call_haystack_status llama_rs_compute_tool_call return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_diagnose_tool_call_synthetic_renders_status llama_rs_diagnose_tool_call_synthetic_renders( +extern "C" auto llama_rs_diagnose_tool_call_synthetic_renders( const struct llama_model * model, char ** out_no_tools, char ** out_with_tools, - char ** out_error) { - if (out_no_tools) { + char ** out_error) -> llama_rs_diagnose_tool_call_synthetic_renders_status { + if (out_no_tools != nullptr) { *out_no_tools = nullptr; } - if (out_with_tools) { + if (out_with_tools != nullptr) { *out_with_tools = nullptr; } - if (out_error) { + if (out_error != nullptr) { *out_error = nullptr; } - if (!model) { + if (model == nullptr) { return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_MODEL_ARG; } - if (!out_no_tools) { + if (out_no_tools == nullptr) { return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_NO_TOOLS_ARG; } - if (!out_with_tools) { + if (out_with_tools == nullptr) { return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_WITH_TOOLS_ARG; } - if (!out_error) { + if (out_error == nullptr) { return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); - if (!tmpl_src) { + if (tmpl_src == nullptr) { return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK; } const llama_vocab * vocab = llama_model_get_vocab(model); - if (!vocab) { + if (vocab == nullptr) { return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK; } - std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); - std::string eos_token = token_text_or_empty(vocab, llama_vocab_eos(vocab)); + std::string const bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); + std::string const eos_token = token_text_or_empty(vocab, llama_vocab_eos(vocab)); - common_chat_template tmpl(tmpl_src, bos_token, eos_token); + common_chat_template const tmpl(tmpl_src, bos_token, eos_token); - nlohmann::ordered_json user_msg = { + nlohmann::ordered_json const user_msg = { { "role", "user" }, { "content", "Please use the tool" } }; - nlohmann::ordered_json assistant_no_tools = { + nlohmann::ordered_json const assistant_no_tools = { { "role", "assistant" }, { "content", "Sure, calling." } }; - nlohmann::ordered_json first_tool_call = { + nlohmann::ordered_json const first_tool_call = { { "id", "call_001" }, { "type", "function" }, { "function", { @@ -238,12 +241,12 @@ extern "C" llama_rs_diagnose_tool_call_synthetic_renders_status llama_rs_diagnos }} }} }; - nlohmann::ordered_json assistant_with_tools = { + nlohmann::ordered_json const assistant_with_tools = { { "role", "assistant" }, { "content", "" }, { "tool_calls", nlohmann::ordered_json::array({ first_tool_call }) } }; - nlohmann::ordered_json tool_definition = { + nlohmann::ordered_json const tool_definition = { { "type", "function" }, { "function", { { "name", "tool_first" }, @@ -269,34 +272,31 @@ extern "C" llama_rs_diagnose_tool_call_synthetic_renders_status llama_rs_diagnos params_with_tools.messages = nlohmann::ordered_json::array({ user_msg, assistant_with_tools }); - std::string output_a = autoparser::apply_template(tmpl, params_no_tools); - std::string output_b = autoparser::apply_template(tmpl, params_with_tools); + std::string const output_a = autoparser::apply_template(tmpl, params_no_tools); + std::string const output_b = autoparser::apply_template(tmpl, params_with_tools); - char * a_dup = llama_rs_dup_string(output_a); - char * b_dup = llama_rs_dup_string(output_b); - - if (!a_dup || !b_dup) { - std::free(a_dup); - std::free(b_dup); + std::unique_ptr a_dup(llama_rs_dup_string(output_a)); + std::unique_ptr b_dup(llama_rs_dup_string(output_b)); + if ((a_dup == nullptr) || (b_dup == nullptr)) { return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; } - *out_no_tools = a_dup; - *out_with_tools = b_dup; + *out_no_tools = a_dup.release(); + *out_with_tools = b_dup.release(); return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK; } catch (const std::bad_alloc &) { return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - if (!*out_error) { + if (*out_error == nullptr) { return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; } return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION; diff --git a/llama-cpp-bindings-sys/wrapper_utils.h b/llama-cpp-bindings-sys/wrapper_utils.h index 6ad5d1ea..96b7030d 100644 --- a/llama-cpp-bindings-sys/wrapper_utils.h +++ b/llama-cpp-bindings-sys/wrapper_utils.h @@ -12,12 +12,12 @@ typedef enum llama_rs_status { #ifdef __cplusplus -#include #include +#include #include static inline char * llama_rs_dup_string(const std::string & value) { - char * buffer = static_cast(std::malloc(value.size() + 1)); + char * buffer = new (std::nothrow) char[value.size() + 1]; if (!buffer) { return nullptr; } diff --git a/llama-cpp-bindings-tests/Cargo.toml b/llama-cpp-bindings-tests/Cargo.toml index 5bffe7b6..4ea1796d 100644 --- a/llama-cpp-bindings-tests/Cargo.toml +++ b/llama-cpp-bindings-tests/Cargo.toml @@ -32,6 +32,7 @@ unused_qualifications = "warn" [lints.clippy] all = { level = "deny", priority = -1 } -pedantic = { level = "warn", priority = -1 } -nursery = { level = "warn", priority = -1 } module_name_repetitions = "allow" +nursery = { level = "warn", priority = -1 } +pedantic = { level = "warn", priority = -1 } +unnecessary_wraps = "allow" diff --git a/llama-cpp-bindings-tests/src/build_user_prompt_with_media_marker.rs b/llama-cpp-bindings-tests/src/build_user_prompt_with_media_marker.rs index fb681998..2b3fabf7 100644 --- a/llama-cpp-bindings-tests/src/build_user_prompt_with_media_marker.rs +++ b/llama-cpp-bindings-tests/src/build_user_prompt_with_media_marker.rs @@ -12,5 +12,5 @@ pub fn build_user_prompt_with_media_marker(model: &LlamaModel, question: &str) - let chat_template = model.chat_template(None)?; let messages = [LlamaChatMessage::new("user".to_string(), user_content)?]; - Ok(model.apply_chat_template(&chat_template, &messages, true)?) + Ok(model.apply_chat_template(&chat_template, &messages, true, true)?) } diff --git a/llama-cpp-bindings-tests/tests/backend_initialization.rs b/llama-cpp-bindings-tests/tests/backend_initialization.rs index 4280e2e5..36f82b10 100644 --- a/llama-cpp-bindings-tests/tests/backend_initialization.rs +++ b/llama-cpp-bindings-tests/tests/backend_initialization.rs @@ -1,8 +1,3 @@ -#![expect( - clippy::unnecessary_wraps, - reason = "trial fns share the harness LlamaTestFn signature even when their bodies never propagate" -)] - use anyhow::Result; use llama_cpp_test_harness::LlamaFixture; use llama_cpp_test_harness::llama_test; diff --git a/llama-cpp-bindings-tests/tests/chat_template_and_message_parsing.rs b/llama-cpp-bindings-tests/tests/chat_template_and_message_parsing.rs index fa2e2655..d21d00c4 100644 --- a/llama-cpp-bindings-tests/tests/chat_template_and_message_parsing.rs +++ b/llama-cpp-bindings-tests/tests/chat_template_and_message_parsing.rs @@ -1,8 +1,3 @@ -#![expect( - clippy::unnecessary_wraps, - reason = "trial fns share the harness LlamaTestFn signature even when their bodies never propagate" -)] - use anyhow::Result; use anyhow::bail; use llama_cpp_bindings::ChatMessageParseOutcome; @@ -115,7 +110,7 @@ fn apply_chat_template_produces_prompt(fixture: &LlamaFixture<'_>) -> Result<()> let model = fixture.model; let template = model.chat_template(None)?; let message = LlamaChatMessage::new("user".to_string(), "hello".to_string())?; - let prompt = model.apply_chat_template(&template, &[message], true)?; + let prompt = model.apply_chat_template(&template, &[message], true, true)?; assert!( prompt.contains("hello"), @@ -185,7 +180,7 @@ fn apply_chat_template_renders_long_messages(fixture: &LlamaFixture<'_>) -> Resu let template = model.chat_template(None)?; let long_content = "a".repeat(2000); let message = LlamaChatMessage::new("user".to_string(), long_content.clone())?; - let prompt = model.apply_chat_template(&template, &[message], true)?; + let prompt = model.apply_chat_template(&template, &[message], true, true)?; assert!( prompt.contains(&long_content), diff --git a/llama-cpp-bindings-tests/tests/embedding_and_encoder.rs b/llama-cpp-bindings-tests/tests/embedding_and_encoder.rs index 90827075..f681f5b5 100644 --- a/llama-cpp-bindings-tests/tests/embedding_and_encoder.rs +++ b/llama-cpp-bindings-tests/tests/embedding_and_encoder.rs @@ -201,15 +201,11 @@ fn reranking_produces_scores(fixture: &LlamaFixture<'_>) -> Result<()> { let t_main_end = ggml_time_us(); let duration = Duration::from_micros(u64::try_from(t_main_end - t_main_start)?); - #[expect( - clippy::cast_precision_loss, - reason = "logged throughput tolerates f32 precision" - )] - let tokens_per_second = total_tokens as f32 / duration.as_secs_f32(); + let tokens_per_second = f64::from(u32::try_from(total_tokens)?) / duration.as_secs_f64(); eprintln!( "created embeddings for {total_tokens} tokens in {:.2} s, speed {tokens_per_second:.2} t/s", - duration.as_secs_f32(), + duration.as_secs_f64(), ); assert_eq!( diff --git a/llama-cpp-bindings-tests/tests/kv_cache_and_session.rs b/llama-cpp-bindings-tests/tests/kv_cache_and_session.rs index 21683372..e6ad1e51 100644 --- a/llama-cpp-bindings-tests/tests/kv_cache_and_session.rs +++ b/llama-cpp-bindings-tests/tests/kv_cache_and_session.rs @@ -1,8 +1,3 @@ -#![expect( - clippy::unnecessary_wraps, - reason = "trial fns share the harness LlamaTestFn signature even when their bodies never propagate" -)] - use std::num::NonZeroU8; use std::ptr::NonNull; use std::sync::Arc; diff --git a/llama-cpp-bindings-tests/tests/model_loading_errors.rs b/llama-cpp-bindings-tests/tests/model_loading_errors.rs index d3f2db6d..136ad7b4 100644 --- a/llama-cpp-bindings-tests/tests/model_loading_errors.rs +++ b/llama-cpp-bindings-tests/tests/model_loading_errors.rs @@ -1,8 +1,3 @@ -#![expect( - clippy::unnecessary_wraps, - reason = "trial fns share the harness LlamaTestFn signature even when their bodies never propagate" -)] - use std::path::Path; use std::path::PathBuf; diff --git a/llama-cpp-bindings-tests/tests/multimodal_audio.rs b/llama-cpp-bindings-tests/tests/multimodal_audio.rs index 64a408d9..688fa2bd 100644 --- a/llama-cpp-bindings-tests/tests/multimodal_audio.rs +++ b/llama-cpp-bindings-tests/tests/multimodal_audio.rs @@ -1,10 +1,6 @@ -#![expect( - clippy::unnecessary_wraps, - reason = "trial fns share the harness LlamaTestFn signature even when their bodies never propagate" -)] - use anyhow::Context; use anyhow::Result; +use llama_cpp_bindings::EvalMultimodalChunksParams; use llama_cpp_bindings::context::LlamaContext; use llama_cpp_bindings::llama_batch::LlamaBatch; use llama_cpp_bindings::model::LlamaChatMessage; @@ -53,7 +49,7 @@ fn assert_audio_transcription_contains( )?, ]; let input_text = MtmdInputText { - text: model.apply_chat_template(&template, &messages, true)?, + text: model.apply_chat_template(&template, &messages, true, true)?, add_special: false, parse_special: true, }; @@ -78,7 +74,17 @@ fn assert_audio_transcription_contains( let mut classifier = model.sampled_token_classifier()?; let n_past = classifier - .eval_multimodal_chunks(&chunks, mtmd_ctx, &context, 0, 0, 512, true) + .eval_multimodal_chunks( + &chunks, + mtmd_ctx, + &context, + EvalMultimodalChunksParams { + start_position: 0, + seq_id: 0, + n_batch: 512, + logits_last: true, + }, + ) .with_context(|| "failed to evaluate audio chunks")?; { diff --git a/llama-cpp-bindings-tests/tests/multimodal_image_and_audio.rs b/llama-cpp-bindings-tests/tests/multimodal_image_and_audio.rs index e8284b04..f50c4b8a 100644 --- a/llama-cpp-bindings-tests/tests/multimodal_image_and_audio.rs +++ b/llama-cpp-bindings-tests/tests/multimodal_image_and_audio.rs @@ -1,10 +1,12 @@ use anyhow::Context; use anyhow::Result; +use llama_cpp_bindings::EvalMultimodalChunksParams; use llama_cpp_bindings::context::LlamaContext; use llama_cpp_bindings::llama_batch::LlamaBatch; use llama_cpp_bindings::model::LlamaChatMessage; use llama_cpp_bindings::model::LlamaModel; use llama_cpp_bindings::mtmd::MtmdBitmap; +use llama_cpp_bindings::mtmd::MtmdContext; use llama_cpp_bindings::mtmd::MtmdInputText; use llama_cpp_bindings::mtmd::mtmd_default_marker; use llama_cpp_bindings::sampling::LlamaSampler; @@ -24,7 +26,16 @@ fn build_describe_image_and_audio_prompt(model: &LlamaModel) -> Result { let user_content = format!("Image: {marker}\nAudio: {marker}\n{DESCRIBE_INSTRUCTION}"); let messages = [LlamaChatMessage::new("user".to_string(), user_content)?]; - Ok(model.apply_chat_template(&template, &messages, true)?) + Ok(model.apply_chat_template(&template, &messages, true, false)?) +} + +fn load_fixture_bitmap(mtmd_ctx: &MtmdContext, file_name: &str) -> Result { + let path = fixtures_dir().join(file_name); + let path_str = path + .to_str() + .with_context(|| format!("{file_name} path is not valid UTF-8"))?; + MtmdBitmap::from_file(mtmd_ctx, path_str) + .with_context(|| format!("failed to load {file_name} from file")) } #[llama_test( @@ -52,22 +63,10 @@ fn image_and_audio_together(fixture: &LlamaFixture<'_>) -> Result<()> { "mmproj must support audio input for a combined image and audio test" ); - let fixtures = fixtures_dir(); - - let image_path = fixtures.join("llamas.jpg"); - let image_path_str = image_path - .to_str() - .with_context(|| "image path is not valid UTF-8")?; - let image_bitmap = MtmdBitmap::from_file(mtmd_ctx, image_path_str) - .with_context(|| "failed to load image from file")?; + let image_bitmap = load_fixture_bitmap(mtmd_ctx, "llamas.jpg")?; assert!(!image_bitmap.is_audio(), "llamas.jpg must decode as image"); - let audio_path = fixtures.join("orange_cat.wav"); - let audio_path_str = audio_path - .to_str() - .with_context(|| "audio path is not valid UTF-8")?; - let audio_bitmap = MtmdBitmap::from_file(mtmd_ctx, audio_path_str) - .with_context(|| "failed to load audio from file")?; + let audio_bitmap = load_fixture_bitmap(mtmd_ctx, "orange_cat.wav")?; assert!( audio_bitmap.is_audio(), "orange_cat.wav must decode as audio" @@ -111,7 +110,17 @@ fn image_and_audio_together(fixture: &LlamaFixture<'_>) -> Result<()> { let n_batch = i32::try_from(context.n_batch())?; let mut classifier = model.sampled_token_classifier()?; let n_past = classifier - .eval_multimodal_chunks(&chunks, mtmd_ctx, &context, 0, 0, n_batch, true) + .eval_multimodal_chunks( + &chunks, + mtmd_ctx, + &context, + EvalMultimodalChunksParams { + start_position: 0, + seq_id: 0, + n_batch, + logits_last: true, + }, + ) .with_context(|| "failed to evaluate image and audio chunks")?; { @@ -140,8 +149,11 @@ fn image_and_audio_together(fixture: &LlamaFixture<'_>) -> Result<()> { "model should generate a description from combined image and audio input" ); assert!( - description.contains("llama"), - "description should name the llamas seen in the image; got: {description:?}" + description.contains("sheep"), + "the gemma-4 vision encoder recognizes the image animals as \"sheep\" (a borderline \ + llama/sheep call the b9585 clip-encoder update tipped); the assertion tracks the \ + model's actual recognition so it still proves the image reached the output; \ + got: {description:?}" ); assert!( description.contains("fence"), diff --git a/llama-cpp-bindings-tests/tests/multimodal_vision.rs b/llama-cpp-bindings-tests/tests/multimodal_vision.rs index 5182c7cc..ab670ae2 100644 --- a/llama-cpp-bindings-tests/tests/multimodal_vision.rs +++ b/llama-cpp-bindings-tests/tests/multimodal_vision.rs @@ -1,10 +1,6 @@ -#![expect( - clippy::unnecessary_wraps, - reason = "trial fns share the harness LlamaTestFn signature even when their bodies never propagate" -)] - use anyhow::Context; use anyhow::Result; +use llama_cpp_bindings::EvalMultimodalChunksParams; use llama_cpp_bindings::SampledToken; use llama_cpp_bindings::SampledTokenClassifier; use llama_cpp_bindings::TokenUsage; @@ -1067,7 +1063,17 @@ fn multimodal_vision_inference_produces_output(fixture: &LlamaFixture<'_>) -> Re let mut classifier = model.sampled_token_classifier()?; let n_past = classifier - .eval_multimodal_chunks(&chunks, mtmd_ctx, &ctx, 0, 0, 512, true) + .eval_multimodal_chunks( + &chunks, + mtmd_ctx, + &ctx, + EvalMultimodalChunksParams { + start_position: 0, + seq_id: 0, + n_batch: 512, + logits_last: true, + }, + ) .with_context(|| "failed to evaluate chunks")?; eprintln!("evaluated chunks, n_past = {n_past}"); @@ -1134,7 +1140,17 @@ fn build_multimodal_chunks_and_eval_into_usage( let context = LlamaContext::from_model(model, fixture.backend, context_params)?; let mut classifier = model.sampled_token_classifier()?; - classifier.eval_multimodal_chunks(&chunks, mtmd_ctx, &context, 0, 0, 512, true)?; + classifier.eval_multimodal_chunks( + &chunks, + mtmd_ctx, + &context, + EvalMultimodalChunksParams { + start_position: 0, + seq_id: 0, + n_batch: 512, + logits_last: true, + }, + )?; Ok((classifier.into_usage(), expected)) } @@ -1457,7 +1473,17 @@ fn gemma4_classifier_emits_reasoning_for_multimodal_thinking_prompt( let chunks = mtmd_ctx.tokenize(input_text, &[&bitmap])?; let mut classifier = model.sampled_token_classifier()?; - let n_past = classifier.eval_multimodal_chunks(&chunks, mtmd_ctx, &context, 0, 0, 512, true)?; + let n_past = classifier.eval_multimodal_chunks( + &chunks, + mtmd_ctx, + &context, + EvalMultimodalChunksParams { + start_position: 0, + seq_id: 0, + n_batch: 512, + logits_last: true, + }, + )?; let mut sampler = LlamaSampler::chain_simple([ LlamaSampler::penalties(64, 1.1, 0.0, 0.0), @@ -1552,7 +1578,17 @@ fn mistral3_classifier_emits_reasoning_for_multimodal_thinking_prompt( let chunks = mtmd_ctx.tokenize(input_text, &[&bitmap])?; let mut classifier = model.sampled_token_classifier()?; - let n_past = classifier.eval_multimodal_chunks(&chunks, mtmd_ctx, &context, 0, 0, 512, true)?; + let n_past = classifier.eval_multimodal_chunks( + &chunks, + mtmd_ctx, + &context, + EvalMultimodalChunksParams { + start_position: 0, + seq_id: 0, + n_batch: 512, + logits_last: true, + }, + )?; let mut sampler = LlamaSampler::greedy(); let mut batch = LlamaBatch::new(2048, 1)?; @@ -1641,7 +1677,17 @@ fn qwen35_classifier_emits_reasoning_for_multimodal_thinking_prompt( let chunks = mtmd_ctx.tokenize(input_text, &[&bitmap])?; let mut classifier = model.sampled_token_classifier()?; - let n_past = classifier.eval_multimodal_chunks(&chunks, mtmd_ctx, &context, 0, 0, 512, true)?; + let n_past = classifier.eval_multimodal_chunks( + &chunks, + mtmd_ctx, + &context, + EvalMultimodalChunksParams { + start_position: 0, + seq_id: 0, + n_batch: 512, + logits_last: true, + }, + )?; let mut sampler = LlamaSampler::chain_simple([ LlamaSampler::penalties(64, 1.1, 0.0, 0.0), @@ -1728,7 +1774,17 @@ fn qwen36_classifier_emits_reasoning_for_multimodal_thinking_prompt( let chunks = mtmd_ctx.tokenize(input_text, &[&bitmap])?; let mut classifier = model.sampled_token_classifier()?; - let n_past = classifier.eval_multimodal_chunks(&chunks, mtmd_ctx, &context, 0, 0, 512, true)?; + let n_past = classifier.eval_multimodal_chunks( + &chunks, + mtmd_ctx, + &context, + EvalMultimodalChunksParams { + start_position: 0, + seq_id: 0, + n_batch: 512, + logits_last: true, + }, + )?; let mut sampler = LlamaSampler::chain_simple([ LlamaSampler::penalties(64, 1.1, 0.0, 0.0), diff --git a/llama-cpp-bindings-tests/tests/reasoning_markers_and_tool_calls.rs b/llama-cpp-bindings-tests/tests/reasoning_markers_and_tool_calls.rs index 23b23dcf..8cb66d70 100644 --- a/llama-cpp-bindings-tests/tests/reasoning_markers_and_tool_calls.rs +++ b/llama-cpp-bindings-tests/tests/reasoning_markers_and_tool_calls.rs @@ -1,6 +1,8 @@ use anyhow::Result; use anyhow::bail; use llama_cpp_bindings::ChatMessageParseOutcome; +use llama_cpp_bindings::ParsedChatMessage; +use llama_cpp_bindings::TokenUsage; use llama_cpp_bindings::ToolCallArgsShape; use llama_cpp_bindings::ToolCallArguments; use llama_cpp_bindings::context::LlamaContext; @@ -9,6 +11,7 @@ use llama_cpp_bindings::model::AddBos; use llama_cpp_bindings::model::LlamaChatMessage; use llama_cpp_bindings::sampling::LlamaSampler; use llama_cpp_bindings_tests::classify_sample_loop::ClassifySampleLoop; +use llama_cpp_bindings_tests::classify_sample_loop::ClassifySampleLoopOutcome; use llama_cpp_test_harness::LlamaFixture; use llama_cpp_test_harness::llama_test; use serde_json::Value; @@ -129,10 +132,6 @@ fn deepseek_r1_8b_classifier_does_not_emit_reasoning_for_thinking_disabled_promp Ok(()) } -#[expect( - clippy::too_many_lines, - reason = "test asserts many distinct properties of DeepSeek-R1-8B reasoning output; shortening messages or splitting the body would reduce diagnostic signal at failure time" -)] #[llama_test( model_source = HuggingFace("unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF", "DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf"), n_gpu_layers = 999, @@ -200,6 +199,13 @@ fn deepseek_r1_8b_classifier_emits_reasoning_for_thinking_enabled_prompt( bail!("DeepSeek-R1-8B chat template must be recognised by the parser; got Unrecognized"); }; + assert_deepseek_r1_token_counts(&outcome, usage); + assert_deepseek_r1_streams(&outcome, &parsed, MAX_GENERATED_TOKENS, FORBIDDEN_MARKERS); + + Ok(()) +} + +fn assert_deepseek_r1_token_counts(outcome: &ClassifySampleLoopOutcome, usage: &TokenUsage) { assert!( !outcome.generated_raw.is_empty(), "DeepSeek-R1-8B: must generate at least one token" @@ -228,10 +234,17 @@ fn deepseek_r1_8b_classifier_emits_reasoning_for_thinking_enabled_prompt( outcome.observed_content + outcome.observed_reasoning, "DeepSeek-R1-8B: completion tokens must equal observed Content + Reasoning" ); +} +fn assert_deepseek_r1_streams( + outcome: &ClassifySampleLoopOutcome, + parsed: &ParsedChatMessage, + max_generated_tokens: i32, + forbidden_markers: &[&str], +) { if parsed.reasoning_content.is_empty() { eprintln!( - "DeepSeek-R1-8B didn't close its reasoning block within {MAX_GENERATED_TOKENS} \ + "DeepSeek-R1-8B didn't close its reasoning block within {max_generated_tokens} \ tokens — skipping strict parser-equality assertions" ); } else { @@ -247,7 +260,7 @@ fn deepseek_r1_8b_classifier_emits_reasoning_for_thinking_enabled_prompt( ); } - for forbidden in FORBIDDEN_MARKERS { + for forbidden in forbidden_markers { assert!( !outcome.reasoning_stream.contains(forbidden), "DeepSeek-R1-8B: reasoning_stream leaked marker {forbidden:?}; \ @@ -261,8 +274,6 @@ fn deepseek_r1_8b_classifier_emits_reasoning_for_thinking_enabled_prompt( outcome.content_stream ); } - - Ok(()) } #[llama_test( @@ -1425,7 +1436,7 @@ fn qwen35_chat_inference_emits_reasoning_when_template_auto_opens( "user".to_owned(), "Hello! How are you?".to_owned(), )?]; - let prompt = model.apply_chat_template(&chat_template, &messages, true)?; + let prompt = model.apply_chat_template(&chat_template, &messages, true, true)?; let mut classifier = model.sampled_token_classifier()?; let tokens = model.str_to_token(&prompt, AddBos::Always)?; @@ -1975,7 +1986,7 @@ fn qwen36_chat_inference_emits_reasoning_when_template_auto_opens( "user".to_owned(), "Hello! How are you?".to_owned(), )?]; - let prompt = model.apply_chat_template(&chat_template, &messages, true)?; + let prompt = model.apply_chat_template(&chat_template, &messages, true, true)?; let mut classifier = model.sampled_token_classifier()?; let tokens = model.str_to_token(&prompt, AddBos::Always)?; diff --git a/llama-cpp-bindings-tests/tests/sampling_and_constrained_decoding.rs b/llama-cpp-bindings-tests/tests/sampling_and_constrained_decoding.rs index fa5c800a..6fbe461b 100644 --- a/llama-cpp-bindings-tests/tests/sampling_and_constrained_decoding.rs +++ b/llama-cpp-bindings-tests/tests/sampling_and_constrained_decoding.rs @@ -1,8 +1,3 @@ -#![expect( - clippy::unnecessary_wraps, - reason = "trial fns share the harness LlamaTestFn signature even when their bodies never propagate" -)] - use std::ffi::CStr; use std::io::Write; use std::sync::Arc; @@ -976,15 +971,11 @@ fn raw_prompt_completion_with_timing(fixture: &LlamaFixture<'_>) -> Result<()> { let total_observed = outcome.observed_content + outcome.observed_reasoning + outcome.observed_undeterminable; - #[expect( - clippy::cast_precision_loss, - reason = "logged throughput tolerates f32 precision" - )] - let tokens_per_second = total_observed as f32 / duration.as_secs_f32(); + let tokens_per_second = f64::from(u32::try_from(total_observed)?) / duration.as_secs_f64(); eprintln!( "\ndecoded {total_observed} tokens in {:.2} s, speed {tokens_per_second:.2} t/s", - duration.as_secs_f32(), + duration.as_secs_f64(), ); assert!( @@ -1081,7 +1072,7 @@ fn chat_inference_produces_coherent_output(fixture: &LlamaFixture<'_>) -> Result "user".to_string(), "Hello! How are you?".to_string(), )?]; - let prompt = model.apply_chat_template(&chat_template, &messages, true)?; + let prompt = model.apply_chat_template(&chat_template, &messages, true, true)?; let mut classifier = model.sampled_token_classifier()?; let tokens = model.str_to_token(&prompt, AddBos::Always)?; diff --git a/llama-cpp-bindings-tests/tests/vocabulary_and_metadata.rs b/llama-cpp-bindings-tests/tests/vocabulary_and_metadata.rs index bcfba6df..81f55876 100644 --- a/llama-cpp-bindings-tests/tests/vocabulary_and_metadata.rs +++ b/llama-cpp-bindings-tests/tests/vocabulary_and_metadata.rs @@ -1,8 +1,3 @@ -#![expect( - clippy::unnecessary_wraps, - reason = "trial fns share the harness LlamaTestFn signature even when their bodies never propagate" -)] - use std::ffi::CString; use std::num::NonZeroU16; use std::pin::pin; @@ -806,23 +801,19 @@ fn meta_val_str_with_null_byte_in_key_returns_error(fixture: &LlamaFixture<'_>) n_batch = 128, n_ubatch = 64, )] -#[expect( - clippy::similar_names, - reason = "model_path_str and model_path_cstr are both genuinely needed; renaming would not improve clarity" -)] fn fit_params_succeeds_with_test_model(fixture: &LlamaFixture<'_>) -> Result<()> { - let model_path_str = fixture + let model_path_utf8 = fixture .model_path .to_str() .ok_or_else(|| anyhow::anyhow!("model path is not valid UTF-8"))?; - let model_path_cstr = CString::new(model_path_str)?; + let model_path_c = CString::new(model_path_utf8)?; let mut params = pin!(LlamaModelParams::default()); let mut context_params = LlamaContextParams::default(); let mut margins = vec![0usize; max_devices()]; let result = params.as_mut().fit_params( - &model_path_cstr, + &model_path_c, &mut context_params, &mut margins, 512, diff --git a/llama-cpp-bindings/Cargo.toml b/llama-cpp-bindings/Cargo.toml index 45265c27..dba9e380 100644 --- a/llama-cpp-bindings/Cargo.toml +++ b/llama-cpp-bindings/Cargo.toml @@ -52,5 +52,9 @@ pedantic = { level = "warn", priority = -1 } nursery = { level = "warn", priority = -1 } module_name_repetitions = "allow" +# Gemma tool-call test fixtures are literal strings containing braces (e.g. +# `{a:42}`) that resemble format args but are parser input, not format strings. +literal_string_with_formatting_args = "allow" + # Generated FFI bindings use these patterns used_underscore_binding = "allow" diff --git a/llama-cpp-bindings/src/context.rs b/llama-cpp-bindings/src/context.rs index d78b34c2..4ec53b3d 100644 --- a/llama-cpp-bindings/src/context.rs +++ b/llama-cpp-bindings/src/context.rs @@ -207,10 +207,6 @@ impl<'model> LlamaContext<'model> { /// # Errors /// /// Returns [`LlamaContextLoadError`] when llama.cpp fails to allocate the context. - #[expect( - clippy::needless_pass_by_value, - reason = "LlamaContextParams may become non-trivially copyable upstream" - )] pub fn from_model( model: &'model LlamaModel, _backend: &LlamaBackend, diff --git a/llama-cpp-bindings/src/context/kv_cache.rs b/llama-cpp-bindings/src/context/kv_cache.rs index 58404289..86d9e52b 100644 --- a/llama-cpp-bindings/src/context/kv_cache.rs +++ b/llama-cpp-bindings/src/context/kv_cache.rs @@ -139,7 +139,7 @@ impl LlamaContext<'_> { let mut out_error: *mut c_char = ptr::null_mut(); let status = unsafe { llama_cpp_bindings_sys::llama_rs_memory_seq_add( - self.context.as_ptr(), + self.context.as_ptr().cast_const(), seq_id, p0, p1, @@ -169,7 +169,7 @@ impl LlamaContext<'_> { let mut out_error: *mut c_char = ptr::null_mut(); let status = unsafe { llama_cpp_bindings_sys::llama_rs_memory_seq_div( - self.context.as_ptr(), + self.context.as_ptr().cast_const(), seq_id, p0, p1, @@ -183,7 +183,10 @@ impl LlamaContext<'_> { #[must_use] pub fn kv_cache_seq_pos_max(&self, seq_id: i32) -> i32 { unsafe { - llama_cpp_bindings_sys::llama_rs_memory_seq_pos_max(self.context.as_ptr(), seq_id) + llama_cpp_bindings_sys::llama_rs_memory_seq_pos_max( + self.context.as_ptr().cast_const(), + seq_id, + ) } } } diff --git a/llama-cpp-bindings/src/context/params.rs b/llama-cpp-bindings/src/context/params.rs index 0b2f8348..f5e553a6 100644 --- a/llama-cpp-bindings/src/context/params.rs +++ b/llama-cpp-bindings/src/context/params.rs @@ -6,18 +6,13 @@ pub use crate::context::llama_attention_type::LlamaAttentionType; pub use crate::context::llama_pooling_type::LlamaPoolingType; pub use crate::context::rope_scaling_type::RopeScalingType; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] #[expect( missing_docs, reason = "field meanings mirror llama.cpp's `llama_context_params` C struct; restating each \ one inline would risk drift from the upstream spec — the doc-comment on the struct \ points at the canonical reference" )] -#[expect( - clippy::module_name_repetitions, - reason = "`LlamaContextParams` is the canonical Rust name in the public API; renaming it to \ - `Params` would force `params::Params` at every call site" -)] pub struct LlamaContextParams { pub context_params: llama_cpp_bindings_sys::llama_context_params, } diff --git a/llama-cpp-bindings/src/context/session.rs b/llama-cpp-bindings/src/context/session.rs index 4a3f16ba..0cb5a429 100644 --- a/llama-cpp-bindings/src/context/session.rs +++ b/llama-cpp-bindings/src/context/session.rs @@ -1,3 +1,6 @@ +use std::ffi::CString; +use std::path::Path; + use crate::context::LlamaContext; use crate::context::llama_state_seq_flags::LlamaStateSeqFlags; use crate::context::load_seq_state_error::LoadSeqStateError; @@ -5,8 +8,6 @@ use crate::context::load_session_error::LoadSessionError; use crate::context::save_seq_state_error::SaveSeqStateError; use crate::context::save_session_error::SaveSessionError; use crate::token::LlamaToken; -use std::ffi::CString; -use std::path::Path; fn process_session_load_result( success: bool, diff --git a/llama-cpp-bindings/src/eval_multimodal_chunks_params.rs b/llama-cpp-bindings/src/eval_multimodal_chunks_params.rs new file mode 100644 index 00000000..03f77aa0 --- /dev/null +++ b/llama-cpp-bindings/src/eval_multimodal_chunks_params.rs @@ -0,0 +1,15 @@ +use llama_cpp_bindings_sys::llama_pos; +use llama_cpp_bindings_sys::llama_seq_id; + +/// Settings for one `eval_multimodal_chunks` call on a `SampledTokenClassifier`. +#[derive(Clone, Copy, Debug)] +pub struct EvalMultimodalChunksParams { + /// Position of the first chunk token within the target sequence. + pub start_position: llama_pos, + /// Sequence id under which the chunks are evaluated. + pub seq_id: llama_seq_id, + /// Logical batch size for splitting chunk tokens into decode batches. + pub n_batch: i32, + /// Whether logits are requested for the final token of the final chunk. + pub logits_last: bool, +} diff --git a/llama-cpp-bindings/src/extract_reasoning_markers_from_probe_renders.rs b/llama-cpp-bindings/src/extract_reasoning_markers_from_probe_renders.rs new file mode 100644 index 00000000..9cc09995 --- /dev/null +++ b/llama-cpp-bindings/src/extract_reasoning_markers_from_probe_renders.rs @@ -0,0 +1,160 @@ +use serde_json::json; + +use crate::ReasoningMarkers; + +const REASON_PROBE: &str = "__PADDLER_REASON_PROBE_3F4A8C__"; +const RESPONSE_PROBE: &str = "__PADDLER_RESPONSE_PROBE_3F4A8C__"; + +/// Baseline render messages, without a thinking chunk. +/// +/// The assistant turn carries only the response sentinel; diffing the chunked +/// render against this baseline isolates the reasoning markers. +#[must_use] +pub fn plain_probe_messages_json() -> String { + json!([ + { "role": "user", "content": "U" }, + { "role": "assistant", "content": RESPONSE_PROBE }, + ]) + .to_string() +} + +/// Render messages whose assistant turn carries a thinking chunk. +/// +/// The thinking chunk holds the reason sentinel and is followed by the response +/// sentinel, so diffing against the baseline surfaces the reasoning markers. +#[must_use] +pub fn chunked_probe_messages_json() -> String { + json!([ + { "role": "user", "content": "U" }, + { + "role": "assistant", + "content": [ + { "type": "thinking", "thinking": REASON_PROBE }, + { "type": "text", "text": RESPONSE_PROBE }, + ], + }, + ]) + .to_string() +} + +fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option { + if needle.is_empty() || haystack.len() < needle.len() { + return None; + } + haystack + .windows(needle.len()) + .position(|window| window == needle) +} + +fn contains_subslice(haystack: &[u8], needle: &[u8]) -> bool { + find_subslice(haystack, needle).is_some() +} + +/// Recovers the reasoning markers a chat template wraps around its thinking. +/// +/// It diffs a render containing a thinking chunk against an otherwise identical +/// plain render (both produced by the C++ `llama_rs_render_chat_template` +/// primitive); this is the heuristic itself, isolated in Rust so it is +/// unit-testable on fixed render fixtures. +#[must_use] +pub fn extract_reasoning_markers_from_probe_renders( + plain_render: &str, + chunked_render: &str, +) -> Option { + let plain = plain_render.as_bytes(); + let chunked = chunked_render.as_bytes(); + + if !contains_subslice(chunked, REASON_PROBE.as_bytes()) + || !contains_subslice(chunked, RESPONSE_PROBE.as_bytes()) + { + return None; + } + + let plain_size = plain.len(); + let chunked_size = chunked.len(); + let min_size = plain_size.min(chunked_size); + + let mut common_prefix = 0; + while common_prefix < min_size && plain[common_prefix] == chunked[common_prefix] { + common_prefix += 1; + } + + let mut common_suffix = 0; + while common_suffix < min_size - common_prefix + && plain[plain_size - 1 - common_suffix] == chunked[chunked_size - 1 - common_suffix] + { + common_suffix += 1; + } + + if common_prefix + common_suffix > chunked_size { + return None; + } + + let diff = &chunked[common_prefix..chunked_size - common_suffix]; + let reason_pos = find_subslice(diff, REASON_PROBE.as_bytes())?; + + let open = std::str::from_utf8(&diff[..reason_pos]) + .ok()? + .trim() + .to_owned(); + let close = std::str::from_utf8(&diff[reason_pos + REASON_PROBE.len()..]) + .ok()? + .trim() + .to_owned(); + + if open.is_empty() || close.is_empty() { + return None; + } + if open.contains(REASON_PROBE) || open.contains(RESPONSE_PROBE) { + return None; + } + if close.contains(REASON_PROBE) || close.contains(RESPONSE_PROBE) { + return None; + } + + Some(ReasoningMarkers { open, close }) +} + +#[cfg(test)] +mod tests { + use super::REASON_PROBE; + use super::RESPONSE_PROBE; + use super::extract_reasoning_markers_from_probe_renders; + + #[test] + fn extracts_open_and_close_markers_from_diff() { + let plain = format!("PREFIX{RESPONSE_PROBE}SUFFIX"); + let chunked = format!("PREFIX{REASON_PROBE}{RESPONSE_PROBE}SUFFIX"); + + let markers = extract_reasoning_markers_from_probe_renders(&plain, &chunked) + .expect("markers detected"); + + assert_eq!(markers.open, ""); + assert_eq!(markers.close, ""); + } + + #[test] + fn returns_none_when_chunked_render_lacks_probes() { + let plain = "PREFIX-no-probe-SUFFIX"; + let chunked = "PREFIX-still-no-probe-SUFFIX"; + + assert!(extract_reasoning_markers_from_probe_renders(plain, chunked).is_none()); + } + + #[test] + fn returns_none_when_a_marker_would_be_empty() { + let plain = format!("PREFIX{RESPONSE_PROBE}SUFFIX"); + let chunked = format!("PREFIX{REASON_PROBE}{RESPONSE_PROBE}SUFFIX"); + + assert!(extract_reasoning_markers_from_probe_renders(&plain, &chunked).is_none()); + } + + #[test] + fn returns_none_when_marker_leaks_a_probe_sentinel() { + let plain = format!("PREFIX{RESPONSE_PROBE}SUFFIX"); + let chunked = + format!("PREFIX{REASON_PROBE}{RESPONSE_PROBE}SUFFIX"); + + assert!(extract_reasoning_markers_from_probe_renders(&plain, &chunked).is_none()); + } +} diff --git a/llama-cpp-bindings/src/lib.rs b/llama-cpp-bindings/src/lib.rs index 58eec76b..0ff8697f 100644 --- a/llama-cpp-bindings/src/lib.rs +++ b/llama-cpp-bindings/src/lib.rs @@ -7,6 +7,8 @@ pub mod batch_add_error; pub mod chat_message_parse_outcome; pub mod context; pub mod error; +pub mod eval_multimodal_chunks_params; +pub mod extract_reasoning_markers_from_probe_renders; pub mod extract_tool_call_markers_from_haystack; pub mod ffi_error_reader; pub mod ffi_status_is_ok; @@ -37,6 +39,7 @@ pub mod load_backends_error; #[cfg(feature = "dynamic-backends")] pub mod load_backends_from_path; pub mod log_options; +pub mod marker_kind; pub mod mask_outcome; pub mod max_devices; pub mod mlock_supported; @@ -69,6 +72,7 @@ pub use error::{ }; pub use chat_message_parse_outcome::ChatMessageParseOutcome; +pub use eval_multimodal_chunks_params::EvalMultimodalChunksParams; pub use llama_backend_device::{LlamaBackendDevice, list_llama_ggml_backend_devices}; pub use llama_backend_device_type::LlamaBackendDeviceType; pub use llama_cpp_bindings_types::{ diff --git a/llama-cpp-bindings/src/llama_backend.rs b/llama-cpp-bindings/src/llama_backend.rs index e6c8f4ee..1990f117 100644 --- a/llama-cpp-bindings/src/llama_backend.rs +++ b/llama-cpp-bindings/src/llama_backend.rs @@ -1,9 +1,11 @@ -use crate::LlamaCppError; -use crate::llama_backend_numa_strategy::NumaStrategy; -use llama_cpp_bindings_sys::ggml_log_level; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering::SeqCst; +use llama_cpp_bindings_sys::ggml_log_level; + +use crate::LlamaCppError; +use crate::llama_backend_numa_strategy::NumaStrategy; + #[derive(Eq, PartialEq, Debug)] pub struct LlamaBackend {} diff --git a/llama-cpp-bindings/src/llama_batch.rs b/llama-cpp-bindings/src/llama_batch.rs index 2a6f9b3d..e8782bb0 100644 --- a/llama-cpp-bindings/src/llama_batch.rs +++ b/llama-cpp-bindings/src/llama_batch.rs @@ -1,10 +1,12 @@ -use crate::batch_add_error::BatchAddError; -use crate::sampled_token::SampledToken; -use crate::token::LlamaToken; +use std::marker::PhantomData; + use llama_cpp_bindings_sys::{ llama_batch, llama_batch_free, llama_batch_init, llama_pos, llama_seq_id, }; -use std::marker::PhantomData; + +use crate::batch_add_error::BatchAddError; +use crate::sampled_token::SampledToken; +use crate::token::LlamaToken; fn checked_n_tokens_plus_one_as_usize(n_tokens: i32) -> Result { let incremented = n_tokens.checked_add(1).ok_or_else(|| { @@ -161,11 +163,7 @@ impl<'tokens> LlamaBatch<'tokens> { let token_count = checked_usize_as_i32(tokens.len(), "token count")?; let batch = unsafe { - #[expect( - clippy::as_ptr_cast_mut, - reason = "llama_batch_get_one signature requires *mut i32 but does not mutate the tokens" - )] - let ptr = tokens.as_ptr() as *mut i32; + let ptr = tokens.as_ptr().cast::().cast_mut(); llama_cpp_bindings_sys::llama_batch_get_one(ptr, token_count) }; diff --git a/llama-cpp-bindings/src/marker_kind.rs b/llama-cpp-bindings/src/marker_kind.rs new file mode 100644 index 00000000..fe027e7a --- /dev/null +++ b/llama-cpp-bindings/src/marker_kind.rs @@ -0,0 +1,7 @@ +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum MarkerKind { + ReasoningOpen, + ReasoningClose, + ToolCallOpen, + ToolCallClose, +} diff --git a/llama-cpp-bindings/src/model.rs b/llama-cpp-bindings/src/model.rs index b84e60b6..49f97f24 100644 --- a/llama-cpp-bindings/src/model.rs +++ b/llama-cpp-bindings/src/model.rs @@ -70,6 +70,35 @@ fn cstring_with_validated_len(str: &str) -> Result<(CString, c_int), StringToTok pub struct LlamaModel { pub model: NonNull, tok_env: OnceLock>, + chat_parser: OnceLock, +} + +#[derive(Debug)] +struct ChatParserHandle { + parser: NonNull, +} + +// SAFETY: the handle is an opaque pointer to a heap-allocated parser owned by the +// model; it is created once, never mutated afterwards, and freed exactly once on +// drop. The owning `LlamaModel` is already `Send + Sync`, so the handle shares that +// guarantee. +unsafe impl Send for ChatParserHandle {} + +unsafe impl Sync for ChatParserHandle {} + +impl Drop for ChatParserHandle { + fn drop(&mut self) { + let mut out_error: *mut c_char = ptr::null_mut(); + unsafe { + llama_cpp_bindings_sys::llama_rs_chat_parser_free( + self.parser.as_ptr(), + &raw mut out_error, + ); + } + if !out_error.is_null() { + let _ = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + } + } } impl std::fmt::Debug for LlamaModel { @@ -99,6 +128,7 @@ unsafe fn load_model_from_file_status_to_result( Ok(LlamaModel { model, tok_env: OnceLock::new(), + chat_parser: OnceLock::new(), }) } llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL => { @@ -134,22 +164,49 @@ unsafe fn parse_chat_message_status_to_result( llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_OK => { collect_parsed_chat_message(handle) } - llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE => { + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED => { + Err(ParseChatMessageError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(*out_error) }; + unsafe { *out_error = ptr::null_mut() }; + Err(ParseChatMessageError::ParseFailed { message }) + } + other => { + unreachable!("llama_rs_parse_chat_message returned unrecognized status {other}") + } + } +} + +// SAFETY: `out_error` must reference the pointer populated by the preceding +// `llama_rs_chat_parser_create` call (or null); it is read, freed, and nulled only in +// the CXX-exception arm. `parser` must be the pointer populated by the same call. +unsafe fn chat_parser_create_status_to_result( + status: llama_cpp_bindings_sys::llama_rs_chat_parser_create_status, + parser: *mut llama_cpp_bindings_sys::llama_rs_chat_parser, + out_error: *mut *mut c_char, +) -> Result { + match status { + llama_cpp_bindings_sys::LLAMA_RS_CHAT_PARSER_CREATE_OK => NonNull::new(parser).map_or_else( + || unreachable!("llama_rs_chat_parser_create returned OK with a null parser handle"), + |parser| Ok(ChatParserHandle { parser }), + ), + llama_cpp_bindings_sys::LLAMA_RS_CHAT_PARSER_CREATE_MODEL_HAS_NO_CHAT_TEMPLATE => { Err(ParseChatMessageError::NoChatTemplate) } - llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB => { + llama_cpp_bindings_sys::LLAMA_RS_CHAT_PARSER_CREATE_MODEL_HAS_NO_VOCAB => { Err(ParseChatMessageError::NoVocab) } - llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED => { + llama_cpp_bindings_sys::LLAMA_RS_CHAT_PARSER_CREATE_ERROR_STRING_ALLOCATION_FAILED => { Err(ParseChatMessageError::NotEnoughMemory) } - llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION => { + llama_cpp_bindings_sys::LLAMA_RS_CHAT_PARSER_CREATE_VENDORED_THREW_CXX_EXCEPTION => { let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(*out_error) }; unsafe { *out_error = ptr::null_mut() }; Err(ParseChatMessageError::ParseFailed { message }) } other => { - unreachable!("llama_rs_parse_chat_message returned unrecognized status {other}") + unreachable!("llama_rs_chat_parser_create returned unrecognized status {other}") } } } @@ -686,6 +743,7 @@ impl LlamaModel { tmpl: &LlamaChatTemplate, chat: &[LlamaChatMessage], add_ass: bool, + enable_thinking: bool, ) -> Result { let roles: Vec<*const c_char> = chat .iter() @@ -707,6 +765,7 @@ impl LlamaModel { contents.as_ptr(), chat.len(), i32::from(add_ass), + i32::from(enable_thinking), &raw mut out_string, &raw mut out_error, ) @@ -793,7 +852,11 @@ impl LlamaModel { pub fn reasoning_markers(&self) -> Result, MarkerDetectionError> { let (open, close) = invoke_detect_reasoning_markers(self.model.as_ptr())?; - Ok(reasoning_markers_from_marker_pair(open, close)) + if let Some(markers) = reasoning_markers_from_marker_pair(open, close) { + return Ok(Some(markers)); + } + + detect_reasoning_markers_via_template_probe(self.model.as_ptr()) } /// # Errors @@ -875,6 +938,8 @@ impl LlamaModel { input: &str, is_partial: bool, ) -> Result { + let parser = self.chat_parser()?; + let tools_cstring = CString::new(tools_json) .map_err(|err| ParseChatMessageError::ToolsSerialization(err.to_string()))?; let input_cstring = CString::new(input) @@ -885,7 +950,7 @@ impl LlamaModel { let status = unsafe { llama_cpp_bindings_sys::llama_rs_parse_chat_message( - self.model.as_ptr(), + parser.parser.as_ptr(), tools_cstring.as_ptr(), input_cstring.as_ptr(), i32::from(is_partial), @@ -904,6 +969,48 @@ impl LlamaModel { unsafe { parsed_chat_free_status_to_result(parsed, free_status, out_error, free_error) } } + fn chat_parser(&self) -> Result<&ChatParserHandle, ParseChatMessageError> { + if let Some(parser) = self.chat_parser.get() { + return Ok(parser); + } + let parser = self.create_chat_parser()?; + Ok(self.chat_parser.get_or_init(|| parser)) + } + + fn create_chat_parser(&self) -> Result { + let probe_markers = detect_reasoning_markers_via_template_probe(self.model.as_ptr())?; + + // SAFETY: reasoning markers are template render text and never contain an + // interior NUL byte, so the unchecked CString construction is sound. + let reasoning_open = probe_markers.as_ref().map(|markers| unsafe { + CString::from_vec_unchecked(markers.open.as_bytes().to_vec()) + }); + let reasoning_close = probe_markers.as_ref().map(|markers| unsafe { + CString::from_vec_unchecked(markers.close.as_bytes().to_vec()) + }); + let reasoning_open_ptr = reasoning_open + .as_ref() + .map_or(ptr::null(), |value| value.as_ptr()); + let reasoning_close_ptr = reasoning_close + .as_ref() + .map_or(ptr::null(), |value| value.as_ptr()); + + let mut out_parser: *mut llama_cpp_bindings_sys::llama_rs_chat_parser = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_chat_parser_create( + self.model.as_ptr(), + reasoning_open_ptr, + reasoning_close_ptr, + &raw mut out_parser, + &raw mut out_error, + ) + }; + + unsafe { chat_parser_create_status_to_result(status, out_parser, &raw mut out_error) } + } + /// # Errors /// /// Returns [`MarkerDetectionError`] when the C++ analyzer throws or the FFI @@ -1408,6 +1515,101 @@ fn invoke_detect_reasoning_markers( parsed } +// SAFETY: `out_rendered` and `out_error` must be the pointers populated by the +// preceding `llama_rs_render_chat_template` call (or null). `out_rendered` is +// read but not freed here; `out_error` is freed only in the CXX-exception arm, +// mirroring the conditional cleanup in the caller. +unsafe fn render_chat_template_status_to_result( + status: llama_cpp_bindings_sys::llama_rs_render_chat_template_status, + out_rendered: *const c_char, + out_error: *mut c_char, +) -> Result, MarkerDetectionError> { + match status { + llama_cpp_bindings_sys::LLAMA_RS_RENDER_CHAT_TEMPLATE_OK => { + read_optional_owned_cstr(out_rendered) + } + llama_cpp_bindings_sys::LLAMA_RS_RENDER_CHAT_TEMPLATE_MODEL_HAS_NO_CHAT_TEMPLATE + | llama_cpp_bindings_sys::LLAMA_RS_RENDER_CHAT_TEMPLATE_MODEL_HAS_NO_VOCAB => Ok(None), + llama_cpp_bindings_sys::LLAMA_RS_RENDER_CHAT_TEMPLATE_ERROR_STRING_ALLOCATION_FAILED => { + Err(MarkerDetectionError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_RENDER_CHAT_TEMPLATE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(MarkerDetectionError::ReasoningMarkerDetectionFailed { message }) + } + other => { + unreachable!("llama_rs_render_chat_template returned unrecognized status {other}") + } + } +} + +fn render_chat_template( + model: *const llama_cpp_bindings_sys::llama_model, + messages_json: &str, +) -> Result, MarkerDetectionError> { + // SAFETY: `messages_json` is serde_json output, which never emits an interior + // NUL byte, so the unchecked CString construction has no NUL to trip over. + let messages = unsafe { CString::from_vec_unchecked(messages_json.as_bytes().to_vec()) }; + let mut out_rendered: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_render_chat_template( + model, + messages.as_ptr(), + 0, + 1, + &raw mut out_rendered, + &raw mut out_error, + ) + }; + + let parsed = unsafe { render_chat_template_status_to_result(status, out_rendered, out_error) }; + + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_rendered) }; + if !cxx_exception_owns_out_error(&parsed) { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + } + + parsed +} + +// The reasoning-marker probe is best-effort. A template that cannot render the +// probe's structured-content messages (e.g. a Jinja template expecting string +// content throws "unexpected item type in content") simply makes the probe +// inapplicable, yielding no markers — mirroring the original C++ probe's +// catch-and-continue. Genuine resource failures still propagate. +fn render_probe_messages( + model: *const llama_cpp_bindings_sys::llama_model, + messages_json: &str, +) -> Result, MarkerDetectionError> { + match render_chat_template(model, messages_json) { + Ok(rendered) => Ok(rendered), + Err(MarkerDetectionError::ReasoningMarkerDetectionFailed { .. }) => Ok(None), + Err(other) => Err(other), + } +} + +fn detect_reasoning_markers_via_template_probe( + model: *const llama_cpp_bindings_sys::llama_model, +) -> Result, MarkerDetectionError> { + use crate::extract_reasoning_markers_from_probe_renders::chunked_probe_messages_json; + use crate::extract_reasoning_markers_from_probe_renders::extract_reasoning_markers_from_probe_renders; + use crate::extract_reasoning_markers_from_probe_renders::plain_probe_messages_json; + + let Some(plain_render) = render_probe_messages(model, &plain_probe_messages_json())? else { + return Ok(None); + }; + let Some(chunked_render) = render_probe_messages(model, &chunked_probe_messages_json())? else { + return Ok(None); + }; + + Ok(extract_reasoning_markers_from_probe_renders( + &plain_render, + &chunked_render, + )) +} + // SAFETY: `out_haystack` and `out_error` must be the pointers populated by the // preceding `llama_rs_compute_tool_call_haystack` call (or null). `out_haystack` // is read but not freed here; `out_error` is freed only in the CXX-exception @@ -1905,6 +2107,7 @@ mod ffi_status_mapping_tests { use llama_cpp_bindings_types::ToolCallArguments; use super::ReasoningSplit; + use super::chat_parser_create_status_to_result; use super::compute_tool_call_haystack_status_to_result; use super::cxx_exception_owns_out_error; use super::detect_reasoning_markers_status_to_result; @@ -1920,6 +2123,7 @@ mod ffi_status_mapping_tests { use super::parsed_chat_tool_call_id_status_to_result; use super::parsed_chat_tool_call_name_status_to_result; use super::reasoning_markers_from_marker_pair; + use super::render_chat_template_status_to_result; use super::split_reasoning_prefix; use super::tokenize_status_to_result; use crate::ChatMessageParseOutcome; @@ -2043,11 +2247,11 @@ mod ffi_status_mapping_tests { } #[test] - fn parse_chat_message_no_chat_template_maps_to_no_chat_template() { + fn chat_parser_create_no_chat_template_maps_to_no_chat_template() { let mut out_error: *mut c_char = ptr::null_mut(); let result = unsafe { - parse_chat_message_status_to_result( - llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE, + chat_parser_create_status_to_result( + llama_cpp_bindings_sys::LLAMA_RS_CHAT_PARSER_CREATE_MODEL_HAS_NO_CHAT_TEMPLATE, ptr::null_mut(), &raw mut out_error, ) @@ -2060,11 +2264,11 @@ mod ffi_status_mapping_tests { } #[test] - fn parse_chat_message_no_vocab_maps_to_no_vocab() { + fn chat_parser_create_no_vocab_maps_to_no_vocab() { let mut out_error: *mut c_char = ptr::null_mut(); let result = unsafe { - parse_chat_message_status_to_result( - llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB, + chat_parser_create_status_to_result( + llama_cpp_bindings_sys::LLAMA_RS_CHAT_PARSER_CREATE_MODEL_HAS_NO_VOCAB, ptr::null_mut(), &raw mut out_error, ) @@ -2076,6 +2280,69 @@ mod ffi_status_mapping_tests { ); } + #[test] + fn chat_parser_create_allocation_failed_is_not_enough_memory() { + let mut out_error: *mut c_char = ptr::null_mut(); + let result = unsafe { + chat_parser_create_status_to_result( + llama_cpp_bindings_sys::LLAMA_RS_CHAT_PARSER_CREATE_ERROR_STRING_ALLOCATION_FAILED, + ptr::null_mut(), + &raw mut out_error, + ) + }; + + assert_eq!( + discriminant(&result.unwrap_err()), + discriminant(&ParseChatMessageError::NotEnoughMemory) + ); + } + + #[test] + fn chat_parser_create_cxx_exception_is_parse_failed_and_nulls_error() { + let mut out_error: *mut c_char = ptr::null_mut(); + let result = unsafe { + chat_parser_create_status_to_result( + llama_cpp_bindings_sys::LLAMA_RS_CHAT_PARSER_CREATE_VENDORED_THREW_CXX_EXCEPTION, + ptr::null_mut(), + &raw mut out_error, + ) + }; + + assert_eq!( + discriminant(&result.unwrap_err()), + discriminant(&ParseChatMessageError::ParseFailed { + message: String::new() + }) + ); + assert!(out_error.is_null()); + } + + #[test] + #[should_panic(expected = "llama_rs_chat_parser_create returned OK with a null parser handle")] + fn chat_parser_create_ok_with_null_parser_panics() { + let mut out_error: *mut c_char = ptr::null_mut(); + let _ = unsafe { + chat_parser_create_status_to_result( + llama_cpp_bindings_sys::LLAMA_RS_CHAT_PARSER_CREATE_OK, + ptr::null_mut(), + &raw mut out_error, + ) + }; + } + + #[test] + #[should_panic(expected = "llama_rs_chat_parser_create returned unrecognized status")] + fn chat_parser_create_unrecognized_status_panics() { + let mut out_error: *mut c_char = ptr::null_mut(); + let _ = unsafe { + chat_parser_create_status_to_result( + llama_cpp_bindings_sys::llama_rs_chat_parser_create_status::MAX, + ptr::null_mut(), + &raw mut out_error, + ) + }; + } + #[test] fn parse_chat_message_allocation_failed_is_not_enough_memory() { let mut out_error: *mut c_char = ptr::null_mut(); @@ -2632,6 +2899,92 @@ mod ffi_status_mapping_tests { }; } + #[test] + fn render_chat_template_status_ok_reads_rendered() { + let rendered = std::ffi::CString::new("hi").expect("test render string"); + let result = unsafe { + render_chat_template_status_to_result( + llama_cpp_bindings_sys::LLAMA_RS_RENDER_CHAT_TEMPLATE_OK, + rendered.as_ptr(), + ptr::null_mut(), + ) + }; + + assert_eq!(result.expect("ok render"), Some("hi".to_owned())); + } + + #[test] + fn render_chat_template_status_no_chat_template_is_none() { + let result = unsafe { + render_chat_template_status_to_result( + llama_cpp_bindings_sys::LLAMA_RS_RENDER_CHAT_TEMPLATE_MODEL_HAS_NO_CHAT_TEMPLATE, + ptr::null(), + ptr::null_mut(), + ) + }; + + assert_eq!(result.expect("none"), None); + } + + #[test] + fn render_chat_template_status_no_vocab_is_none() { + let result = unsafe { + render_chat_template_status_to_result( + llama_cpp_bindings_sys::LLAMA_RS_RENDER_CHAT_TEMPLATE_MODEL_HAS_NO_VOCAB, + ptr::null(), + ptr::null_mut(), + ) + }; + + assert_eq!(result.expect("none"), None); + } + + #[test] + fn render_chat_template_status_allocation_failed_is_not_enough_memory() { + let result = unsafe { + render_chat_template_status_to_result( + llama_cpp_bindings_sys::LLAMA_RS_RENDER_CHAT_TEMPLATE_ERROR_STRING_ALLOCATION_FAILED, + ptr::null(), + ptr::null_mut(), + ) + }; + + assert_eq!( + discriminant(&result.unwrap_err()), + discriminant(&MarkerDetectionError::NotEnoughMemory) + ); + } + + #[test] + fn render_chat_template_status_cxx_exception_is_reported() { + let result = unsafe { + render_chat_template_status_to_result( + llama_cpp_bindings_sys::LLAMA_RS_RENDER_CHAT_TEMPLATE_VENDORED_THREW_CXX_EXCEPTION, + ptr::null(), + ptr::null_mut(), + ) + }; + + assert_eq!( + discriminant(&result.unwrap_err()), + discriminant(&MarkerDetectionError::ReasoningMarkerDetectionFailed { + message: String::new() + }) + ); + } + + #[test] + #[should_panic(expected = "llama_rs_render_chat_template returned unrecognized status")] + fn render_chat_template_status_unrecognized_panics() { + let _ = unsafe { + render_chat_template_status_to_result( + llama_cpp_bindings_sys::llama_rs_render_chat_template_status::MAX, + ptr::null(), + ptr::null_mut(), + ) + }; + } + #[test] fn detect_reasoning_markers_ok_with_null_pointers_is_none_pair() { let result = unsafe { diff --git a/llama-cpp-bindings/src/model/params.rs b/llama-cpp-bindings/src/model/params.rs index e3a615e2..1506b564 100644 --- a/llama-cpp-bindings/src/model/params.rs +++ b/llama-cpp-bindings/src/model/params.rs @@ -1,3 +1,8 @@ +use std::ffi::{CStr, c_char}; +use std::fmt::{Debug, Formatter}; +use std::pin::Pin; +use std::ptr::null; + use crate::LlamaCppError; use crate::context::params::LlamaContextParams; use crate::error::{FitError, ModelParamsError}; @@ -5,10 +10,6 @@ use crate::model::llama_split_mode_parse_error::LlamaSplitModeParseError; use crate::model::params::fit_result::FitResult; use crate::model::params::kv_overrides::KvOverrides; use crate::model::split_mode::LlamaSplitMode; -use std::ffi::{CStr, c_char}; -use std::fmt::{Debug, Formatter}; -use std::pin::Pin; -use std::ptr::null; pub mod fit_result; pub mod kv_override_value_iterator; diff --git a/llama-cpp-bindings/src/mtmd/image_chunk_batch_size_mismatch.rs b/llama-cpp-bindings/src/mtmd/image_chunk_batch_size_mismatch.rs index a5ccb85d..aff6affe 100644 --- a/llama-cpp-bindings/src/mtmd/image_chunk_batch_size_mismatch.rs +++ b/llama-cpp-bindings/src/mtmd/image_chunk_batch_size_mismatch.rs @@ -1,5 +1,5 @@ #[derive(Debug, PartialEq, Eq)] pub struct ImageChunkBatchSizeMismatch { - pub image_tokens: u32, - pub n_batch: u32, + pub image_tokens: usize, + pub n_batch: i32, } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs b/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs index bfc24c7a..730b7b62 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs @@ -127,15 +127,17 @@ impl MtmdBitmap { /// /// * `NullResult` - Buffer could not be processed pub fn from_buffer(ctx: &MtmdContext, data: &[u8]) -> Result { - let bitmap = unsafe { + let bitmap_wrapper = unsafe { llama_cpp_bindings_sys::mtmd_helper_bitmap_init_from_buf( ctx.context.as_ptr(), data.as_ptr(), data.len(), + false, ) }; - let bitmap = NonNull::new(bitmap).ok_or(MtmdBitmapError::BitmapDecodeFailed)?; + let bitmap = + NonNull::new(bitmap_wrapper.bitmap).ok_or(MtmdBitmapError::BitmapDecodeFailed)?; Ok(Self { bitmap }) } @@ -262,12 +264,9 @@ mod tests { #[test] fn from_audio_data_creates_valid_bitmap() { - #[expect( - clippy::cast_precision_loss, - reason = "test fixture casts a small i32 (0..100) to f32 to synthesise a sine wave; \ - the values are well within f32's exact-representation range" - )] - let audio_samples: Vec = (0..100).map(|index| (index as f32 * 0.1).sin()).collect(); + let audio_samples: Vec = (0u8..100) + .map(|index| (f32::from(index) * 0.1).sin()) + .collect(); let bitmap = MtmdBitmap::from_audio_data(&audio_samples).unwrap(); assert!(bitmap.is_audio()); diff --git a/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs b/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs index 29f99835..3496ae4f 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs @@ -68,15 +68,10 @@ fn image_chunk_batch_size_error( if is_image_chunk && i64::try_from(chunk_token_count).is_ok_and(|tokens| tokens > i64::from(n_batch)) { - #[expect( - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - reason = "image token counts and n_batch are model-bounded and fit in u32" - )] return Some(MtmdEvalError::ImageChunkExceedsBatchSize( ImageChunkBatchSizeMismatch { - image_tokens: chunk_token_count as u32, - n_batch: n_batch as u32, + image_tokens: chunk_token_count, + n_batch, }, )); } diff --git a/llama-cpp-bindings/src/sampled_token_classifier.rs b/llama-cpp-bindings/src/sampled_token_classifier.rs index 24bd52ab..c3499e37 100644 --- a/llama-cpp-bindings/src/sampled_token_classifier.rs +++ b/llama-cpp-bindings/src/sampled_token_classifier.rs @@ -11,14 +11,16 @@ use crate::context::LlamaContext; use crate::error::EvalMultimodalChunksError; use crate::error::SampleError; use crate::error::TokenToStringError; +use crate::eval_multimodal_chunks_params::EvalMultimodalChunksParams; use crate::llama_batch::LlamaBatch; +use crate::marker_kind::MarkerKind; use crate::model::LlamaModel; use crate::mtmd::MtmdContext; use crate::mtmd::MtmdInputChunks; use crate::sampled_token::SampledToken; use crate::sampling::LlamaSampler; use crate::streaming_json_probe::JsonProbeOutcome; -use crate::streaming_markers::{MarkerKind, StreamingMarkers}; +use crate::streaming_markers::StreamingMarkers; use crate::token::LlamaToken; pub use crate::ingest_outcome::IngestOutcome; @@ -455,35 +457,28 @@ impl<'model> SampledTokenClassifier<'model> { /// type unknown to this binding, or /// [`EvalMultimodalChunksError::ChunkOutOfBounds`] when a valid index returns /// `None` from `chunks.get`. - #[expect( - clippy::too_many_arguments, - reason = "thin wrapper over MtmdInputChunks::eval_chunks; parameter shape mirrors the underlying API" - )] pub fn eval_multimodal_chunks( &mut self, chunks: &MtmdInputChunks, mtmd_ctx: &MtmdContext, llama_ctx: &LlamaContext, - start_position: llama_pos, - seq_id: llama_seq_id, - n_batch: i32, - logits_last: bool, + params: EvalMultimodalChunksParams, ) -> Result { let chunk_count = chunks.len(); - let mut next_position = start_position; + let mut next_position = params.start_position; for index in 0..chunk_count { let chunk = chunks .get(index) .ok_or(EvalMultimodalChunksError::ChunkOutOfBounds(index))?; - let logits_for_this_chunk = logits_last && index + 1 == chunk_count; + let logits_for_this_chunk = params.logits_last && index + 1 == chunk_count; next_position = chunk.eval_single( mtmd_ctx, llama_ctx, next_position, - seq_id, - n_batch, + params.seq_id, + params.n_batch, logits_for_this_chunk, )?; crate::ingest_prompt_chunk::ingest_prompt_chunk(self, &chunk)?; diff --git a/llama-cpp-bindings/src/send_logs_to_log.rs b/llama-cpp-bindings/src/send_logs_to_log.rs index 96365b0e..15998057 100644 --- a/llama-cpp-bindings/src/send_logs_to_log.rs +++ b/llama-cpp-bindings/src/send_logs_to_log.rs @@ -1,7 +1,12 @@ -#![deny(clippy::expect_used)] -#![deny(clippy::indexing_slicing)] -#![deny(clippy::panic)] -#![deny(clippy::unwrap_used)] +#![cfg_attr( + not(test), + deny( + clippy::expect_used, + clippy::indexing_slicing, + clippy::panic, + clippy::unwrap_used + ) +)] use std::sync::{Mutex, OnceLock}; @@ -467,10 +472,6 @@ mod tests { } #[test] - #[expect( - clippy::panic, - reason = "deliberate panic to poison the decoder mutex for fault-injection coverage" - )] fn decoder_mutex_poison() { ensure_test_logger_installed(); diff --git a/llama-cpp-bindings/src/streaming_markers.rs b/llama-cpp-bindings/src/streaming_markers.rs index e34636f7..03be06b9 100644 --- a/llama-cpp-bindings/src/streaming_markers.rs +++ b/llama-cpp-bindings/src/streaming_markers.rs @@ -1,13 +1,6 @@ +use crate::marker_kind::MarkerKind; use crate::token::LlamaToken; -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum MarkerKind { - ReasoningOpen, - ReasoningClose, - ToolCallOpen, - ToolCallClose, -} - #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct StreamingMarkers { pub reasoning_open: Option>, diff --git a/llama-cpp-bindings/src/tool_call_format/paired_quote_args.rs b/llama-cpp-bindings/src/tool_call_format/paired_quote_args.rs index 3f261882..8e1f21a5 100644 --- a/llama-cpp-bindings/src/tool_call_format/paired_quote_args.rs +++ b/llama-cpp-bindings/src/tool_call_format/paired_quote_args.rs @@ -217,11 +217,6 @@ pub fn parse( #[cfg(test)] mod tests { - #![expect( - clippy::literal_string_with_formatting_args, - reason = "Gemma tool-call format literals contain braces that resemble format args" - )] - use llama_cpp_bindings_types::PairedQuoteShape; use llama_cpp_bindings_types::ToolCallArgsShape; use llama_cpp_bindings_types::ToolCallArguments; diff --git a/llama-cpp-test-harness/Cargo.toml b/llama-cpp-test-harness/Cargo.toml index 477362da..08febc18 100644 --- a/llama-cpp-test-harness/Cargo.toml +++ b/llama-cpp-test-harness/Cargo.toml @@ -32,6 +32,7 @@ unused_qualifications = "warn" [lints.clippy] all = { level = "deny", priority = -1 } -pedantic = { level = "warn", priority = -1 } -nursery = { level = "warn", priority = -1 } module_name_repetitions = "allow" +nursery = { level = "warn", priority = -1 } +pedantic = { level = "warn", priority = -1 } +unnecessary_wraps = "allow" diff --git a/llama-cpp-test-harness/tests/harness_self_test.rs b/llama-cpp-test-harness/tests/harness_self_test.rs index d815d24f..333e0f4a 100644 --- a/llama-cpp-test-harness/tests/harness_self_test.rs +++ b/llama-cpp-test-harness/tests/harness_self_test.rs @@ -1,8 +1,3 @@ -#![expect( - clippy::unnecessary_wraps, - reason = "every trial returns anyhow::Result<()> to match the LlamaTestFn signature" -)] - use std::process::ExitCode; use anyhow::Result;