talk-llama : sync llama.cpp

ggml-ci
2025-08-14 03:18:42 +02:00 · 2025-06-20 21:18:44 +03:00
parent d65a579a0a
commit e6c10cf3d5
28 changed files with 2521 additions and 1738 deletions
--- a/examples/talk-llama/llama.h
+++ b/examples/talk-llama/llama.h
@@ -1044,6 +1044,7 @@ extern "C" {

    LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab);
    LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab);
+    LLAMA_API bool llama_vocab_get_add_sep(const struct llama_vocab * vocab);

    LLAMA_API llama_token llama_vocab_fim_pre(const struct llama_vocab * vocab);
    LLAMA_API llama_token llama_vocab_fim_suf(const struct llama_vocab * vocab);
@@ -1087,6 +1088,7 @@ extern "C" {
    /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
    /// @return Returns the number of tokens on success, no more than n_tokens_max
    /// @return Returns a negative number on failure - the number of tokens that would have been returned
+    /// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
    /// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
    /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
    ///                      as plaintext. Does not insert a leading space.