talk-llama : sync llama.cpp (#3084)

ggml-ci
2025-08-19 08:02:15 +02:00 · 2025-04-28 16:40:23 +03:00
parent 28dcdff4c5
commit f3c42399a3
36 changed files with 16940 additions and 12400 deletions
--- a/examples/talk-llama/llama-grammar.h
+++ b/examples/talk-llama/llama-grammar.h
@@ -3,6 +3,7 @@
 #include "llama.h"

 #include <map>
+#include <regex>
 #include <string>
 #include <vector>

@@ -105,6 +106,11 @@ struct llama_grammar_parser {
    void print(FILE * file);
 };

+struct llama_grammar_trigger_pattern { // element type of llama_grammar::trigger_patterns (regular expressions that trigger a lazy grammar)
+    std::string pattern; // NOTE(review): presumably the source text that `regex` is built from - confirm in llama-grammar.cpp
+    std::regex  regex;   // NOTE(review): presumably the compiled matcher; trigger_patterns requires a full match of the entire generated string
+};
+
 struct llama_grammar {
    // note: allow null vocab for testing (not great)
    const llama_vocab * vocab;
@@ -116,13 +122,16 @@ struct llama_grammar {
    llama_partial_utf8 partial_utf8;

    // lazy grammars wait for trigger words or tokens before constraining the sampling.
-    // we still ahve trigger_tokens for non-lazy grammars to force printing of special trigger tokens.
+    // we still have trigger_tokens for non-lazy grammars to force printing of special trigger tokens.
    // (useful e.g. for tool_choice=required)
    bool                     lazy             = false;
    bool                     awaiting_trigger = false; // Initialized to true for lazy grammars only
    std::string              trigger_buffer;           // Output buffered by lazy grammar. Will be cleared once trigger is found.
    std::vector<llama_token> trigger_tokens;           // Tokens that trigger a lazy grammar, or tokens to force printing of (even if special).
-    std::vector<std::string> trigger_words;
+    std::vector<llama_grammar_trigger_pattern>
+                             trigger_patterns;         // Regular expressions that trigger a lazy grammar. Must be a full match of the entire generated
+                                                       // string, and the grammar will be given the string from the first match group onwards.
+
 };

 //
@@ -141,8 +150,8 @@ struct llama_grammar * llama_grammar_init_impl(
                      const char * grammar_str,
                      const char * grammar_root,
                              bool lazy,
-                     const char ** trigger_words,
-                            size_t num_trigger_words,
+                     const char ** trigger_patterns,
+                            size_t num_trigger_patterns,
               const llama_token * trigger_tokens,
                            size_t num_trigger_tokens);