talk-llama : sync llama.cpp

ggml-ci
2025-08-09 10:35:51 +02:00 · 2025-05-13 13:20:19 +03:00
parent a14c89aefa
commit f890560575
25 changed files with 2847 additions and 1125 deletions
--- a/examples/talk-llama/llama-model.h
+++ b/examples/talk-llama/llama-model.h
@@ -76,6 +76,7 @@ enum llm_type {
    LLM_TYPE_236B,
    LLM_TYPE_290B,
    LLM_TYPE_314B,
+    LLM_TYPE_405B,
    LLM_TYPE_671B,
    LLM_TYPE_SMALL,
    LLM_TYPE_MEDIUM,
@@ -95,6 +96,8 @@ enum llm_type {
    LLM_TYPE_235B_A22B,
 };

+std::string llama_rope_scaling_type_name(llama_rope_scaling_type rope_scaling_type);
+
 struct llama_layer_posnet {
    // resnet
    struct ggml_tensor * norm1   = nullptr;
@@ -395,8 +398,11 @@ struct llama_model {

    const struct ggml_tensor * get_tensor(const char * name) const;

+    ggml_tensor * get_rope_factors(uint32_t n_ctx_per_seq, int il) const;
+
+    // note: can mutate `cparams`
    // TODO: move this to new llm_arch_model_i interface
-    llama_memory_i * create_memory() const; // TODO: params
+    llama_memory_i * create_memory(const llama_memory_params & params, llama_cparams & cparams) const;

    // TODO: move this to new llm_arch_model_i interface
    llm_graph_result_ptr build_graph(