talk-llama : fix build after ggml sync (#1049)

sed -i 's,GGML_BACKEND_CUDA,GGML_BACKEND_GPU,g' examples/talk-llama/llama.cpp
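The sed one-liner above is the entire fix: the ggml sync renamed the CUDA-specific backend tag to a generic GPU one, so every reference in the vendored llama.cpp has to follow. For reference, a minimal sketch of the renamed enum as it appears in ggml.h after the sync (the names match the diff below; the numeric values are illustrative assumptions, not taken from this commit):

    // backend tag attached to each tensor; GGML_BACKEND_CUDA became
    // GGML_BACKEND_GPU when offloading stopped being CUDA-specific
    enum ggml_backend {
        GGML_BACKEND_CPU = 0,   // tensor lives in host memory
        GGML_BACKEND_GPU = 10,  // formerly GGML_BACKEND_CUDA (value is an assumption)
    };

As a quick sanity check after running the sed command, grep -n GGML_BACKEND_CUDA examples/talk-llama/llama.cpp should print nothing.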
commit 62642bb61c (parent f1c9df5806)
Author: Przemysław Pawełczyk
Date:   2023-06-25 15:13:50 +02:00 (committed via GitHub)

@@ -1002,7 +1002,7 @@ static void llama_model_load_internal(
     }
 
 #ifdef GGML_USE_CUBLAS
-#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CUDA
+#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
 #else
 #define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CPU
 #endif
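For context, LLAMA_BACKEND_OFFLOAD is the backend assigned to layers that get offloaded when the build has cuBLAS enabled; elsewhere in llama_model_load_internal it is used roughly like this (a paraphrased sketch from memory, not part of this diff):

    // layers below i_gpu_start stay on the CPU; the rest get the
    // offload backend (GGML_BACKEND_GPU when built with cuBLAS)
    const ggml_backend backend = int(i) < i_gpu_start ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD;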
@@ -1054,7 +1054,7 @@ static void llama_model_load_internal(
             layer.w2 = ml->get_tensor(layers_i + ".feed_forward.w2.weight", { n_ff, n_embd}, backend);
             layer.w3 = ml->get_tensor(layers_i + ".feed_forward.w3.weight", {n_embd, n_ff}, backend);
 
-            if (backend == GGML_BACKEND_CUDA) {
+            if (backend == GGML_BACKEND_GPU) {
                 vram_total +=
                     ggml_nbytes(layer.attention_norm) + ggml_nbytes(layer.wq) + ggml_nbytes(layer.wk) +
                     ggml_nbytes(layer.wv)             + ggml_nbytes(layer.wo) + ggml_nbytes(layer.attention_norm) +
@@ -1115,7 +1115,7 @@ static void llama_model_load_internal(
         }
     }
     for (llama_load_tensor & lt : ml->tensors_map.tensors) {
-        if (lt.ggml_tensor->backend != GGML_BACKEND_CUDA) {
+        if (lt.ggml_tensor->backend != GGML_BACKEND_GPU) {
             continue;
         }
         if (progress_callback) {