mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-08-11 01:34:04 +02:00
llama : offload to RPC in addition to other backends (llama/7640)
* llama : offload to RPC in addition to other backends * - fix copy_tensor being called on the src buffer instead of the dst buffer - always initialize views in the view_src buffer - add RPC backend to Makefile build - add endpoint to all RPC object names * add rpc-server to Makefile * Update llama.cpp Co-authored-by: slaren <slarengh@gmail.com> --------- Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
committed by
Georgi Gerganov
parent
e5e38d4920
commit
6cc3b022ee
@ -750,7 +750,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
|
||||
// this tensor was allocated without ggml-backend
|
||||
return;
|
||||
}
|
||||
ggml_backend_view_init(galloc->buffers[buffer_id], tensor);
|
||||
ggml_backend_view_init(tensor);
|
||||
}
|
||||
} else {
|
||||
if (tensor->data == NULL) {
|
||||
@ -899,12 +899,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
|
||||
if (t->view_src == NULL) {
|
||||
ggml_tallocr_alloc(&tallocr, t);
|
||||
} else if (t->buffer == NULL) {
|
||||
ggml_backend_view_init(buffer, t);
|
||||
ggml_backend_view_init(t);
|
||||
}
|
||||
} else {
|
||||
if (t->view_src != NULL && t->buffer == NULL) {
|
||||
// view of a pre-allocated tensor
|
||||
ggml_backend_view_init(buffer, t);
|
||||
ggml_backend_view_init(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user