mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-01-13 17:38:36 +01:00
llama : pre-allocate input tensors in a separate buffer (llama/5100)
This commit is contained in:
parent
55d54359e0
commit
94c1ae8668
@ -109,8 +109,8 @@ void ggml_tallocr_alloc(ggml_tallocr_t alloc, struct ggml_tensor * tensor) {
|
|||||||
if (block->size >= size) {
|
if (block->size >= size) {
|
||||||
best_fit_block = alloc->n_free_blocks - 1;
|
best_fit_block = alloc->n_free_blocks - 1;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
|
fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, largest block available %zu)\n",
|
||||||
__func__, size, max_avail);
|
__func__, tensor->name, size, max_avail);
|
||||||
GGML_ASSERT(!"not enough space in the buffer");
|
GGML_ASSERT(!"not enough space in the buffer");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user