llama : pre-allocate input tensors in a separate buffer (llama/5100)

slaren 2024-01-24 12:48:14 +01:00 committed by Georgi Gerganov
parent 55d54359e0
commit 94c1ae8668


@@ -109,8 +109,8 @@ void ggml_tallocr_alloc(ggml_tallocr_t alloc, struct ggml_tensor * tensor) {
         if (block->size >= size) {
             best_fit_block = alloc->n_free_blocks - 1;
         } else {
-            fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
-                    __func__, size, max_avail);
+            fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, largest block available %zu)\n",
+                    __func__, tensor->name, size, max_avail);
             GGML_ASSERT(!"not enough space in the buffer");
             return;
         }
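
The visible hunk only improves the allocator's out-of-space message by naming the offending tensor; the commit title refers to the upstream llama.cpp change (llama/5100) that moves the model's input tensors out of the per-graph compute buffer into a dedicated backend buffer allocated once. A minimal sketch of that pattern with the public ggml / ggml-backend API is shown below; the tensor names, sizes, and the helper function are illustrative assumptions, not the code llama.cpp actually uses.

// Sketch: allocate a small set of "input" tensors in their own backend buffer,
// separate from the buffer used for the compute graph (illustrative only).
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

static ggml_backend_buffer_t alloc_input_buffer(ggml_backend_t backend,
                                                struct ggml_context ** out_ctx,
                                                int n_tokens) {
    // context with no_alloc = true: tensors get metadata only, no data yet
    struct ggml_init_params params = {
        /*.mem_size   =*/ 8 * ggml_tensor_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);

    // illustrative input tensors (names and sizes are assumptions)
    struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_tokens);
    struct ggml_tensor * inp_pos    = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_tokens);
    ggml_set_name(inp_tokens, "inp_tokens");
    ggml_set_name(inp_pos,    "inp_pos");

    // allocate every tensor of this context in one dedicated backend buffer
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);

    *out_ctx = ctx;
    return buf;
}

With this layout the input tensors keep fixed addresses for the lifetime of the context: each batch only copies new data into them (e.g. with ggml_backend_tensor_set()) instead of re-allocating them inside the graph buffer, and the buffer is released once at shutdown with ggml_backend_buffer_free() and ggml_free().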