mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-28 01:29:17 +01:00
ggml : backport llama.cpp updates (close #709)
- About x2 overall performance improvement on Apple Silicon - Results should now be the same for different number of threads (not tested)
This commit is contained in:
parent
0a2d1210bc
commit
69b8503935
113
ggml.h
113
ggml.h
@ -236,6 +236,7 @@ enum ggml_op {
|
||||
|
||||
GGML_OP_SCALE,
|
||||
GGML_OP_CPY,
|
||||
GGML_OP_CONT,
|
||||
GGML_OP_RESHAPE,
|
||||
GGML_OP_VIEW,
|
||||
GGML_OP_PERMUTE,
|
||||
@ -253,16 +254,29 @@ enum ggml_op {
|
||||
GGML_OP_COUNT,
|
||||
};
|
||||
|
||||
|
||||
// ggml object
|
||||
struct ggml_object {
|
||||
size_t offs;
|
||||
size_t size;
|
||||
|
||||
struct ggml_object * next;
|
||||
|
||||
char padding[8];
|
||||
};
|
||||
|
||||
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
|
||||
|
||||
// n-dimensional tensor
|
||||
struct ggml_tensor {
|
||||
enum ggml_type type;
|
||||
|
||||
int n_dims;
|
||||
int ne[GGML_MAX_DIMS]; // number of elements
|
||||
size_t nb[GGML_MAX_DIMS]; // stride in bytes:
|
||||
// nb[0] = sizeof(type)
|
||||
// nb[1] = nb[0] * ne[0] + padding
|
||||
// nb[i] = nb[i-1] * ne[i-1]
|
||||
int64_t ne[GGML_MAX_DIMS]; // number of elements
|
||||
size_t nb[GGML_MAX_DIMS]; // stride in bytes:
|
||||
// nb[0] = sizeof(type)
|
||||
// nb[1] = nb[0] * ne[0] + padding
|
||||
// nb[i] = nb[i-1] * ne[i-1]
|
||||
|
||||
// compute data
|
||||
enum ggml_op op;
|
||||
@ -316,6 +330,7 @@ struct ggml_init_params {
|
||||
// memory pool
|
||||
size_t mem_size; // bytes
|
||||
void * mem_buffer; // if NULL, memory will be allocated internally
|
||||
bool no_alloc; // don't allocate memory for the tensor data
|
||||
};
|
||||
|
||||
void ggml_time_init(void); // call this once at the beginning of the program
|
||||
@ -327,8 +342,8 @@ int64_t ggml_cycles_per_ms(void);
|
||||
void ggml_print_object (const struct ggml_object * obj);
|
||||
void ggml_print_objects(const struct ggml_context * ctx);
|
||||
|
||||
int ggml_nelements(const struct ggml_tensor * tensor);
|
||||
size_t ggml_nbytes (const struct ggml_tensor * tensor);
|
||||
int64_t ggml_nelements(const struct ggml_tensor * tensor);
|
||||
size_t ggml_nbytes (const struct ggml_tensor * tensor);
|
||||
|
||||
int ggml_blck_size (enum ggml_type type);
|
||||
size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
|
||||
@ -343,40 +358,37 @@ size_t ggml_used_mem(const struct ggml_context * ctx);
|
||||
|
||||
size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
|
||||
|
||||
bool ggml_mlock_supported(void);
|
||||
bool ggml_mlock(struct ggml_context * ctx, char ** err_p);
|
||||
|
||||
struct ggml_tensor * ggml_new_tensor(
|
||||
struct ggml_context * ctx,
|
||||
enum ggml_type type,
|
||||
int n_dims,
|
||||
const int *ne);
|
||||
const int64_t *ne);
|
||||
|
||||
struct ggml_tensor * ggml_new_tensor_1d(
|
||||
struct ggml_context * ctx,
|
||||
enum ggml_type type,
|
||||
int ne0);
|
||||
int64_t ne0);
|
||||
|
||||
struct ggml_tensor * ggml_new_tensor_2d(
|
||||
struct ggml_context * ctx,
|
||||
enum ggml_type type,
|
||||
int ne0,
|
||||
int ne1);
|
||||
int64_t ne0,
|
||||
int64_t ne1);
|
||||
|
||||
struct ggml_tensor * ggml_new_tensor_3d(
|
||||
struct ggml_context * ctx,
|
||||
enum ggml_type type,
|
||||
int ne0,
|
||||
int ne1,
|
||||
int ne2);
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
int64_t ne2);
|
||||
|
||||
struct ggml_tensor * ggml_new_tensor_4d(
|
||||
struct ggml_context * ctx,
|
||||
enum ggml_type type,
|
||||
int ne0,
|
||||
int ne1,
|
||||
int ne2,
|
||||
int ne3);
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
int64_t ne2,
|
||||
int64_t ne3);
|
||||
|
||||
struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
|
||||
struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
|
||||
@ -514,6 +526,11 @@ struct ggml_tensor * ggml_cpy(
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b);
|
||||
|
||||
// make contiguous
|
||||
struct ggml_tensor * ggml_cont(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// return view(a), b specifies the new shape
|
||||
// TODO: when we start computing gradient, make a copy instead of view
|
||||
struct ggml_tensor * ggml_reshape(
|
||||
@ -526,33 +543,43 @@ struct ggml_tensor * ggml_reshape(
|
||||
struct ggml_tensor * ggml_reshape_2d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int ne0,
|
||||
int ne1);
|
||||
int64_t ne0,
|
||||
int64_t ne1);
|
||||
|
||||
// return view(a)
|
||||
// TODO: when we start computing gradient, make a copy instead of view
|
||||
struct ggml_tensor * ggml_reshape_3d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int ne0,
|
||||
int ne1,
|
||||
int ne2);
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
int64_t ne2);
|
||||
|
||||
// offset in bytes
|
||||
struct ggml_tensor * ggml_view_1d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int ne0,
|
||||
int64_t ne0,
|
||||
size_t offset);
|
||||
|
||||
struct ggml_tensor * ggml_view_2d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int ne0,
|
||||
int ne1,
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
size_t nb1, // row stride in bytes
|
||||
size_t offset);
|
||||
|
||||
struct ggml_tensor * ggml_view_3d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
int64_t ne2,
|
||||
size_t nb1, // row stride in bytes
|
||||
size_t nb2, // slice stride in bytes
|
||||
size_t offset);
|
||||
|
||||
struct ggml_tensor * ggml_permute(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
@ -748,8 +775,8 @@ enum ggml_opt_result ggml_opt(
|
||||
// quantization
|
||||
//
|
||||
|
||||
size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int qk, int64_t * hist);
|
||||
size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int qk, int64_t * hist);
|
||||
size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
|
||||
//
|
||||
// system info
|
||||
@ -768,6 +795,30 @@ int ggml_cpu_has_blas(void);
|
||||
int ggml_cpu_has_sse3(void);
|
||||
int ggml_cpu_has_vsx(void);
|
||||
|
||||
|
||||
//
|
||||
// Internal types and functions exposed for tests and benchmarks
|
||||
//
|
||||
|
||||
#ifdef __cplusplus
|
||||
// restrict not standard in C++
|
||||
#define GGML_RESTRICT
|
||||
#else
|
||||
#define GGML_RESTRICT restrict
|
||||
#endif
|
||||
typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||
typedef void (*quantize_row_q_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||
typedef void (*vec_dot_q_t)(const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
|
||||
|
||||
typedef struct {
|
||||
dequantize_row_q_t dequantize_row_q;
|
||||
quantize_row_q_t quantize_row_q;
|
||||
quantize_row_q_t quantize_row_q_reference;
|
||||
vec_dot_q_t vec_dot_q;
|
||||
} quantize_fns_t;
|
||||
|
||||
quantize_fns_t ggml_internal_get_quantize_fn(size_t i);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
133
whisper.cpp
133
whisper.cpp
@ -654,9 +654,11 @@ static bool kv_cache_init(
|
||||
int n_ctx) {
|
||||
cache.buf.resize(mem_bytes);
|
||||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = cache.buf.size();
|
||||
params.mem_buffer = cache.buf.data();
|
||||
struct ggml_init_params params = {
|
||||
/*.mem_size =*/ cache.buf.size(),
|
||||
/*.mem_buffer =*/ cache.buf.data(),
|
||||
/*.no_alloc =*/ false,
|
||||
};
|
||||
|
||||
cache.ctx = ggml_init(params);
|
||||
|
||||
@ -688,9 +690,11 @@ static bool kv_cache_reinit(struct whisper_kv_cache & cache) {
|
||||
|
||||
WHISPER_ASSERT(cache.buf.size() >= 2*n_elements*ggml_type_size(wtype));
|
||||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = cache.buf.size();
|
||||
params.mem_buffer = cache.buf.data();
|
||||
struct ggml_init_params params = {
|
||||
/*.mem_size =*/ cache.buf.size(),
|
||||
/*.mem_buffer =*/ cache.buf.data(),
|
||||
/*.no_alloc =*/ false,
|
||||
};
|
||||
|
||||
cache.ctx = ggml_init(params);
|
||||
|
||||
@ -1028,9 +1032,11 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
|
||||
|
||||
// create the ggml context
|
||||
{
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = wctx.model.buf->size();
|
||||
params.mem_buffer = wctx.model.buf->data();
|
||||
struct ggml_init_params params = {
|
||||
/*.mem_size =*/ wctx.model.buf->size(),
|
||||
/*.mem_buffer =*/ wctx.model.buf->data(),
|
||||
/*.no_alloc =*/ false,
|
||||
};
|
||||
|
||||
model.ctx = ggml_init(params);
|
||||
if (!model.ctx) {
|
||||
@ -1254,10 +1260,12 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
|
||||
break;
|
||||
}
|
||||
|
||||
int32_t nelements = 1;
|
||||
int32_t ne[3] = { 1, 1, 1 };
|
||||
int64_t nelements = 1;
|
||||
int64_t ne[3] = { 1, 1, 1 };
|
||||
for (int i = 0; i < n_dims; ++i) {
|
||||
read_safe(loader, ne[i]);
|
||||
int32_t ne_cur;
|
||||
read_safe(loader, ne_cur);
|
||||
ne[i] = ne_cur;
|
||||
nelements *= ne[i];
|
||||
}
|
||||
|
||||
@ -1278,7 +1286,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
|
||||
}
|
||||
|
||||
if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1] || tensor->ne[2] != ne[2]) {
|
||||
fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d, %d], expected [%d, %d, %d]\n",
|
||||
fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%lld, %lld, %lld], expected [%lld, %lld, %lld]\n",
|
||||
__func__, name.data(), tensor->ne[0], tensor->ne[1], tensor->ne[2], ne[0], ne[1], ne[2]);
|
||||
return false;
|
||||
}
|
||||
@ -1286,7 +1294,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
|
||||
const size_t bpe = (ftype == 0) ? sizeof(float) : sizeof(ggml_fp16_t);
|
||||
|
||||
if (nelements*bpe != ggml_nbytes(tensor)) {
|
||||
fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
|
||||
fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %llu\n",
|
||||
__func__, name.data(), ggml_nbytes(tensor), nelements*bpe);
|
||||
return false;
|
||||
}
|
||||
@ -1344,9 +1352,11 @@ static bool whisper_encode_internal(
|
||||
const int n_mels = hparams.n_mels;
|
||||
assert(mel_inp.n_mel == n_mels);
|
||||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = wstate.buf_compute.size();
|
||||
params.mem_buffer = wstate.buf_compute.data();
|
||||
struct ggml_init_params params = {
|
||||
/*.mem_size =*/ wstate.buf_compute.size(),
|
||||
/*.mem_buffer =*/ wstate.buf_compute.data(),
|
||||
/*.no_alloc =*/ false,
|
||||
};
|
||||
|
||||
struct ggml_context * ctx0 = ggml_init(params);
|
||||
|
||||
@ -1501,8 +1511,7 @@ static bool whisper_encode_internal(
|
||||
Vcur,
|
||||
n_state/n_head, n_head, n_ctx),
|
||||
1, 2, 0, 3),
|
||||
ggml_new_tensor_3d(ctx0, wctx.wtype, n_ctx, n_state/n_head, n_head)
|
||||
);
|
||||
ggml_new_tensor_3d(ctx0, wctx.wtype, n_ctx, n_state/n_head, n_head));
|
||||
|
||||
struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, false);
|
||||
#else
|
||||
@ -1726,10 +1735,12 @@ static bool whisper_encode_internal(
|
||||
|
||||
wstate.use_buf(ctx0, -1);
|
||||
|
||||
//struct ggml_tensor * k = ggml_view_1d(ctx0, wstate.kv_cross.k, n_state*n_ctx, (ggml_element_size(wstate.kv_cross.k)*n_state)*(il*hparams.n_audio_ctx + iter*n_ctx));
|
||||
//struct ggml_tensor * v = ggml_view_1d(ctx0, wstate.kv_cross.v, n_state*n_ctx, (ggml_element_size(wstate.kv_cross.v)*n_state)*(il*hparams.n_audio_ctx + iter*n_ctx));
|
||||
struct ggml_tensor* k = ggml_view_1d(ctx0, wstate.kv_cross.k, n_state*n_ctx, (ggml_element_size(wstate.kv_cross.k)*n_state)*(il*n_ctx));
|
||||
struct ggml_tensor* v = ggml_view_1d(ctx0, wstate.kv_cross.v, n_state*n_ctx, (ggml_element_size(wstate.kv_cross.v)*n_state)*(il*n_ctx));
|
||||
Vcross = ggml_transpose(ctx0, ggml_reshape_2d(ctx0, Vcross, n_state, n_ctx));
|
||||
|
||||
struct ggml_tensor * k = ggml_view_1d(ctx0, wstate.kv_cross.k, n_state*n_ctx, (ggml_element_size(wstate.kv_cross.k)*n_state)*(il*n_ctx));
|
||||
struct ggml_tensor * v = ggml_view_2d(ctx0, wstate.kv_cross.v, n_ctx, n_state,
|
||||
( n_ctx)*ggml_element_size(wstate.kv_cross.v),
|
||||
(il*n_ctx)*ggml_element_size(wstate.kv_cross.v)*n_state);
|
||||
|
||||
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcross, k));
|
||||
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcross, v));
|
||||
@ -1797,9 +1808,11 @@ static bool whisper_decode_internal(
|
||||
|
||||
//WHISPER_PRINT_DEBUG("%s: n_past = %d, N = %d, M = %d, n_ctx = %d\n", __func__, n_past, N, M, n_ctx);
|
||||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = wstate.buf_compute.size();
|
||||
params.mem_buffer = wstate.buf_compute.data();
|
||||
struct ggml_init_params params = {
|
||||
/*.mem_size =*/ wstate.buf_compute.size(),
|
||||
/*.mem_buffer =*/ wstate.buf_compute.data(),
|
||||
/*.no_alloc =*/ false,
|
||||
};
|
||||
|
||||
struct ggml_context * ctx0 = ggml_init(params);
|
||||
|
||||
@ -1862,20 +1875,24 @@ static bool whisper_decode_internal(
|
||||
|
||||
Kcur = ggml_scale(ctx0, Kcur, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
|
||||
|
||||
struct ggml_tensor * Vcur = ggml_mul_mat(ctx0,
|
||||
layer.attn_v_w,
|
||||
cur);
|
||||
|
||||
Vcur = ggml_add(ctx0,
|
||||
ggml_repeat(ctx0,
|
||||
layer.attn_v_b,
|
||||
Vcur),
|
||||
Vcur);
|
||||
|
||||
// store key and value to memory
|
||||
{
|
||||
struct ggml_tensor * Vcur = ggml_mul_mat(ctx0,
|
||||
layer.attn_v_w,
|
||||
cur);
|
||||
|
||||
Vcur = ggml_add(ctx0,
|
||||
ggml_repeat(ctx0,
|
||||
layer.attn_v_b,
|
||||
Vcur),
|
||||
Vcur);
|
||||
|
||||
Vcur = ggml_transpose(ctx0, ggml_reshape_2d(ctx0, Vcur, n_state, N));
|
||||
|
||||
struct ggml_tensor * k = ggml_view_1d(ctx0, kv_self.k, N*n_state, (ggml_element_size(kv_self.k)*n_state)*(il*n_ctx + n_past));
|
||||
struct ggml_tensor * v = ggml_view_1d(ctx0, kv_self.v, N*n_state, (ggml_element_size(kv_self.v)*n_state)*(il*n_ctx + n_past));
|
||||
struct ggml_tensor * v = ggml_view_2d(ctx0, kv_self.v, N, n_state,
|
||||
( n_ctx)*ggml_element_size(kv_self.v),
|
||||
(il*n_ctx)*ggml_element_size(kv_self.v)*n_state + n_past*ggml_element_size(kv_self.v));
|
||||
|
||||
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
|
||||
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
|
||||
@ -1914,16 +1931,14 @@ static bool whisper_decode_internal(
|
||||
|
||||
struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx0, KQ_masked);
|
||||
|
||||
struct ggml_tensor * V_trans =
|
||||
ggml_cpy(ctx0,
|
||||
ggml_permute(ctx0,
|
||||
ggml_reshape_3d(ctx0,
|
||||
ggml_view_1d(ctx0, kv_self.v, (n_past + N)*n_state, il*n_ctx*ggml_element_size(kv_self.v)*n_state),
|
||||
n_state/n_head, n_head, n_past + N),
|
||||
1, 2, 0, 3),
|
||||
ggml_new_tensor_3d(ctx0, kv_self.v->type, n_past + N, n_state/n_head, n_head));
|
||||
struct ggml_tensor * V =
|
||||
ggml_view_3d(ctx0, kv_self.v,
|
||||
n_past + N, n_state/n_head, n_head,
|
||||
n_ctx*ggml_element_size(kv_self.v),
|
||||
n_ctx*ggml_element_size(kv_self.v)*n_state/n_head,
|
||||
il*n_ctx*ggml_element_size(kv_self.v)*n_state);
|
||||
|
||||
struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max);
|
||||
struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max);
|
||||
|
||||
struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
|
||||
|
||||
@ -1986,15 +2001,22 @@ static bool whisper_decode_internal(
|
||||
ggml_view_1d(ctx0, wstate.kv_cross.k, M*n_state, il*M*ggml_element_size(wstate.kv_cross.k)*n_state),
|
||||
n_state/n_head, n_head, M);
|
||||
|
||||
struct ggml_tensor * Vcross =
|
||||
ggml_reshape_3d(ctx0,
|
||||
ggml_view_1d(ctx0, wstate.kv_cross.v, M*n_state, il*M*ggml_element_size(wstate.kv_cross.v)*n_state),
|
||||
n_state/n_head, n_head, M);
|
||||
//struct ggml_tensor * Vcross =
|
||||
// ggml_reshape_3d(ctx0,
|
||||
// ggml_view_1d(ctx0, wstate.kv_cross.v, M*n_state, il*M*ggml_element_size(wstate.kv_cross.v)*n_state),
|
||||
// n_state/n_head, n_head, M);
|
||||
|
||||
struct ggml_tensor * V_trans =
|
||||
ggml_cpy(ctx0,
|
||||
ggml_permute(ctx0, Vcross, 1, 2, 0, 3),
|
||||
ggml_new_tensor_3d(ctx0, Vcross->type, M, n_state/n_head, n_head));
|
||||
//struct ggml_tensor * V_trans =
|
||||
// ggml_cpy(ctx0,
|
||||
// ggml_permute(ctx0, Vcross, 1, 2, 0, 3),
|
||||
// ggml_new_tensor_3d(ctx0, Vcross->type, M, n_state/n_head, n_head));
|
||||
|
||||
struct ggml_tensor * V =
|
||||
ggml_view_3d(ctx0, wstate.kv_cross.v,
|
||||
M, n_state/n_head, n_head,
|
||||
M*ggml_element_size(wstate.kv_cross.v),
|
||||
M*ggml_element_size(wstate.kv_cross.v)*n_state/n_head,
|
||||
il*M*ggml_element_size(wstate.kv_cross.v)*n_state);
|
||||
|
||||
// ------
|
||||
|
||||
@ -2021,7 +2043,7 @@ static bool whisper_decode_internal(
|
||||
|
||||
struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx0, KQ);
|
||||
|
||||
struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max);
|
||||
struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max);
|
||||
|
||||
struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
|
||||
|
||||
@ -4726,6 +4748,7 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
|
||||
struct ggml_init_params gparams = {
|
||||
/*.mem_size =*/ buf.size(),
|
||||
/*.mem_buffer =*/ buf.data(),
|
||||
/*.no_alloc =*/ false,
|
||||
};
|
||||
|
||||
struct ggml_context * ctx0 = ggml_init(gparams);
|
||||
|
Loading…
Reference in New Issue
Block a user