rebase on master after whisper_state changes

--- a/whisper.cpp
+++ b/whisper.cpp
@@ -589,7 +589,6 @@ struct whisper_state {
 
     int lang_id = 0; // english by default
 
-    std::string path_model; // populated by whisper_init_from_file()
 #ifdef WHISPER_USE_COREML
     whisper_coreml_context * ctx_coreml;
 #endif
@@ -644,6 +643,8 @@ struct whisper_context {
     whisper_model model;
     whisper_vocab vocab;
     whisper_state * state = nullptr;
+
+    std::string path_model; // populated by whisper_init_from_file()
 };
 
 template<typename T>
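Taken together, the two struct hunks move path_model out of whisper_state and into whisper_context: the model file path is a property of the loaded model, not of any single inference state, and whisper_init_state() (further down) needs it to locate the Core ML model. An abridged view of the resulting layout, reconstructed only from the fields visible in this diff (other members elided):

    struct whisper_state {
        int lang_id = 0; // english by default
    #ifdef WHISPER_USE_COREML
        whisper_coreml_context * ctx_coreml; // per-state Core ML encoder handle
    #endif
        // ... other per-inference members elided
    };

    struct whisper_context {
        whisper_model model;
        whisper_vocab vocab;
        whisper_state * state = nullptr;

        std::string path_model; // populated by whisper_init_from_file()
    };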
@@ -1606,7 +1607,7 @@ static bool whisper_encode_internal(
                             ggml_repeat(ctx0, layer.mlp_ln_w, cur),
                             cur),
                         ggml_repeat(ctx0, layer.mlp_ln_b, cur));
             }
 
 #ifdef WHISPER_USE_FLASH_FF
-            wctx.use_buf(ctx0, 0);
+            wstate.use_buf(ctx0, 0);
@@ -1646,7 +1647,7 @@ static bool whisper_encode_internal(
                     ggml_repeat(ctx0, layer.mlp_1_b, cur),
                     cur);
 #endif
         }
 
-        wctx.use_buf(ctx0, 3);
+        wstate.use_buf(ctx0, 3);
 
@@ -1684,11 +1685,11 @@ static bool whisper_encode_internal(
         //ggml_graph_print(&gf);
     }
 #else
-    wctx.use_buf(ctx0, -1);
+    wstate.use_buf(ctx0, -1);
 
     struct ggml_tensor * cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
 
-    whisper_coreml_encode(wctx.ctx_coreml, (float *) mel->data, (float *) cur->data);
+    whisper_coreml_encode(wstate.ctx_coreml, (float *) mel->data, (float *) cur->data);
 #endif
 
     // cur
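These encoder hunks are the core of the rebase fix: after the whisper_state refactor, the scratch buffers and the Core ML handle belong to the state, so the stale wctx references no longer compile. For orientation, the function presumably carries both objects at this point (a sketch of the signature, not shown in this diff):

    static bool whisper_encode_internal(
            whisper_context & wctx,    // shared model + vocab
              whisper_state & wstate,  // buffers, caches, ctx_coreml
                  const int   mel_offset,
                  const int   n_threads);

With WHISPER_USE_COREML defined, the #else branch above skips the ggml encoder graph entirely: it allocates an F32 tensor of shape n_state x n_ctx and lets whisper_coreml_encode() fill it from the mel spectrogram data.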
@@ -2493,12 +2494,25 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
 // interface implementation
 //
 
+#ifdef WHISPER_USE_COREML
+// replace .bin with .mlmodelc
+static std::string whisper_get_coreml_path(std::string path_bin) {
+    auto pos = path_bin.rfind('.');
+    if (pos != std::string::npos) {
+        path_bin = path_bin.substr(0, pos);
+    }
+
+    path_bin += ".mlmodelc";
+
+    return path_bin;
+}
+#endif
 
 struct whisper_state * whisper_init_state(whisper_context * ctx) {
     whisper_state * state = new whisper_state;
 
     const size_t scale = ctx->model.hparams.f16 ? 1 : 2;
 
     if (!kv_cache_init(ctx->model.hparams, scale * MEM_REQ_KV_SELF.at(ctx->model.type), state->decoders[0].kv_self, ctx->wtype, ctx->model.hparams.n_text_ctx)) {
         fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
         return nullptr;
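The relocated helper derives the Core ML model path from the ggml model path by swapping the extension. A standalone demonstration (the helper body is copied from the hunk above; the sample paths are illustrative):

    #include <iostream>
    #include <string>

    static std::string whisper_get_coreml_path(std::string path_bin) {
        auto pos = path_bin.rfind('.');
        if (pos != std::string::npos) {
            path_bin = path_bin.substr(0, pos); // drop the extension
        }

        path_bin += ".mlmodelc";

        return path_bin;
    }

    int main() {
        // "models/ggml-base.en.bin" -> "models/ggml-base.en.mlmodelc"
        std::cout << whisper_get_coreml_path("models/ggml-base.en.bin") << '\n';

        // no dot at all: ".mlmodelc" is simply appended
        std::cout << whisper_get_coreml_path("ggml-model") << '\n';
    }

One caveat worth noting: rfind('.') searches the whole string, so a dot in a directory name (e.g. "v1.0/ggml-model") would truncate at the wrong place; the helper assumes the last dot belongs to the file extension.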
@@ -2519,6 +2533,20 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
         fprintf(stderr, "%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0);
     }
 
+#ifdef WHISPER_USE_COREML
+    const auto path_coreml = whisper_get_coreml_path(ctx->path_model);
+
+    fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
+    fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
+
+    state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
+    if (!state->ctx_coreml) {
+        fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
+        return nullptr;
+    }
+
+    fprintf(stderr, "%s: Core ML model loaded\n", __func__);
+#endif
 
     state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx);
 
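With this hunk the Core ML encoder is loaded per state in whisper_init_state(), using the path recorded on the context, instead of at model-load time. A minimal usage sketch against the whisper.h API of this era (the model path is an example; error handling abridged):

    #include "whisper.h"

    int main() {
        // loads the ggml weights and records path_model on the context
        struct whisper_context * ctx =
            whisper_init_from_file_no_state("models/ggml-base.en.bin");
        if (!ctx) return 1;

        // derives models/ggml-base.en.mlmodelc from ctx->path_model and,
        // when built with WHISPER_USE_COREML, loads the Core ML encoder
        struct whisper_state * state = whisper_init_state(ctx);
        if (!state) return 1;

        whisper_free_state(state);
        whisper_free(ctx);
        return 0;
    }

One observation about the code shown (not something this commit addresses): on a Core ML load failure, whisper_init_state() returns nullptr without deleting the freshly allocated state, so the kv caches set up earlier in the function would leak.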
@@ -2542,20 +2570,6 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
     return state;
 }
 
-#ifdef WHISPER_USE_COREML
-// replace .bin with .mlmodelc
-static std::string whisper_get_coreml_path(std::string path_bin) {
-    auto pos = path_bin.rfind('.');
-    if (pos != std::string::npos) {
-        path_bin = path_bin.substr(0, pos);
-    }
-
-    path_bin += ".mlmodelc";
-
-    return path_bin;
-}
-#endif
-
 struct whisper_context * whisper_init_from_file_no_state(const char * path_model) {
     whisper_model_loader loader = {};
 
@@ -2585,23 +2599,10 @@ struct whisper_context * whisper_init_from_file_no_state(const char * path_model
         fin->close();
     };
 
-    auto ctx = whisper_init(&loader);
+    auto ctx = whisper_init_no_state(&loader);
 
     if (ctx) {
         ctx->path_model = path_model;
-#ifdef WHISPER_USE_COREML
-        const auto path_coreml = whisper_get_coreml_path(ctx->path_model);
-
-        fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
-        fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
-
-        ctx->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
-        if (!ctx->ctx_coreml) {
-            fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
-            return nullptr;
-        }
-
-        fprintf(stderr, "%s: Core ML model loaded\n", __func__);
-#endif
     }
 
     return ctx;
@@ -2730,8 +2731,8 @@ void whisper_free(struct whisper_context * ctx) {
         whisper_free_state(ctx->state);
 
 #ifdef WHISPER_USE_COREML
-        whisper_coreml_free(ctx->ctx_coreml);
-        ctx->ctx_coreml = nullptr;
+        whisper_coreml_free(ctx->state->ctx_coreml);
+        ctx->state->ctx_coreml = nullptr;
 #endif
         delete ctx;
     }
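Finally, teardown follows the field move: the Core ML handle is now reached through ctx->state. One ordering hazard to flag: whisper_free_state(ctx->state) runs before ctx->state->ctx_coreml is read, so if whisper_free_state() deletes the state object this becomes a use-after-free. A safer arrangement (a sketch of the pattern upstream later settles on, not part of this commit) releases the handle inside whisper_free_state() itself:

    void whisper_free_state(struct whisper_state * state) {
        if (state) {
    #ifdef WHISPER_USE_COREML
            // release the per-state Core ML encoder together with the state
            if (state->ctx_coreml) {
                whisper_coreml_free(state->ctx_coreml);
                state->ctx_coreml = nullptr;
            }
    #endif
            // ... free kv caches and buffers, then:
            delete state;
        }
    }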