mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-02-20 20:20:57 +01:00
whisper : add description of ggml_mul_mat_pad
This commit is contained in:
parent
f36554382a
commit
2b4160af29
@ -24,8 +24,8 @@ struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
|
|||||||
|
|
||||||
// select which device to run the Core ML model on
|
// select which device to run the Core ML model on
|
||||||
MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
|
MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
|
||||||
//config.computeUnits = MLComputeUnitsCPUAndGPU;
|
config.computeUnits = MLComputeUnitsCPUAndGPU;
|
||||||
config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
|
//config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
|
||||||
//config.computeUnits = MLComputeUnitsAll;
|
//config.computeUnits = MLComputeUnitsAll;
|
||||||
|
|
||||||
const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);
|
const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);
|
||||||
|
13
whisper.cpp
13
whisper.cpp
@ -136,6 +136,19 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
|
|||||||
ggml_graph_compute(graph, &plan);
|
ggml_graph_compute(graph, &plan);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
|
||||||
|
// the idea is to represent the original matrix multiplication:
|
||||||
|
//
|
||||||
|
// Z = X @ Y
|
||||||
|
//
|
||||||
|
// with the sum of two matrix multiplications:
|
||||||
|
//
|
||||||
|
// Z = (X_0 @ Y_0) + (X_1 @ Y_1)
|
||||||
|
//
|
||||||
|
// here X_0 and Y_0 are views of X and Y that have dimension 0 divisible by "pad"
|
||||||
|
// and X_1 and Y_1 are the remaining views. X_1 and Y_1 end up being small matrices that can be processed with more
|
||||||
|
// general-purpose kernels
|
||||||
|
//
|
||||||
static struct ggml_tensor * ggml_mul_mat_pad(struct ggml_context * ctx, struct ggml_tensor * x, struct ggml_tensor * y, int pad = 32) {
|
static struct ggml_tensor * ggml_mul_mat_pad(struct ggml_context * ctx, struct ggml_tensor * x, struct ggml_tensor * y, int pad = 32) {
|
||||||
//#if !defined(GGML_USE_METAL)
|
//#if !defined(GGML_USE_METAL)
|
||||||
// return ggml_mul_mat(ctx, x, y);
|
// return ggml_mul_mat(ctx, x, y);
|
||||||
|
Loading…
Reference in New Issue
Block a user