Mirror of https://github.com/ggerganov/whisper.cpp.git
Synced 2025-07-02 23:41:28 +02:00
Compare commits

7 commits

| Author | SHA1 | Date |
|---|---|---|
| | 0b9af32a8b | |
| | 11b1b63b14 | |
| | 0e26a6c92e | |
| | 66d8f0b7f1 | |
| | ba5bcde874 | |
| | ab0a8593c5 | |
| | 668ffc9b23 | |
CMakeLists.txt:

```diff
@@ -1,6 +1,6 @@
 cmake_minimum_required (VERSION 3.5)
 
-project(whisper.cpp VERSION 1.5.3)
+project(whisper.cpp VERSION 1.5.4)
 
 # Add path to modules
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
```
README.md:

```diff
@@ -6,7 +6,7 @@
 [](https://opensource.org/licenses/MIT)
 [](https://www.npmjs.com/package/whisper.cpp/)
 
-Stable: [v1.5.3](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.3) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Stable: [v1.5.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
 
 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
```
Submodule bindings/ios updated: 918b107c70...b21b6ff325
bindings/javascript/package.json:

```diff
@@ -1,6 +1,6 @@
 {
   "name": "whisper.cpp",
-  "version": "1.5.3",
+  "version": "1.5.4",
   "description": "Whisper speech recognition",
   "main": "whisper.js",
   "scripts": {
```
coreml/whisper-encoder.mm:

```diff
@@ -24,9 +24,9 @@ struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
 
     // select which device to run the Core ML model on
     MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
-    config.computeUnits = MLComputeUnitsCPUAndGPU;
+    // config.computeUnits = MLComputeUnitsCPUAndGPU;
     //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
-    //config.computeUnits = MLComputeUnitsAll;
+    config.computeUnits = MLComputeUnitsAll;
 
     const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);
```
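This hunk switches the Core ML encoder from CPU+GPU to all available compute units, letting Core ML also schedule work on the Apple Neural Engine. As a rough illustration only (coremltools is not used by this file, and the model path below is hypothetical), the same selection can be expressed when loading a compiled model from Python:

```python
# Illustrative sketch, not part of this diff: selecting compute units with
# coremltools. The .mlpackage path is hypothetical.
import coremltools as ct

model = ct.models.MLModel(
    "models/coreml-encoder-base.en.mlpackage",
    compute_units=ct.ComputeUnit.ALL,  # CPU + GPU + ANE, analogous to MLComputeUnitsAll
)
```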
examples/whisper.swiftui/.gitignore (vendored, new file):

```diff
@@ -0,0 +1,2 @@
+xcuserdata
+xcshareddata
```
```diff
@@ -1 +1 @@
-3fd01e00e40583ccd4b393a7c6502d6a4455a1d5
+3eace58911ea8d2cf35defdc59848d99b91a57f5
```
ggml-cuda.cu:

```diff
@@ -9689,8 +9689,8 @@ static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
 
     ggml_cuda_set_device(ctx->device);
-    CUDA_CHECK(cudaDeviceSynchronize());
 
     CUDA_CHECK(cudaMemcpy((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice));
+    CUDA_CHECK(cudaDeviceSynchronize());
 }
 
 static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
```
ggml-metal.m:

```diff
@@ -258,14 +258,14 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
         bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
 #endif
         NSError * error = nil;
-        NSString * libPath = [bundle pathForResource:@"ggml" ofType:@"metallib"];
+        NSString * libPath = [bundle pathForResource:@"default" ofType:@"metallib"];
         if (libPath != nil) {
             // pre-compiled library found
             NSURL * libURL = [NSURL fileURLWithPath:libPath];
             GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
             ctx->library = [ctx->device newLibraryWithURL:libURL error:&error];
         } else {
-            GGML_METAL_LOG_INFO("%s: ggml.metallib not found, loading from source\n", __func__);
+            GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
 
             NSString * sourcePath;
             NSString * ggmlMetalPathResources = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
@@ -295,7 +295,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
 #endif
         // try to disable fast-math
         // NOTE: this seems to have no effect whatsoever
-        // instead, in order to disable fast-math, we have to build ggml.metallib from the command line
+        // instead, in order to disable fast-math, we have to build default.metallib from the command line
         // using xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
         // and go through the "pre-compiled library found" path above
         //[options setFastMathEnabled:false];
```
ggml-quants.h:

```diff
@@ -70,7 +70,7 @@ static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block size/padding");
 // 2-bit quantization
 // weight is represented as x = a * q + b
 // 16 blocks of 16 elements each
-// Effectively 2.5625 bits per weight
+// Effectively 2.625 bits per weight
 typedef struct {
     uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
     uint8_t qs[QK_K/4];      // quants
```
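The corrected figure follows directly from the block layout in the struct. A quick sanity check, assuming QK_K == 256 and two 2-byte half-precision fields (d and dmin) per block, as in ggml's block_q2_K:

```python
# Back-of-the-envelope check of the corrected bits-per-weight comment,
# assuming QK_K == 256 and two 2-byte ggml_half fields (d, dmin).
QK_K = 256
scales_bytes = QK_K // 16        # 16 bytes of 4-bit scales/mins
qs_bytes     = QK_K // 4         # 64 bytes of 2-bit quants
half_bytes   = 2 * 2             # d and dmin, one ggml_half each
block_bytes  = scales_bytes + qs_bytes + half_bytes   # 84 bytes per 256 weights
print(block_bytes * 8 / QK_K)    # -> 2.625, matching the new comment
```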
models/convert-whisper-to-coreml.py:

```diff
@@ -143,20 +143,7 @@ class AudioEncoderANE(AudioEncoder):
             x = block(x)
 
         x = self.ln_post(x)
 
-        # """
-        # TODO:
-        # I think we need to transpose the result here to make it fit whisper.cpp memory order.
-        # However, even doing this, the results are still wrong. Kind of less wrong compared to
-        # not transposing, but still wrong.
-
-        # Also, I don't know why the original OpenAI implementation does not need to transpose
-
-        # transpose to (batch_size, n_ctx, n_state)
-        # x : torch.Tensor, shape = (batch_size, n_state, 1, n_ctx)
-        # """
-        # x = x.transpose(1,3)
+        x = x.squeeze(2).transpose(1, 2)
 
         return x
```
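The replacement line undoes the ANE-friendly (batch_size, n_state, 1, n_ctx) layout before returning. A minimal shape check of that squeeze/transpose; the concrete sizes (batch 1, n_state 512, n_ctx 1500) are illustrative, not taken from the diff:

```python
# Minimal shape check of the squeeze/transpose fix; the dimensions are
# illustrative examples, not values stated in this diff.
import torch

x = torch.randn(1, 512, 1, 1500)    # (batch_size, n_state, 1, n_ctx), ANE layout
y = x.squeeze(2).transpose(1, 2)    # drop the dummy axis, then swap to (batch_size, n_ctx, n_state)
assert y.shape == (1, 1500, 512)
```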
models/generate-coreml-model.sh:

```diff
@@ -23,7 +23,7 @@ if [[ $mname == "-h5" ]]; then
     echo $mpath
     python3 models/convert-h5-to-coreml.py --model-name $mname --model-path $mpath --encoder-only True
 else
-    python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True
+    python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True --optimize-ane True
 fi
 
 xcrun coremlc compile models/coreml-encoder-${mname}.mlpackage models/
```
whisper.cpp:

```diff
@@ -3821,6 +3821,7 @@ void whisper_reset_timings(struct whisper_context * ctx) {
     ctx->state->t_sample_us = 0;
     ctx->state->t_encode_us = 0;
     ctx->state->t_decode_us = 0;
+    ctx->state->t_batchd_us = 0;
     ctx->state->t_prompt_us = 0;
     ctx->state->n_sample = 0;
     ctx->state->n_encode = 0;
```