Mirror of https://github.com/ggerganov/whisper.cpp.git (synced 2025-07-03 16:01:03 +02:00)

Compare commits: macros-cvt...nvblas (5 commits)

- 683f111088
- 3996ecc156
- faa85f9840
- b6597539f9
- 9a4b7a916e
README.md (30 changed lines)
@@ -52,21 +52,6 @@ The tensor operators are optimized heavily for Apple silicon CPUs. Depending on
 intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
 the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
 
-## Limitations
-
-- Inference only
-- No GPU support
-- Very basic greedy sampling scheme - always pick the token with highest probability.
-  This should be similar to the [GreedyDecoder](https://github.com/openai/whisper/blob/main/whisper/decoding.py#L249-L274)
-  from the original python implementation, so in order to make a fair comparison between the 2 implementations, make sure
-  to run the python code with the following parameters:
-
-```
-whisper --best_of None --beam_size None ...
-```
-
-In the future, `whisper.cpp` will support more sampling strategies.
-
 ## Quick start
 
 First, download one of the Whisper models converted in [ggml format](models). For example:
@@ -220,6 +205,21 @@ make large
 | medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
 | large | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
 
+## Limitations
+
+- Inference only
+- No GPU support
+- Very basic greedy sampling scheme - always pick the token with highest probability.
+  This should be similar to the [GreedyDecoder](https://github.com/openai/whisper/blob/main/whisper/decoding.py#L249-L274)
+  from the original python implementation, so in order to make a fair comparison between the 2 implementations, make sure
+  to run the python code with the following parameters:
+
+```
+whisper --best_of None --beam_size None ...
+```
+
+In the future, `whisper.cpp` will support more sampling strategies.
+
 ## Another example
 
 Here is another example of transcribing a [3:24 min speech](https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg)
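The "very basic greedy sampling scheme" described in the moved section is simply an argmax over the next-token probabilities at each decoding step. A minimal sketch of the idea, using hypothetical names rather than whisper.cpp's actual API:

```c
// Greedy sampling: always emit the highest-probability token.
// Hypothetical helper for illustration; not whisper.cpp's actual API.
#include <stddef.h>

size_t sample_greedy(const float * probs, size_t n_vocab) {
    size_t best = 0;
    for (size_t i = 1; i < n_vocab; i++) {
        if (probs[i] > probs[best]) {
            best = i;
        }
    }
    return best; // token id with the highest probability
}
```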
livestream.sh (4 changed lines)

@@ -4,10 +4,6 @@ set -eo pipefail
 # Idea by @semiformal-net
 # ref: https://github.com/ggerganov/whisper.cpp/issues/185
 #
-# TODO:
-# - Currently, there is a gap between sequential chunks, so some of the words are dropped. Need to figure out a
-#   way to produce a continuous stream of audio chunks.
-#
 
 url="http://a.files.bbci.co.uk/media/live/manifesto/audio/simulcast/hls/nonuk/sbr_low/ak/bbc_world_service.m3u8"
 fmt=aac # the audio format extension of the stream (TODO: auto detect)
ggml.c (26 changed lines)
@@ -96,6 +96,8 @@ typedef void* thread_ret_t;
 #include <Accelerate/Accelerate.h>
 #elif GGML_USE_OPENBLAS
 #include <cblas.h>
+// sgemm
+extern void sgemm_(char* transa, char* transb, int* m, int* n, int* k, float* alpha, float* a, int* lda, float* b, int* ldb, float* beta, float* c, int* ldc);
 #endif
 
 // floating point type used to accumulate sums
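The prototype added in this hunk targets the Fortran BLAS binding rather than CBLAS: the symbol carries a trailing underscore, every argument (scalars included) is passed by pointer, and matrices are column-major. A minimal direct call, assuming any BLAS that exports `sgemm_` is linked (reference BLAS, OpenBLAS, or NVBLAS):

```c
// Column-major C = A * B through the Fortran-convention entry point.
// Sketch; build with e.g. `gcc sgemm_demo.c -lopenblas`.
#include <stdio.h>

extern void sgemm_(char* transa, char* transb, int* m, int* n, int* k,
                   float* alpha, float* a, int* lda, float* b, int* ldb,
                   float* beta, float* c, int* ldc);

int main(void) {
    int n = 2;
    float one = 1.0f, zero = 0.0f;
    float A[4] = {1, 2, 3, 4}; // column-major: [[1, 3], [2, 4]]
    float I[4] = {1, 0, 0, 1}; // 2x2 identity
    float C[4] = {0};

    // C = 1.0 * A * I + 0.0 * C; note that every argument is a pointer
    sgemm_("N", "N", &n, &n, &n, &one, A, &n, I, &n, &zero, C, &n);

    printf("%g %g %g %g\n", C[0], C[1], C[2], C[3]); // prints: 1 2 3 4
    return 0;
}
```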
@@ -213,7 +215,7 @@ ggml_fp16_t ggml_fp32_to_fp16(float f) {
 }
 
 #define GGML_FP16_TO_FP32(x) ggml_fp16_to_fp32(x)
-#define GGML_FP32_TO_TP16(x) ggml_fp32_to_fp16(x)
+#define GGML_FP32_TO_FP16(x) ggml_fp32_to_fp16(x)
 
 #endif // __F16C__
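This hunk is a one-character typo fix: the fallback macro was defined as `GGML_FP32_TO_TP16`, so any code referencing `GGML_FP32_TO_FP16` in a non-F16C build would fail to compile. For context, the pair is meant to round-trip between fp32 and fp16; a sketch using the x86 F16C intrinsics that ggml appears to use when `__F16C__` is defined (an assumption about the surrounding code; build with `-mf16c`):

```c
// fp32 -> fp16 -> fp32 round trip via the F16C intrinsics.
// Sketch assuming an x86-64 CPU with F16C: gcc -mf16c fp16_demo.c
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    float src = 3.14159265f;
    uint16_t h = _cvtss_sh(src, 0); // fp32 -> fp16 bits, round to nearest even
    float back = _cvtsh_ss(h);      // fp16 bits -> fp32
    printf("src = %.8f, back = %.8f\n", src, back); // back ~ 3.140625
    return 0;
}
```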
@@ -4588,11 +4590,23 @@ void ggml_compute_forward_mul_mat_f16_f32(
 
         // zT = y * xT
         {
-            cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
-                    ne11, ne01, ne10,
-                    1.0f, y, ne10,
-                    x, ne10,
-                    0.0f, d, ne01);
+            //cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
+            //        ne11, ne01, ne10,
+            //        1.0f, y, ne10,
+            //        x, ne10,
+            //        0.0f, d, ne01);
+
+            // this is compatible with nvblas
+            float one = 1.0f;
+            float zero = 0.0f;
+            sgemm_(
+                    "T", "N",
+                    &ne0, &ne1, &ne10,
+                    &one,
+                    x, &ne10,
+                    y, &ne10,
+                    &zero,
+                    d, &ne0);
         }
     }
 }
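This last hunk is the point of the branch: NVBLAS intercepts the Fortran-level BLAS symbols such as `sgemm_` and offloads sufficiently large calls to the GPU, so routing the product through the Fortran entry point makes the OpenBLAS build path NVBLAS-compatible. The layout change works because a row-major matrix reinterpreted as column-major is its transpose: the old call computed the row-major product d = y·xᵀ, and the new one writes the transpose of that product in column-major form, which is byte-identical in memory. A sanity-check sketch, assuming any BLAS exporting `sgemm_` (e.g. `gcc check_sgemm.c -lopenblas -lm`):

```c
// Check that the Fortran-convention call used above matches the
// row-major product d = y * x^T that the old cblas_sgemm call computed.
#include <math.h>
#include <stdio.h>

extern void sgemm_(char* transa, char* transb, int* m, int* n, int* k,
                   float* alpha, float* a, int* lda, float* b, int* ldb,
                   float* beta, float* c, int* ldc);

int main(void) {
    int ne01 = 3, ne11 = 2, ne10 = 4; // rows of x, rows of y, shared dim
    float x[3*4], y[2*4], d[2*3], ref[2*3];

    for (int i = 0; i < 12; i++) x[i] = (float) (i + 1);
    for (int i = 0; i <  8; i++) y[i] = (float) (i % 5) - 2.0f;

    // reference: d[i][j] = sum_k y[i][k] * x[j][k] (row-major, d is ne11 x ne01)
    for (int i = 0; i < ne11; i++) {
        for (int j = 0; j < ne01; j++) {
            float s = 0.0f;
            for (int k = 0; k < ne10; k++) s += y[i*ne10 + k] * x[j*ne10 + k];
            ref[i*ne01 + j] = s;
        }
    }

    // Fortran sgemm is column-major. The column-major view of a row-major
    // buffer is its transpose, so with transa='T' this computes
    // C(ne01 x ne11) = x * y^T in row-major terms, i.e. d^T, and a
    // column-major d^T is byte-identical to the row-major d = y * x^T.
    float one = 1.0f, zero = 0.0f;
    sgemm_("T", "N", &ne01, &ne11, &ne10, &one, x, &ne10, y, &ne10, &zero, d, &ne01);

    float max_err = 0.0f;
    for (int i = 0; i < ne11*ne01; i++) max_err = fmaxf(max_err, fabsf(d[i] - ref[i]));
    printf("max |d - ref| = %g\n", max_err); // expect 0 for these small integers
    return max_err < 1e-5f ? 0 : 1;
}
```

To actually offload, NVIDIA's documented route is to preload NVBLAS over the CPU library, e.g. `LD_PRELOAD=libnvblas.so ./main ...`, with an `nvblas.conf` whose `NVBLAS_CPU_BLAS_LIB` points at the fallback BLAS.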