forked from extern/whisper.cpp
whisper : reduce memory usage during inference (#431)
* ggml : add "scratch" buffer support
* ggml : support for scratch ring-buffer
* ggml : bug fix in ggml_repeat()
* ggml : error on scratch buffer overflow
* whisper : use scratch buffers during inference (base model only)
* whisper : update memory usage for all models
* whisper : fix encoder memory usage
* whisper : use whisper_context functions instead of macros
* whisper : fix FF + remove it from README
* ggml : reuse ggml_new_i32
* ggml : refactor the scratch buffer storage
* whisper : reorder scratch buffers in the decoder
* main : add option to disable temp fallback
* Update README.md
This commit is contained in:
parent c306a7fd89
commit f3ee4a9673
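For orientation before the per-file diffs, here is a minimal sketch (not part of the commit) of how the scratch-buffer API that this commit adds to `ggml.h` is meant to be driven by a caller such as whisper.cpp. The buffer sizes and tensor shapes below are arbitrary placeholders.

```c
#include "ggml.h"

int main(void) {
    // Main pool: holds the ggml_tensor headers and any persistent data.
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
    };
    struct ggml_context * ctx = ggml_init(params);

    // Route the data of intermediate tensors into an external scratch buffer;
    // only the small tensor headers keep going into the main pool.
    static char scratch_buf[32*1024*1024];
    ggml_set_scratch(ctx, (struct ggml_scratch) { 0, sizeof(scratch_buf), scratch_buf, });

    struct ggml_tensor * tmp = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024); // data placed in scratch_buf

    // Switch the scratch off again: with data == NULL, allocations fall back to the main pool.
    ggml_set_scratch(ctx, (struct ggml_scratch) { 0, 0, NULL, });

    struct ggml_tensor * persistent = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);

    (void) tmp;
    (void) persistent;

    ggml_free(ctx);

    return 0;
}
```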
README.md: 133 lines changed
@@ -13,7 +13,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - AVX intrinsics support for x86 architectures
 - VSX intrinsics support for POWER architectures
 - Mixed F16 / F32 precision
-- Low memory usage (Flash Attention + Flash Forward)
+- Low memory usage (Flash Attention)
 - Zero memory allocations at runtime
 - Runs on the CPU
 - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
@@ -105,11 +105,13 @@ options:
 -su, --speed-up [false ] speed up audio by x2 (reduced accuracy)
 -tr, --translate [false ] translate from source language to english
 -di, --diarize [false ] stereo audio diarization
+-nf, --no-fallback [false ] do not use temperature fallback while decoding
 -otxt, --output-txt [false ] output result in a text file
 -ovtt, --output-vtt [false ] output result in a vtt file
 -osrt, --output-srt [false ] output result in a srt file
 -owts, --output-words [false ] output script for generating karaoke video
 -ocsv, --output-csv [false ] output result in a CSV file
+-of FNAME, --output-file FNAME [ ] output file path (without file extension)
 -ps, --print-special [false ] print special tokens
 -pc, --print-colors [false ] print colors
 -pp, --print-progress [false ] print progress
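As a usage illustration (not taken from the commit): the new `-nf` and `-of` flags combine with the existing output options, so a call along these lines should write the text output to `transcript.txt` while keeping the decoder at a single temperature. The sample file and the output name are placeholders.

```
./main -m models/ggml-base.en.bin -f samples/jfk.wav -otxt -of transcript -nf
```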
@@ -137,7 +139,8 @@ Running base.en on all samples in ./samples ...
 [+] Running base.en on samples/jfk.wav ... (run 'ffplay samples/jfk.wav' to listen)
 ----------------------------------------------
 
-whisper_model_load: loading model from 'models/ggml-base.en.bin'
+whisper_init_from_file: loading model from 'models/ggml-base.en.bin'
+whisper_model_load: loading model
 whisper_model_load: n_vocab = 51864
 whisper_model_load: n_audio_ctx = 1500
 whisper_model_load: n_audio_state = 512
@@ -150,13 +153,14 @@ whisper_model_load: n_text_layer = 6
 whisper_model_load: n_mels = 80
 whisper_model_load: f16 = 1
 whisper_model_load: type = 2
+whisper_model_load: mem required = 215.00 MB (+ 6.00 MB per decoder)
+whisper_model_load: kv self size = 5.25 MB
+whisper_model_load: kv cross size = 17.58 MB
 whisper_model_load: adding 1607 extra tokens
-whisper_model_load: mem_required = 506.00 MB
-whisper_model_load: ggml ctx size = 140.60 MB
-whisper_model_load: memory size = 22.83 MB
+whisper_model_load: model ctx = 140.60 MB
 whisper_model_load: model size = 140.54 MB
 
-system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
+system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
 
 main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
 
@@ -164,12 +168,13 @@ main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 proc
 [00:00:00.000 --> 00:00:11.000] And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
 
 
-whisper_print_timings: load time = 105.91 ms
-whisper_print_timings: mel time = 24.62 ms
-whisper_print_timings: sample time = 3.63 ms
-whisper_print_timings: encode time = 324.71 ms / 54.12 ms per layer
-whisper_print_timings: decode time = 83.58 ms / 13.93 ms per layer
-whisper_print_timings: total time = 542.81 ms
+whisper_print_timings: fallbacks = 0 p / 0 h
+whisper_print_timings: load time = 113.81 ms
+whisper_print_timings: mel time = 15.40 ms
+whisper_print_timings: sample time = 11.58 ms / 27 runs ( 0.43 ms per run)
+whisper_print_timings: encode time = 266.60 ms / 1 runs ( 266.60 ms per run)
+whisper_print_timings: decode time = 66.11 ms / 27 runs ( 2.45 ms per run)
+whisper_print_timings: total time = 476.31 ms
 ```
 
 The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
@@ -212,11 +217,11 @@ make large
 
 | Model | Disk | Mem | SHA |
 | --- | --- | --- | --- |
-| tiny | 75 MB | ~390 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
-| base | 142 MB | ~500 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
-| small | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
-| medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
-| large | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
+| tiny | 75 MB | ~125 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
+| base | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
+| small | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
+| medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
+| large | 2.9 GB | ~3.3 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
 
 ## Limitations
 
@@ -234,7 +239,8 @@ in about half a minute on a MacBook M1 Pro, using `medium.en` model:
 ```java
 $ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8
 
-whisper_model_load: loading model from 'models/ggml-medium.en.bin'
+whisper_init_from_file: loading model from 'models/ggml-medium.en.bin'
+whisper_model_load: loading model
 whisper_model_load: n_vocab = 51864
 whisper_model_load: n_audio_ctx = 1500
 whisper_model_load: n_audio_state = 1024
@@ -247,55 +253,60 @@ whisper_model_load: n_text_layer = 24
 whisper_model_load: n_mels = 80
 whisper_model_load: f16 = 1
 whisper_model_load: type = 4
-whisper_model_load: mem_required = 2610.00 MB
+whisper_model_load: mem required = 1720.00 MB (+ 43.00 MB per decoder)
+whisper_model_load: kv self size = 42.00 MB
+whisper_model_load: kv cross size = 140.62 MB
 whisper_model_load: adding 1607 extra tokens
-whisper_model_load: ggml ctx size = 1644.97 MB
-whisper_model_load: memory size = 182.62 MB
+whisper_model_load: model ctx = 1462.35 MB
 whisper_model_load: model size = 1462.12 MB
 
-main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, lang = en, task = transcribe, timestamps = 1 ...
-
-[00:00.000 --> 00:08.000] My fellow Americans, this day has brought terrible news and great sadness to our country.
-[00:08.000 --> 00:17.000] At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
-[00:17.000 --> 00:23.000] A short time later, debris was seen falling from the skies above Texas.
-[00:23.000 --> 00:29.000] The Columbia's lost. There are no survivors.
-[00:29.000 --> 00:32.000] On board was a crew of seven.
-[00:32.000 --> 00:39.000] Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
-[00:39.000 --> 00:48.000] Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
-[00:48.000 --> 00:52.000] a colonel in the Israeli Air Force.
-[00:52.000 --> 00:58.000] These men and women assumed great risk in the service to all humanity.
-[00:58.000 --> 01:03.000] In an age when space flight has come to seem almost routine,
-[01:03.000 --> 01:07.000] it is easy to overlook the dangers of travel by rocket
-[01:07.000 --> 01:12.000] and the difficulties of navigating the fierce outer atmosphere of the Earth.
-[01:12.000 --> 01:18.000] These astronauts knew the dangers, and they faced them willingly,
-[01:18.000 --> 01:23.000] knowing they had a high and noble purpose in life.
-[01:23.000 --> 01:31.000] Because of their courage and daring and idealism, we will miss them all the more.
-[01:31.000 --> 01:36.000] All Americans today are thinking as well of the families of these men and women
-[01:36.000 --> 01:40.000] who have been given this sudden shock and grief.
-[01:40.000 --> 01:45.000] You're not alone. Our entire nation grieves with you,
-[01:45.000 --> 01:52.000] and those you love will always have the respect and gratitude of this country.
-[01:52.000 --> 01:56.000] The cause in which they died will continue.
-[01:56.000 --> 02:04.000] Mankind is led into the darkness beyond our world by the inspiration of discovery
-[02:04.000 --> 02:11.000] and the longing to understand. Our journey into space will go on.
-[02:11.000 --> 02:16.000] In the skies today, we saw destruction and tragedy.
-[02:16.000 --> 02:22.000] Yet farther than we can see, there is comfort and hope.
-[02:22.000 --> 02:29.000] In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
-[02:29.000 --> 02:35.000] who created all these. He who brings out the starry hosts one by one
-[02:35.000 --> 02:39.000] and calls them each by name."
-[02:39.000 --> 02:46.000] Because of His great power and mighty strength, not one of them is missing.
-[02:46.000 --> 02:55.000] The same Creator who names the stars also knows the names of the seven souls we mourn today.
-[02:55.000 --> 03:01.000] The crew of the shuttle Columbia did not return safely to earth,
-[03:01.000 --> 03:05.000] yet we can pray that all are safely home.
-[03:05.000 --> 03:13.000] May God bless the grieving families, and may God continue to bless America.
-[03:13.000 --> 03:41.000] Audio
-
-
-whisper_print_timings: load time = 575.92 ms
-whisper_print_timings: mel time = 230.60 ms
-whisper_print_timings: sample time = 73.19 ms
-whisper_print_timings: encode time = 19552.61 ms / 814.69 ms per layer
-whisper_print_timings: decode time = 13249.96 ms / 552.08 ms per layer
-whisper_print_timings: total time = 33686.27 ms
+system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
+
+main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
+
+
+[00:00:00.000 --> 00:00:08.000] My fellow Americans, this day has brought terrible news and great sadness to our country.
+[00:00:08.000 --> 00:00:17.000] At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
+[00:00:17.000 --> 00:00:23.000] A short time later, debris was seen falling from the skies above Texas.
+[00:00:23.000 --> 00:00:29.000] The Columbia's lost. There are no survivors.
+[00:00:29.000 --> 00:00:32.000] On board was a crew of seven.
+[00:00:32.000 --> 00:00:39.000] Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
+[00:00:39.000 --> 00:00:48.000] Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
+[00:00:48.000 --> 00:00:52.000] a colonel in the Israeli Air Force.
+[00:00:52.000 --> 00:00:58.000] These men and women assumed great risk in the service to all humanity.
+[00:00:58.000 --> 00:01:03.000] In an age when space flight has come to seem almost routine,
+[00:01:03.000 --> 00:01:07.000] it is easy to overlook the dangers of travel by rocket
+[00:01:07.000 --> 00:01:12.000] and the difficulties of navigating the fierce outer atmosphere of the Earth.
+[00:01:12.000 --> 00:01:18.000] These astronauts knew the dangers, and they faced them willingly,
+[00:01:18.000 --> 00:01:23.000] knowing they had a high and noble purpose in life.
+[00:01:23.000 --> 00:01:31.000] Because of their courage and daring and idealism, we will miss them all the more.
+[00:01:31.000 --> 00:01:36.000] All Americans today are thinking as well of the families of these men and women
+[00:01:36.000 --> 00:01:40.000] who have been given this sudden shock and grief.
+[00:01:40.000 --> 00:01:45.000] You're not alone. Our entire nation grieves with you,
+[00:01:45.000 --> 00:01:52.000] and those you love will always have the respect and gratitude of this country.
+[00:01:52.000 --> 00:01:56.000] The cause in which they died will continue.
+[00:01:56.000 --> 00:02:04.000] Mankind is led into the darkness beyond our world by the inspiration of discovery
+[00:02:04.000 --> 00:02:11.000] and the longing to understand. Our journey into space will go on.
+[00:02:11.000 --> 00:02:16.000] In the skies today, we saw destruction and tragedy.
+[00:02:16.000 --> 00:02:22.000] Yet farther than we can see, there is comfort and hope.
+[00:02:22.000 --> 00:02:29.000] In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
+[00:02:29.000 --> 00:02:35.000] who created all these. He who brings out the starry hosts one by one
+[00:02:35.000 --> 00:02:39.000] and calls them each by name."
+[00:02:39.000 --> 00:02:46.000] Because of His great power and mighty strength, not one of them is missing.
+[00:02:46.000 --> 00:02:55.000] The same Creator who names the stars also knows the names of the seven souls we mourn today.
+[00:02:55.000 --> 00:03:01.000] The crew of the shuttle Columbia did not return safely to earth,
+[00:03:01.000 --> 00:03:05.000] yet we can pray that all are safely home.
+[00:03:05.000 --> 00:03:13.000] May God bless the grieving families, and may God continue to bless America.
+[00:03:13.000 --> 00:03:19.000] [Silence]
+
+
+whisper_print_timings: fallbacks = 1 p / 0 h
+whisper_print_timings: load time = 569.03 ms
+whisper_print_timings: mel time = 146.85 ms
+whisper_print_timings: sample time = 238.66 ms / 553 runs ( 0.43 ms per run)
+whisper_print_timings: encode time = 18665.10 ms / 9 runs ( 2073.90 ms per run)
+whisper_print_timings: decode time = 13090.93 ms / 549 runs ( 23.85 ms per run)
+whisper_print_timings: total time = 32733.52 ms
 ```
 </details>
 
File diff suppressed because one or more lines are too long
@@ -17,17 +17,27 @@ options:
 -d N, --duration N [0 ] duration of audio to process in milliseconds
 -mc N, --max-context N [-1 ] maximum number of text context tokens to store
 -ml N, --max-len N [0 ] maximum segment length in characters
+-bo N, --best-of N [5 ] number of best candidates to keep
+-bs N, --beam-size N [-1 ] beam size for beam search
 -wt N, --word-thold N [0.01 ] word timestamp probability threshold
+-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
+-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
 -su, --speed-up [false ] speed up audio by x2 (reduced accuracy)
 -tr, --translate [false ] translate from source language to english
+-di, --diarize [false ] stereo audio diarization
+-nf, --no-fallback [false ] do not use temperature fallback while decoding
 -otxt, --output-txt [false ] output result in a text file
 -ovtt, --output-vtt [false ] output result in a vtt file
 -osrt, --output-srt [false ] output result in a srt file
 -owts, --output-words [false ] output script for generating karaoke video
+-ocsv, --output-csv [false ] output result in a CSV file
+-of FNAME, --output-file FNAME [ ] output file path (without file extension)
 -ps, --print-special [false ] print special tokens
 -pc, --print-colors [false ] print colors
+-pp, --print-progress [false ] print progress
 -nt, --no-timestamps [true ] do not print timestamps
--l LANG, --language LANG [en ] spoken language
+-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
+--prompt PROMPT [ ] initial prompt
 -m FNAME, --model FNAME [models/ggml-base.en.bin] model path
 -f FNAME, --file FNAME [ ] input WAV file path
 ```
@@ -63,12 +63,13 @@ struct whisper_params {
     int32_t beam_size = -1;
 
     float word_thold = 0.01f;
-    float entropy_thold = 2.4f;
-    float logprob_thold = -1.0f;
+    float entropy_thold = 2.40f;
+    float logprob_thold = -1.00f;
 
     bool speed_up = false;
     bool translate = false;
     bool diarize = false;
+    bool no_fallback = false;
     bool output_txt = false;
     bool output_vtt = false;
     bool output_srt = false;
@@ -117,6 +118,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
         else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
         else if (arg == "-di" || arg == "--diarize") { params.diarize = true; }
+        else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
        else if (arg == "-otxt" || arg == "--output-txt") { params.output_txt = true; }
         else if (arg == "-ovtt" || arg == "--output-vtt") { params.output_vtt = true; }
         else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
@@ -162,6 +164,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
     fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
     fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
     fprintf(stderr, " -di, --diarize [%-7s] stereo audio diarization\n", params.diarize ? "true" : "false");
+    fprintf(stderr, " -nf, --no-fallback [%-7s] do not use temperature fallback while decoding\n", params.no_fallback ? "true" : "false");
     fprintf(stderr, " -otxt, --output-txt [%-7s] output result in a text file\n", params.output_txt ? "true" : "false");
     fprintf(stderr, " -ovtt, --output-vtt [%-7s] output result in a vtt file\n", params.output_vtt ? "true" : "false");
     fprintf(stderr, " -osrt, --output-srt [%-7s] output result in a srt file\n", params.output_srt ? "true" : "false");
@@ -514,7 +517,7 @@ int main(int argc, char ** argv) {
 
     for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
         const auto fname_inp = params.fname_inp[f];
-        const auto fname_outp = f < params.fname_outp.size() && !params.fname_outp[f].empty() ? params.fname_outp[f] : params.fname_inp[f];
+        const auto fname_outp = f < (int) params.fname_outp.size() && !params.fname_outp[f].empty() ? params.fname_outp[f] : params.fname_inp[f];
 
         std::vector<float> pcmf32; // mono-channel F32 PCM
         std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
@@ -647,17 +650,19 @@ int main(int argc, char ** argv) {
 
         wparams.token_timestamps = params.output_wts || params.max_len > 0;
         wparams.thold_pt = params.word_thold;
-        wparams.entropy_thold = params.entropy_thold;
-        wparams.logprob_thold = params.logprob_thold;
         wparams.max_len = params.output_wts && params.max_len == 0 ? 60 : params.max_len;
 
         wparams.speed_up = params.speed_up;
 
+        wparams.prompt_tokens = prompt_tokens.empty() ? nullptr : prompt_tokens.data();
+        wparams.prompt_n_tokens = prompt_tokens.empty() ? 0 : prompt_tokens.size();
+
         wparams.greedy.best_of = params.best_of;
         wparams.beam_search.beam_size = params.beam_size;
 
-        wparams.prompt_tokens = prompt_tokens.empty() ? nullptr : prompt_tokens.data();
-        wparams.prompt_n_tokens = prompt_tokens.empty() ? 0 : prompt_tokens.size();
+        wparams.temperature_inc = params.no_fallback ? 0.0f : wparams.temperature_inc;
+        wparams.entropy_thold = params.entropy_thold;
+        wparams.logprob_thold = params.logprob_thold;
 
         whisper_print_user_data user_data = { &params, &pcmf32s };
 
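A hedged sketch of the same idea through the C API (this is not code from the commit, and the model path is a placeholder): disabling the temperature fallback amounts to zeroing `temperature_inc` on `whisper_full_params`, exactly as the new `-nf` handling above does.

```c
#include "whisper.h"

int transcribe_no_fallback(const float * pcm, int n_samples) {
    struct whisper_context * ctx = whisper_init_from_file("models/ggml-base.en.bin");
    if (ctx == NULL) {
        return 1;
    }

    struct whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

    // A zero temperature increment means a failed decode is not retried at
    // higher temperatures (the effect of the new -nf flag).
    wparams.temperature_inc = 0.0f;

    // Decoder-failure thresholds, using the defaults shown in the options above.
    wparams.entropy_thold = 2.40f;
    wparams.logprob_thold = -1.00f;

    const int ret = whisper_full(ctx, wparams, pcm, n_samples);

    whisper_free(ctx);

    return ret;
}
```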
ggml.c: 107 lines changed
@@ -1258,7 +1258,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
 //
 
 struct ggml_object {
-    size_t offset;
+    size_t offs;
     size_t size;
 
     struct ggml_object * next;
@@ -1284,6 +1284,9 @@ struct ggml_context {
 
     struct ggml_object * objects_begin;
     struct ggml_object * objects_end;
+
+    struct ggml_scratch scratch;
+    struct ggml_scratch scratch_save;
 };
 
 struct ggml_context_container {
@@ -1346,7 +1349,7 @@ inline static void ggml_critical_section_end(void) {
 
 void ggml_print_object(const struct ggml_object * obj) {
     GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
-            obj->offset, obj->size, (const void *) obj->next);
+            obj->offs, obj->size, (const void *) obj->next);
 }
 
 void ggml_print_objects(const struct ggml_context * ctx) {
@@ -1542,12 +1545,14 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
     }
 
     *ctx = (struct ggml_context) {
-        .mem_size = params.mem_size,
-        .mem_buffer = params.mem_buffer ? params.mem_buffer : malloc(params.mem_size),
-        .mem_buffer_owned = params.mem_buffer ? false : true,
-        .n_objects = 0,
-        .objects_begin = NULL,
-        .objects_end = NULL,
+        /*.mem_size =*/ params.mem_size,
+        /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size),
+        /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
+        /*.n_objects =*/ 0,
+        /*.objects_begin =*/ NULL,
+        /*.objects_end =*/ NULL,
+        /*.scratch =*/ { 0, 0, NULL, },
+        /*.scratch_save =*/ { 0, 0, NULL, },
     };
 
     ggml_assert_aligned(ctx->mem_buffer);
@@ -1570,7 +1575,7 @@ void ggml_free(struct ggml_context * ctx) {
             g_state.contexts[i].used = false;
 
             GGML_PRINT_DEBUG("%s: context %d with %d objects has been freed. memory used = %zu\n",
-                    __func__, i, ctx->n_objects, ctx->objects_end->offset + ctx->objects_end->size);
+                    __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
 
             if (ctx->mem_buffer_owned) {
                 free(ctx->mem_buffer);
@@ -1589,7 +1594,15 @@ void ggml_free(struct ggml_context * ctx) {
 }
 
 size_t ggml_used_mem(const struct ggml_context * ctx) {
-    return ctx->objects_end->offset + ctx->objects_end->size;
+    return ctx->objects_end->offs + ctx->objects_end->size;
+}
+
+size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) {
+    const size_t result = ctx->scratch.data ? ctx->scratch.offs : 0;
+
+    ctx->scratch = scratch;
+
+    return result;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1603,9 +1616,9 @@ struct ggml_tensor * ggml_new_tensor_impl(
     // always insert objects at the end of the context's memory pool
     struct ggml_object * obj_cur = ctx->objects_end;
 
-    const size_t cur_offset = obj_cur == NULL ? 0 : obj_cur->offset;
+    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
     const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
-    const size_t cur_end = cur_offset + cur_size;
+    const size_t cur_end = cur_offs + cur_size;
 
     size_t size_needed = 0;
 
@@ -1616,25 +1629,52 @@ struct ggml_tensor * ggml_new_tensor_impl(
         }
         // align to GGML_MEM_ALIGN
         size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
 
     }
 
+    char * const mem_buffer = ctx->mem_buffer;
+    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
+
+    if (ctx->scratch.data == NULL || data != NULL) {
         size_needed += sizeof(struct ggml_tensor);
 
         if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
-            GGML_PRINT("%s: not enough space in the context's memory pool\n", __func__);
+            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                    __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
             assert(false);
             return NULL;
         }
 
-    char * const mem_buffer = ctx->mem_buffer;
-
-    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
-
         *obj_new = (struct ggml_object) {
-            .offset = cur_end + GGML_OBJECT_SIZE,
+            .offs = cur_end + GGML_OBJECT_SIZE,
             .size = size_needed,
             .next = NULL,
         };
+    } else {
+        if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
+            GGML_PRINT("%s: not enough space in the scratch memory\n", __func__);
+            assert(false);
+            return NULL;
+        }
+
+        if (cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE > ctx->mem_size) {
+            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                    __func__, cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE, ctx->mem_size);
+            assert(false);
+            return NULL;
+        }
+
+        data = (char * const) ctx->scratch.data + ctx->scratch.offs;
+
+        *obj_new = (struct ggml_object) {
+            .offs = cur_end + GGML_OBJECT_SIZE,
+            .size = sizeof(struct ggml_tensor),
+            .next = NULL,
+        };
+
+        //printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
+
+        ctx->scratch.offs += size_needed;
+    }
 
     if (obj_cur != NULL) {
         obj_cur->next = obj_new;
@@ -1645,9 +1685,9 @@ struct ggml_tensor * ggml_new_tensor_impl(
 
     ctx->objects_end = obj_new;
 
-    //GGML_PRINT_DEBUG("%s: inserted new object at %zu\n", __func__, cur_end);
+    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
 
-    struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offset);
+    struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offs);
 
     ggml_assert_aligned(result);
 
@@ -1690,7 +1730,7 @@ struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum ggml_type type,
         int n_dims,
-        const int* ne) {
+        const int * ne) {
     return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
 }
 
@@ -1732,16 +1772,26 @@ struct ggml_tensor * ggml_new_tensor_4d(
 }
 
 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
+    ctx->scratch_save = ctx->scratch;
+    ctx->scratch.data = NULL;
+
     struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
 
+    ctx->scratch = ctx->scratch_save;
+
     ggml_set_i32(result, value);
 
     return result;
 }
 
 struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
+    ctx->scratch_save = ctx->scratch;
+    ctx->scratch.data = NULL;
+
     struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
 
+    ctx->scratch = ctx->scratch_save;
+
     ggml_set_f32(result, value);
 
     return result;
@@ -2350,7 +2400,7 @@ struct ggml_tensor * ggml_repeat(
     result->op = GGML_OP_REPEAT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src0 = a;
-    result->src1 = NULL;
+    result->src1 = b;
 
     return result;
 }
@@ -2966,9 +3016,7 @@ struct ggml_tensor * ggml_diag_mask_inf(
     // TODO: when implement backward, fix this:
     //struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
     struct ggml_tensor * result = ggml_view_tensor(ctx, a);
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
-    ((int32_t *) b->data)[0] = n_past;
+    struct ggml_tensor * b = ggml_new_i32(ctx, n_past);
 
     result->op = GGML_OP_DIAG_MASK_INF;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -4300,7 +4348,9 @@ static bool ggml_compute_forward_mul_mat_use_blas(
     const int ne1 = dst->ne[1];
 
     // TODO: find the optimal values for these
-    if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && ne0 >= 32 && ne1 >= 32 && ne10 >= 32) {
+    if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && (
+        (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)
+        )) {
         //printf("BLAS: %d %d %d\n", ne0, ne1, ne10);
         return true;
     }
@@ -7289,6 +7339,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                         node->n_tasks = 1; // TODO: this actually is doing nothing
                                            //       the threads are still spinning
                         cur = sizeof(float)*(node->src0->ne[0]*node->src0->ne[1]);
+                        //printf("src0: ne0 = %d, ne1 = %d, ne = %d\n", node->src0->ne[0], node->src0->ne[1], node->src0->ne[0]*node->src0->ne[1]);
+                        //printf("src1: ne0 = %d, ne1 = %d, ne = %d\n", node->src1->ne[0], node->src1->ne[1], node->src1->ne[0]*node->src1->ne[1]);
+                        //printf("cur = %zu\n", cur);
                     } else {
                         cur = sizeof(ggml_fp16_t)*ggml_nelements(node->src1);
                     }
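To make the "scratch ring-buffer" idea from the commit message concrete, here is a conceptual sketch only; it is not code from the commit, and the helper, layer structure and buffer count are invented for illustration. While a graph is being built, allocations ping-pong between two scratch regions, so a node's inputs, which live in the other region, are still intact when the node is computed, and the same two regions are reused for every layer instead of growing the context.

```c
#include "ggml.h"

// Hypothetical helper: run n_layers matrix multiplications, alternating the
// scratch buffer used for the intermediate results of each layer.
static struct ggml_tensor * feed_forward_stack(
        struct ggml_context  * ctx,
        struct ggml_tensor   * cur,
        struct ggml_tensor  ** w,          // one weight matrix per layer
        int                    n_layers,
        struct ggml_scratch    scratch[2]) {
    for (int il = 0; il < n_layers; ++il) {
        // Ping-pong between the two scratch regions; every switch restarts the
        // region at offset 0, so at compute time a layer's intermediates reuse
        // the memory of the layer before last, which has already been consumed.
        ggml_set_scratch(ctx, scratch[il % 2]);

        cur = ggml_relu(ctx, ggml_mul_mat(ctx, w[il], cur));
    }

    // Return to the main pool and materialize the final result there, so it
    // survives further scratch reuse.
    ggml_set_scratch(ctx, (struct ggml_scratch) { 0, 0, NULL, });

    return ggml_cpy(ctx, cur, ggml_dup_tensor(ctx, cur));
}
```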
ggml.h: 9 lines changed
@@ -301,6 +301,13 @@ struct ggml_cgraph {
     int64_t perf_time_us;
 };
 
+// scratch buffer
+struct ggml_scratch {
+    size_t offs;
+    size_t size;
+    void * data;
+};
+
 struct ggml_init_params {
     // memory pool
     size_t mem_size; // bytes
@@ -327,6 +334,8 @@ void ggml_free(struct ggml_context * ctx);
 
 size_t ggml_used_mem(const struct ggml_context * ctx);
 
+size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
+
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum ggml_type type,
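One more small sketch (again not from the commit, and the helper name is invented): because `ggml_set_scratch()` returns the previous scratch offset, switching back to the main pool also tells the caller how much of the scratch region the preceding graph section actually used, which can help when sizing the per-model buffers.

```c
#include "ggml.h"

// Hypothetical helper: deactivate the current scratch buffer and record the
// largest amount of scratch memory observed so far.
static size_t end_scratch_section(struct ggml_context * ctx, size_t * high_water) {
    const size_t used = ggml_set_scratch(ctx, (struct ggml_scratch) { 0, 0, NULL, });

    if (used > *high_water) {
        *high_water = used;
    }

    return used;
}
```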
whisper.cpp: 754 lines changed
File diff suppressed because it is too large