bench : fix Windows linkage by moving the ggml benches into the whisper lib

2023-01-18 21:16:25 +02:00
12 changed files with 82 additions and 118 deletions
--- a/.gitignore
+++ b/.gitignore
@ -18,7 +18,6 @@ build-sanitize-thread/
 /talk
 /bench

-arm_neon.h
 sync.sh
 libwhisper.a
 libwhisper.so
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,6 +1,6 @@
 cmake_minimum_required (VERSION 3.0)

-project(whisper.cpp VERSION 1.1.1)
+project(whisper.cpp VERSION 1.1.0)

 # Add path to modules
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
--- a/README.md
+++ b/README.md
@ -4,7 +4,7 @@
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)

-Stable: [v1.1.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.1.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Stable: [v1.0.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.0.4) / Beta: [v1.1.0](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.1.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)

 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:

--- a/bindings/ios
+++ b/bindings/ios
--- a/bindings/javascript/package.json
+++ b/bindings/javascript/package.json
@ -1,6 +1,6 @@
 {
  "name": "whisper.cpp",
-  "version": "1.1.1",
+  "version": "1.1.0",
  "description": "Whisper speech recognition",
  "main": "whisper.js",
  "scripts": {
--- a/bindings/javascript/whisper.js
+++ b/bindings/javascript/whisper.js
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -84,7 +84,6 @@ struct whisper_params {
    std::string model    = "models/ggml-base.en.bin";

    std::vector<std::string> fname_inp = {};
-    std::vector<std::string> fname_outp = {};
 };

 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@ -122,7 +121,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-osrt" || arg == "--output-srt")     { params.output_srt     = true; }
        else if (arg == "-owts" || arg == "--output-words")   { params.output_wts     = true; }
        else if (arg == "-ocsv" || arg == "--output-csv")     { params.output_csv     = true; }
-        else if (arg == "-of"   || arg == "--output-file")    { params.fname_outp.emplace_back(argv[++i]); }
        else if (arg == "-ps"   || arg == "--print-special")  { params.print_special  = true; }
        else if (arg == "-pc"   || arg == "--print-colors")   { params.print_colors   = true; }
        else if (arg == "-pp"   || arg == "--print-progress") { params.print_progress = true; }
@ -146,36 +144,35 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, "options:\n");
-    fprintf(stderr, "  -h,        --help              [default] show this help message and exit\n");
-    fprintf(stderr, "  -t N,      --threads N         [%-7d] number of threads to use during computation\n",    params.n_threads);
-    fprintf(stderr, "  -p N,      --processors N      [%-7d] number of processors to use during computation\n", params.n_processors);
-    fprintf(stderr, "  -ot N,     --offset-t N        [%-7d] time offset in milliseconds\n",                    params.offset_t_ms);
-    fprintf(stderr, "  -on N,     --offset-n N        [%-7d] segment index offset\n",                           params.offset_n);
-    fprintf(stderr, "  -d  N,     --duration N        [%-7d] duration of audio to process in milliseconds\n",   params.duration_ms);
-    fprintf(stderr, "  -mc N,     --max-context N     [%-7d] maximum number of text context tokens to store\n", params.max_context);
-    fprintf(stderr, "  -ml N,     --max-len N         [%-7d] maximum segment length in characters\n",           params.max_len);
-    fprintf(stderr, "  -bo N,     --best-of N         [%-7d] number of best candidates to keep\n",              params.best_of);
-    fprintf(stderr, "  -bs N,     --beam-size N       [%-7d] beam size for beam search\n",                      params.beam_size);
-    fprintf(stderr, "  -wt N,     --word-thold N      [%-7.2f] word timestamp probability threshold\n",         params.word_thold);
-    fprintf(stderr, "  -et N,     --entropy-thold N   [%-7.2f] entropy threshold for decoder fail\n",           params.entropy_thold);
-    fprintf(stderr, "  -lpt N,    --logprob-thold N   [%-7.2f] log probability threshold for decoder fail\n",   params.logprob_thold);
-    fprintf(stderr, "  -su,       --speed-up          [%-7s] speed up audio by x2 (reduced accuracy)\n",        params.speed_up ? "true" : "false");
-    fprintf(stderr, "  -tr,       --translate         [%-7s] translate from source language to english\n",      params.translate ? "true" : "false");
-    fprintf(stderr, "  -di,       --diarize           [%-7s] stereo audio diarization\n",                       params.diarize ? "true" : "false");
-    fprintf(stderr, "  -otxt,     --output-txt        [%-7s] output result in a text file\n",                   params.output_txt ? "true" : "false");
-    fprintf(stderr, "  -ovtt,     --output-vtt        [%-7s] output result in a vtt file\n",                    params.output_vtt ? "true" : "false");
-    fprintf(stderr, "  -osrt,     --output-srt        [%-7s] output result in a srt file\n",                    params.output_srt ? "true" : "false");
-    fprintf(stderr, "  -owts,     --output-words      [%-7s] output script for generating karaoke video\n",     params.output_wts ? "true" : "false");
-    fprintf(stderr, "  -ocsv,     --output-csv        [%-7s] output result in a CSV file\n",                    params.output_csv ? "true" : "false");
-    fprintf(stderr, "  -of FNAME, --output-file FNAME [%-7s] output file path (without file extension)\n",      "");
-    fprintf(stderr, "  -ps,       --print-special     [%-7s] print special tokens\n",                           params.print_special ? "true" : "false");
-    fprintf(stderr, "  -pc,       --print-colors      [%-7s] print colors\n",                                   params.print_colors ? "true" : "false");
-    fprintf(stderr, "  -pp,       --print-progress    [%-7s] print progress\n",                                 params.print_progress ? "true" : "false");
-    fprintf(stderr, "  -nt,       --no-timestamps     [%-7s] do not print timestamps\n",                        params.no_timestamps ? "false" : "true");
-    fprintf(stderr, "  -l LANG,   --language LANG     [%-7s] spoken language ('auto' for auto-detect)\n",       params.language.c_str());
-    fprintf(stderr, "             --prompt PROMPT     [%-7s] initial prompt\n",                                 params.prompt.c_str());
-    fprintf(stderr, "  -m FNAME,  --model FNAME       [%-7s] model path\n",                                     params.model.c_str());
-    fprintf(stderr, "  -f FNAME,  --file FNAME        [%-7s] input WAV file path\n",                            "");
+    fprintf(stderr, "  -h,       --help            [default] show this help message and exit\n");
+    fprintf(stderr, "  -t N,     --threads N       [%-7d] number of threads to use during computation\n",    params.n_threads);
+    fprintf(stderr, "  -p N,     --processors N    [%-7d] number of processors to use during computation\n", params.n_processors);
+    fprintf(stderr, "  -ot N,    --offset-t N      [%-7d] time offset in milliseconds\n",                    params.offset_t_ms);
+    fprintf(stderr, "  -on N,    --offset-n N      [%-7d] segment index offset\n",                           params.offset_n);
+    fprintf(stderr, "  -d  N,    --duration N      [%-7d] duration of audio to process in milliseconds\n",   params.duration_ms);
+    fprintf(stderr, "  -mc N,    --max-context N   [%-7d] maximum number of text context tokens to store\n", params.max_context);
+    fprintf(stderr, "  -ml N,    --max-len N       [%-7d] maximum segment length in characters\n",           params.max_len);
+    fprintf(stderr, "  -bo N,    --best-of N       [%-7d] number of best candidates to keep\n",              params.best_of);
+    fprintf(stderr, "  -bs N,    --beam-size N     [%-7d] beam size for beam search\n",                      params.beam_size);
+    fprintf(stderr, "  -wt N,    --word-thold N    [%-7.2f] word timestamp probability threshold\n",         params.word_thold);
+    fprintf(stderr, "  -et N,    --entropy-thold N [%-7.2f] entropy threshold for decoder fail\n",           params.entropy_thold);
+    fprintf(stderr, "  -lpt N,   --logprob-thold N [%-7.2f] log probability threshold for decoder fail\n",   params.logprob_thold);
+    fprintf(stderr, "  -su,      --speed-up        [%-7s] speed up audio by x2 (reduced accuracy)\n",        params.speed_up ? "true" : "false");
+    fprintf(stderr, "  -tr,      --translate       [%-7s] translate from source language to english\n",      params.translate ? "true" : "false");
+    fprintf(stderr, "  -di,      --diarize         [%-7s] stereo audio diarization\n",                       params.diarize ? "true" : "false");
+    fprintf(stderr, "  -otxt,    --output-txt      [%-7s] output result in a text file\n",                   params.output_txt ? "true" : "false");
+    fprintf(stderr, "  -ovtt,    --output-vtt      [%-7s] output result in a vtt file\n",                    params.output_vtt ? "true" : "false");
+    fprintf(stderr, "  -osrt,    --output-srt      [%-7s] output result in a srt file\n",                    params.output_srt ? "true" : "false");
+    fprintf(stderr, "  -owts,    --output-words    [%-7s] output script for generating karaoke video\n",     params.output_wts ? "true" : "false");
+    fprintf(stderr, "  -ocsv,    --output-csv      [%-7s] output result in a CSV file\n",                    params.output_csv ? "true" : "false");
+    fprintf(stderr, "  -ps,      --print-special   [%-7s] print special tokens\n",                           params.print_special ? "true" : "false");
+    fprintf(stderr, "  -pc,      --print-colors    [%-7s] print colors\n",                                   params.print_colors ? "true" : "false");
+    fprintf(stderr, "  -pp,      --print-progress  [%-7s] print progress\n",                                 params.print_progress ? "true" : "false");
+    fprintf(stderr, "  -nt,      --no-timestamps   [%-7s] do not print timestamps\n",                        params.no_timestamps ? "false" : "true");
+    fprintf(stderr, "  -l LANG,  --language LANG   [%-7s] spoken language ('auto' for auto-detect)\n",       params.language.c_str());
+    fprintf(stderr, "            --prompt PROMPT   [%-7s] initial prompt\n",                                 params.prompt.c_str());
+    fprintf(stderr, "  -m FNAME, --model FNAME     [%-7s] model path\n",                                     params.model.c_str());
+    fprintf(stderr, "  -f FNAME, --file FNAME      [%-7s] input WAV file path\n",                            "");
    fprintf(stderr, "\n");
 }

@ -517,7 +514,6 @@ int main(int argc, char ** argv) {

    for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
        const auto fname_inp = params.fname_inp[f];
-		const auto fname_outp = f < params.fname_outp.size() && !params.fname_outp[f].empty() ? params.fname_outp[f] : params.fname_inp[f];

        std::vector<float> pcmf32; // mono-channel F32 PCM
        std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
@ -658,6 +654,7 @@ int main(int argc, char ** argv) {

            wparams.greedy.best_of        = params.best_of;
            wparams.beam_search.beam_size = params.beam_size;
+            wparams.temperature_inc = -1;

            wparams.prompt_tokens     = prompt_tokens.empty() ? nullptr : prompt_tokens.data();
            wparams.prompt_n_tokens   = prompt_tokens.empty() ? 0       : prompt_tokens.size();
@ -695,31 +692,31 @@ int main(int argc, char ** argv) {

            // output to text file
            if (params.output_txt) {
-                const auto fname_txt = fname_outp + ".txt";
+                const auto fname_txt = fname_inp + ".txt";
                output_txt(ctx, fname_txt.c_str());
            }

            // output to VTT file
            if (params.output_vtt) {
-                const auto fname_vtt = fname_outp + ".vtt";
+                const auto fname_vtt = fname_inp + ".vtt";
                output_vtt(ctx, fname_vtt.c_str());
            }

            // output to SRT file
            if (params.output_srt) {
-                const auto fname_srt = fname_outp + ".srt";
+                const auto fname_srt = fname_inp + ".srt";
                output_srt(ctx, fname_srt.c_str(), params);
            }

            // output to WTS file
            if (params.output_wts) {
-                const auto fname_wts = fname_outp + ".wts";
+                const auto fname_wts = fname_inp + ".wts";
                output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
            }

 	    // output to CSV file
            if (params.output_csv) {
-                const auto fname_csv = fname_outp + ".csv";
+                const auto fname_csv = fname_inp + ".csv";
                output_csv(ctx, fname_csv.c_str());
            }

--- a/examples/stream/stream.cpp
+++ b/examples/stream/stream.cpp
@ -423,8 +423,7 @@ int main(int argc, char ** argv) {
        return 1;
    }

-    params.keep_ms   = std::min(params.keep_ms,   params.step_ms);
-    params.length_ms = std::max(params.length_ms, params.step_ms);
+    params.keep_ms = std::min(params.keep_ms, params.step_ms); // cannot be more than step_ms

    const int n_samples_step = (params.step_ms  *1e-3)*WHISPER_SAMPLE_RATE;
    const int n_samples_len  = (params.length_ms*1e-3)*WHISPER_SAMPLE_RATE;
@ -433,7 +432,7 @@ int main(int argc, char ** argv) {

    const bool use_vad = n_samples_step <= 0; // sliding window mode uses VAD

-    const int n_new_line = !use_vad ? std::max(1, params.length_ms / params.step_ms - 1) : 1; // number of steps to print new line
+    const int n_new_line = !use_vad ? params.length_ms / params.step_ms - 1 : 1; // number of steps to print new line

    params.no_timestamps  = !use_vad;
    params.no_context    |= use_vad;
--- a/examples/whisper.wasm/CMakeLists.txt
+++ b/examples/whisper.wasm/CMakeLists.txt
@ -32,8 +32,8 @@ set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE=8 \
-    -s INITIAL_MEMORY=1500MB \
-    -s TOTAL_MEMORY=1500MB \
+    -s INITIAL_MEMORY=1024MB \
+    -s TOTAL_MEMORY=1024MB \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
    ${EXTRA_FLAGS} \
--- a/examples/whisper.wasm/index-tmpl.html
+++ b/examples/whisper.wasm/index-tmpl.html
@ -46,12 +46,10 @@

            <div id="model">
                Whisper model: <span id="model-whisper-status"></span>
-                <button id="fetch-whisper-tiny-en"  onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
-                <button id="fetch-whisper-tiny"     onclick="loadWhisper('tiny')">tiny (75 MB)</button>
-                <button id="fetch-whisper-base-en"  onclick="loadWhisper('base.en')">base.en (142 MB)</button>
-                <button id="fetch-whisper-base"     onclick="loadWhisper('base')">base (142 MB)</button>
-                <button id="fetch-whisper-small-en" onclick="loadWhisper('small.en')">small.en (466 MB)</button>
-                <button id="fetch-whisper-small"    onclick="loadWhisper('small')">small (466 MB)</button>
+                <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
+                <button id="fetch-whisper-tiny"    onclick="loadWhisper('tiny')">tiny (75 MB)</button>
+                <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
+                <button id="fetch-whisper-base"    onclick="loadWhisper('base')">base (142 MB)</button>
                <span id="fetch-whisper-progress"></span>

                <input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
@ -286,33 +284,27 @@
                }
                reader.readAsArrayBuffer(file);

-                document.getElementById('fetch-whisper-tiny-en' ).style.display = 'none';
-                document.getElementById('fetch-whisper-base-en' ).style.display = 'none';
-                document.getElementById('fetch-whisper-small-en').style.display = 'none';
-                document.getElementById('fetch-whisper-tiny'    ).style.display = 'none';
-                document.getElementById('fetch-whisper-base'    ).style.display = 'none';
-                document.getElementById('fetch-whisper-small'   ).style.display = 'none';
-                document.getElementById('whisper-file'          ).style.display = 'none';
-                document.getElementById('model-whisper-status'  ).innerHTML = 'loaded model: ' + file.name;
+                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
+                document.getElementById('fetch-whisper-base-en').style.display = 'none';
+                document.getElementById('fetch-whisper-tiny'   ).style.display = 'none';
+                document.getElementById('fetch-whisper-base'   ).style.display = 'none';
+                document.getElementById('whisper-file'         ).style.display = 'none';
+                document.getElementById('model-whisper-status' ).innerHTML = 'loaded model: ' + file.name;
            }

            function loadWhisper(model) {
                let urls = {
-                    'tiny.en':  'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
-                    'tiny':     'https://whisper.ggerganov.com/ggml-model-whisper-tiny.bin',
-                    'base.en':  'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
-                    'base':     'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
-                    'small.en': 'https://whisper.ggerganov.com/ggml-model-whisper-small.en.bin',
-                    'small':    'https://whisper.ggerganov.com/ggml-model-whisper-small.bin',
+                    'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
+                    'tiny':    'https://whisper.ggerganov.com/ggml-model-whisper-tiny.bin',
+                    'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
+                    'base':    'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
                };

                let sizes = {
-                    'tiny.en':  75,
-                    'tiny':     75,
-                    'base.en':  142,
-                    'base':     142,
-                    'small.en': 466,
-                    'small':    466,
+                    'tiny.en': 75,
+                    'tiny':    75,
+                    'base.en': 142,
+                    'base':    142,
                };

                let url     = urls[model];
@ -321,14 +313,12 @@

                model_whisper = model;

-                document.getElementById('fetch-whisper-tiny-en' ).style.display = 'none';
-                document.getElementById('fetch-whisper-base-en' ).style.display = 'none';
-                document.getElementById('fetch-whisper-small-en').style.display = 'none';
-                document.getElementById('fetch-whisper-tiny'    ).style.display = 'none';
-                document.getElementById('fetch-whisper-base'    ).style.display = 'none';
-                document.getElementById('fetch-whisper-small'   ).style.display = 'none';
-                document.getElementById('whisper-file'          ).style.display = 'none';
-                document.getElementById('model-whisper-status'  ).innerHTML = 'loading model: ' + model;
+                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
+                document.getElementById('fetch-whisper-base-en').style.display = 'none';
+                document.getElementById('fetch-whisper-tiny'   ).style.display = 'none';
+                document.getElementById('fetch-whisper-base'   ).style.display = 'none';
+                document.getElementById('whisper-file'         ).style.display = 'none';
+                document.getElementById('model-whisper-status' ).innerHTML = 'loading model: ' + model;

                cbProgress = function(p) {
                    let el = document.getElementById('fetch-whisper-progress');
@ -337,14 +327,12 @@

                cbCancel = function() {
                    var el;
-                    el = document.getElementById('fetch-whisper-tiny-en' ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-base-en' ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-small-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-tiny'    ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-base'    ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-small'   ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('whisper-file'          ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('model-whisper-status'  ); if (el) el.innerHTML = '';
+                    el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('fetch-whisper-tiny'   ); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('fetch-whisper-base'   ); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('whisper-file'         ); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('model-whisper-status' ); if (el) el.innerHTML = '';
                };

                loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
--- a/extra/bench-all.sh
+++ b/extra/bench-all.sh
@ -19,7 +19,7 @@ printf "\n"
 ./bench -w 1 -t 1 2>&1

 printf "\n"
-printf "Running ggml_mul_mat benchmark with $n_threads threads\n"
+printf "Running ggml_mul_mat benchmark with " $n_threads " threads\n"
 printf "\n"

 ./bench -w 2 -t $n_threads 2>&1
--- a/whisper.cpp
+++ b/whisper.cpp
@ -474,12 +474,6 @@ struct whisper_context {
    int64_t t_decode_us = 0;
    int64_t t_start_us  = 0;

-    int32_t n_sample = 0; // number of tokens sampled
-    int32_t n_encode = 0; // number of encoder calls
-    int32_t n_decode = 0; // number of decoder calls
-    int32_t n_fail_p = 0; // number of logprob threshold failures
-    int32_t n_fail_h = 0; // number of entropy threshold failures
-
    ggml_type wtype; // weight type (FP32 or FP16)

    whisper_mel mel;
@ -1626,7 +1620,6 @@ static bool whisper_encode(
    ggml_free(ctx0);

    wctx.t_encode_us += ggml_time_us() - t_start_us;
-    wctx.n_encode++;

    return true;
 }
@ -2000,7 +1993,6 @@ static bool whisper_decode(
    ggml_free(ctx0);

    wctx.t_decode_us += ggml_time_us() - t_start_us;
-    wctx.n_decode++;

    return true;
 }
@ -2652,17 +2644,12 @@ whisper_token whisper_token_transcribe(void) {
 void whisper_print_timings(struct whisper_context * ctx) {
    const int64_t t_end_us = ggml_time_us();

-    const int32_t n_sample = std::max(1, ctx->n_sample);
-    const int32_t n_encode = std::max(1, ctx->n_encode);
-    const int32_t n_decode = std::max(1, ctx->n_decode);
-
    fprintf(stderr, "\n");
-    fprintf(stderr, "%s:     fallbacks = %3d p / %3d h\n", __func__, ctx->n_fail_p, ctx->n_fail_h);
    fprintf(stderr, "%s:     load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
    fprintf(stderr, "%s:      mel time = %8.2f ms\n", __func__, ctx->t_mel_us/1000.0f);
-    fprintf(stderr, "%s:   sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_sample_us, n_sample, 1e-3f*ctx->t_sample_us/n_sample);
-    fprintf(stderr, "%s:   encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_encode_us, n_encode, 1e-3f*ctx->t_encode_us/n_encode);
-    fprintf(stderr, "%s:   decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_decode_us, n_decode, 1e-3f*ctx->t_decode_us/n_decode);
+    fprintf(stderr, "%s:   sample time = %8.2f ms\n", __func__, ctx->t_sample_us/1000.0f);
+    fprintf(stderr, "%s:   encode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_encode_us/1000.0f, ctx->t_encode_us/1000.0f/ctx->model.hparams.n_audio_layer);
+    fprintf(stderr, "%s:   decode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_decode_us/1000.0f, ctx->t_decode_us/1000.0f/ctx->model.hparams.n_text_layer);
    fprintf(stderr, "%s:    total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
 }

@ -3017,7 +3004,7 @@ static void whisper_process_logits(
 }

 static whisper_token_data whisper_sample_token(
-            whisper_context & ctx,
+      const whisper_context & ctx,
      const whisper_decoder & decoder,
                       bool   best) {
    whisper_token_data result = {
@ -3072,8 +3059,6 @@ static whisper_token_data whisper_sample_token(
        result.pt  = result.p;
    }

-    ctx.n_sample++;
-
    return result;
 }

@ -3142,8 +3127,6 @@ static std::vector<whisper_token_data> whisper_sample_token_topk(
        }
    }

-    ctx.n_sample++;
-
    return result;
 }

@ -3449,7 +3432,7 @@ int whisper_full(
                prompt.clear();

                // if we have already generated some text, use it as a prompt to condition the next generation
-                if (!prompt_past.empty() && t_cur < 0.5f) {
+                if (!prompt_past.empty() && t_cur > 0.5f) {
                    int n_take = std::min(std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2), int(prompt_past.size()));

                    prompt = { whisper_token_prev(ctx) };
@ -3738,12 +3721,11 @@ int whisper_full(
                    WHISPER_PRINT_DEBUG("%s: decoder %2d: score = %8.5f, result_len = %3d, avg_logprobs = %8.5f, entropy = %8.5f\n",
                            __func__, j, decoder.sequence.score, decoder.sequence.result_len, decoder.sequence.avg_logprobs, decoder.sequence.entropy);

-                    if (decoder.sequence.result_len > 32 && decoder.sequence.entropy < params.entropy_thold) {
+                    if (decoder.sequence.result_len > 8 && decoder.sequence.entropy < params.entropy_thold) {
                        WHISPER_PRINT_DEBUG("%s: decoder %2d: failed due to entropy %8.5f < %8.5f\n",
                                __func__, j, decoder.sequence.entropy, params.entropy_thold);

                        decoder.failed = true;
-                        ctx->n_fail_h++;

                        continue;
                    }
@ -3765,7 +3747,6 @@ int whisper_full(

                if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
                    success = false;
-                    ctx->n_fail_p++;
                }

                if (success) {
@ -4122,7 +4103,7 @@ WHISPER_API int whisper_bench_memcpy(int n_threads) {

        for (size_t i = 0; i < size; i++) sum += dst[i];

-        fprintf(stderr, "sum:    %s %f\n", sum == -536870910.00 ? "ok" : "error", sum);
+        fprintf(stderr, "sum:    %s\n", sum == -536870910.00 ? "ok" : "error");
    }

    free(src);