release : v1.1.1

.gitignore : add arm_neon.h
whisper : fix condition for providing past prompt (critical)
2025-07-02 23:41:28 +02:00 · 2023-01-23 20:23:44 +02:00 · 2023-01-23 20:19:04 +02:00 · 2023-01-22 10:47:01 +02:00 · 2023-01-19 18:50:33 +02:00 · 2023-01-18 22:52:18 +02:00
21 changed files with 384 additions and 115 deletions
--- a/.gitignore
+++ b/.gitignore
@ -18,6 +18,7 @@ build-sanitize-thread/
 /talk
 /bench

+arm_neon.h
 sync.sh
 libwhisper.a
 libwhisper.so
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,6 +1,6 @@
 cmake_minimum_required (VERSION 3.0)

-project(whisper.cpp VERSION 1.1.0)
+project(whisper.cpp VERSION 1.1.1)

 # Add path to modules
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
--- a/4
+++ b/4
@ -133,8 +133,8 @@ ifdef WHISPER_OPENBLAS
 	LDFLAGS += -lopenblas
 endif
 ifdef WHISPER_GPROF
-	CFLAGS  += -pg
-	CXXFLAGS  += -pg
+	CFLAGS   += -pg
+	CXXFLAGS += -pg
 endif
 ifneq ($(filter aarch64%,$(UNAME_M)),)
 endif
--- a/README.md
+++ b/README.md
@ -4,7 +4,7 @@
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)

-Stable: [1.0.4](https://github.com/ggerganov/whisper.cpp/releases/tag/1.0.4) / Beta: [1.1.0](https://github.com/ggerganov/whisper.cpp/releases/tag/1.1.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Stable: [v1.1.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.1.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)

 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:

--- a/bindings/go/whisper.go
+++ b/bindings/go/whisper.go
@ -147,16 +147,6 @@ func (ctx *Context) Whisper_decode(tokens []Token, past, threads int) error {
 	}
 }

-// whisper_sample_best() returns the token with the highest probability
-func (ctx *Context) Whisper_sample_best() TokenData {
-	return TokenData(C.whisper_sample_best((*C.struct_whisper_context)(ctx)))
-}
-
-// whisper_sample_timestamp() returns the most probable timestamp token
-func (ctx *Context) Whisper_sample_timestamp(is_initial bool) TokenData {
-	return TokenData(C.whisper_sample_timestamp((*C.struct_whisper_context)(ctx), C.bool(is_initial)))
-}
-
 // Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens.
 // Returns the number of tokens on success
 func (ctx *Context) Whisper_tokenize(text string, tokens []Token) (int, error) {
--- a/bindings/ios
+++ b/bindings/ios
--- a/bindings/javascript/package.json
+++ b/bindings/javascript/package.json
@ -1,6 +1,6 @@
 {
  "name": "whisper.cpp",
-  "version": "1.1.0",
+  "version": "1.1.1",
  "description": "Whisper speech recognition",
  "main": "whisper.js",
  "scripts": {
--- a/bindings/javascript/whisper.js
+++ b/bindings/javascript/whisper.js
--- a/examples/bench/bench.cpp
+++ b/examples/bench/bench.cpp
@ -7,6 +7,7 @@
 // command-line parameters
 struct whisper_params {
    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
+    int32_t what = 0; // what to benchmark: 0 - whisper ecoder, 1 - memcpy, 2 - ggml_mul_mat

    std::string model = "models/ggml-base.en.bin";
 };
@ -23,6 +24,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        }
        else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
        else if (arg == "-m" || arg == "--model")   { params.model     = argv[++i]; }
+        else if (arg == "-w" || arg == "--what")    { params.what     = atoi(argv[++i]); }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params);
@ -41,16 +43,14 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -h,       --help        [default] show this help message and exit\n");
    fprintf(stderr, "  -t N,     --threads N   [%-7d] number of threads to use during computation\n", params.n_threads);
    fprintf(stderr, "  -m FNAME, --model FNAME [%-7s] model path\n",                                  params.model.c_str());
+    fprintf(stderr, "  -w N,     --what N      [%-7d] what to benchmark:\n",                          params.what);
+    fprintf(stderr, "                           %-7s  0 - whisper encoder\n",                         "");
+    fprintf(stderr, "                           %-7s  1 - memcpy\n",                                  "");
+    fprintf(stderr, "                           %-7s  2 - ggml_mul_mat\n",                            "");
    fprintf(stderr, "\n");
 }

-int main(int argc, char ** argv) {
-    whisper_params params;
-
-    if (whisper_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
+int whisper_bench_encoder(const whisper_params & params) {
    // whisper init

    struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
@ -92,3 +92,22 @@ int main(int argc, char ** argv) {

    return 0;
 }
+
+int main(int argc, char ** argv) {
+    whisper_params params;
+
+    if (whisper_params_parse(argc, argv, params) == false) {
+        return 1;
+    }
+
+    int ret = -1;
+
+    switch (params.what) {
+        case 0: ret = whisper_bench_encoder(params);                break;
+        case 1: ret = whisper_bench_memcpy(params.n_threads);       break;
+        case 2: ret = whisper_bench_ggml_mul_mat(params.n_threads); break;
+        default: fprintf(stderr, "error: unknown benchmark: %d\n", params.what); break;
+    }
+
+    return ret;
+}
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -84,6 +84,7 @@ struct whisper_params {
    std::string model    = "models/ggml-base.en.bin";

    std::vector<std::string> fname_inp = {};
+    std::vector<std::string> fname_outp = {};
 };

 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@ -121,6 +122,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-osrt" || arg == "--output-srt")     { params.output_srt     = true; }
        else if (arg == "-owts" || arg == "--output-words")   { params.output_wts     = true; }
        else if (arg == "-ocsv" || arg == "--output-csv")     { params.output_csv     = true; }
+        else if (arg == "-of"   || arg == "--output-file")    { params.fname_outp.emplace_back(argv[++i]); }
        else if (arg == "-ps"   || arg == "--print-special")  { params.print_special  = true; }
        else if (arg == "-pc"   || arg == "--print-colors")   { params.print_colors   = true; }
        else if (arg == "-pp"   || arg == "--print-progress") { params.print_progress = true; }
@ -144,35 +146,36 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, "options:\n");
-    fprintf(stderr, "  -h,       --help            [default] show this help message and exit\n");
-    fprintf(stderr, "  -t N,     --threads N       [%-7d] number of threads to use during computation\n",    params.n_threads);
-    fprintf(stderr, "  -p N,     --processors N    [%-7d] number of processors to use during computation\n", params.n_processors);
-    fprintf(stderr, "  -ot N,    --offset-t N      [%-7d] time offset in milliseconds\n",                    params.offset_t_ms);
-    fprintf(stderr, "  -on N,    --offset-n N      [%-7d] segment index offset\n",                           params.offset_n);
-    fprintf(stderr, "  -d  N,    --duration N      [%-7d] duration of audio to process in milliseconds\n",   params.duration_ms);
-    fprintf(stderr, "  -mc N,    --max-context N   [%-7d] maximum number of text context tokens to store\n", params.max_context);
-    fprintf(stderr, "  -ml N,    --max-len N       [%-7d] maximum segment length in characters\n",           params.max_len);
-    fprintf(stderr, "  -bo N,    --best-of N       [%-7d] number of best candidates to keep\n",              params.best_of);
-    fprintf(stderr, "  -bs N,    --beam-size N     [%-7d] beam size for beam search\n",                      params.beam_size);
-    fprintf(stderr, "  -wt N,    --word-thold N    [%-7.2f] word timestamp probability threshold\n",         params.word_thold);
-    fprintf(stderr, "  -et N,    --entropy-thold N [%-7.2f] entropy threshold for decoder fail\n",           params.entropy_thold);
-    fprintf(stderr, "  -lpt N,   --logprob-thold N [%-7.2f] log probability threshold for decoder fail\n",   params.logprob_thold);
-    fprintf(stderr, "  -su,      --speed-up        [%-7s] speed up audio by x2 (reduced accuracy)\n",        params.speed_up ? "true" : "false");
-    fprintf(stderr, "  -tr,      --translate       [%-7s] translate from source language to english\n",      params.translate ? "true" : "false");
-    fprintf(stderr, "  -di,      --diarize         [%-7s] stereo audio diarization\n",                       params.diarize ? "true" : "false");
-    fprintf(stderr, "  -otxt,    --output-txt      [%-7s] output result in a text file\n",                   params.output_txt ? "true" : "false");
-    fprintf(stderr, "  -ovtt,    --output-vtt      [%-7s] output result in a vtt file\n",                    params.output_vtt ? "true" : "false");
-    fprintf(stderr, "  -osrt,    --output-srt      [%-7s] output result in a srt file\n",                    params.output_srt ? "true" : "false");
-    fprintf(stderr, "  -owts,    --output-words    [%-7s] output script for generating karaoke video\n",     params.output_wts ? "true" : "false");
-    fprintf(stderr, "  -ocsv,    --output-csv      [%-7s] output result in a CSV file\n",                    params.output_csv ? "true" : "false");
-    fprintf(stderr, "  -ps,      --print-special   [%-7s] print special tokens\n",                           params.print_special ? "true" : "false");
-    fprintf(stderr, "  -pc,      --print-colors    [%-7s] print colors\n",                                   params.print_colors ? "true" : "false");
-    fprintf(stderr, "  -pp,      --print-progress  [%-7s] print progress\n",                                 params.print_progress ? "true" : "false");
-    fprintf(stderr, "  -nt,      --no-timestamps   [%-7s] do not print timestamps\n",                        params.no_timestamps ? "false" : "true");
-    fprintf(stderr, "  -l LANG,  --language LANG   [%-7s] spoken language ('auto' for auto-detect)\n",       params.language.c_str());
-    fprintf(stderr, "            --prompt PROMPT   [%-7s] initial prompt\n",                                 params.prompt.c_str());
-    fprintf(stderr, "  -m FNAME, --model FNAME     [%-7s] model path\n",                                     params.model.c_str());
-    fprintf(stderr, "  -f FNAME, --file FNAME      [%-7s] input WAV file path\n",                            "");
+    fprintf(stderr, "  -h,        --help              [default] show this help message and exit\n");
+    fprintf(stderr, "  -t N,      --threads N         [%-7d] number of threads to use during computation\n",    params.n_threads);
+    fprintf(stderr, "  -p N,      --processors N      [%-7d] number of processors to use during computation\n", params.n_processors);
+    fprintf(stderr, "  -ot N,     --offset-t N        [%-7d] time offset in milliseconds\n",                    params.offset_t_ms);
+    fprintf(stderr, "  -on N,     --offset-n N        [%-7d] segment index offset\n",                           params.offset_n);
+    fprintf(stderr, "  -d  N,     --duration N        [%-7d] duration of audio to process in milliseconds\n",   params.duration_ms);
+    fprintf(stderr, "  -mc N,     --max-context N     [%-7d] maximum number of text context tokens to store\n", params.max_context);
+    fprintf(stderr, "  -ml N,     --max-len N         [%-7d] maximum segment length in characters\n",           params.max_len);
+    fprintf(stderr, "  -bo N,     --best-of N         [%-7d] number of best candidates to keep\n",              params.best_of);
+    fprintf(stderr, "  -bs N,     --beam-size N       [%-7d] beam size for beam search\n",                      params.beam_size);
+    fprintf(stderr, "  -wt N,     --word-thold N      [%-7.2f] word timestamp probability threshold\n",         params.word_thold);
+    fprintf(stderr, "  -et N,     --entropy-thold N   [%-7.2f] entropy threshold for decoder fail\n",           params.entropy_thold);
+    fprintf(stderr, "  -lpt N,    --logprob-thold N   [%-7.2f] log probability threshold for decoder fail\n",   params.logprob_thold);
+    fprintf(stderr, "  -su,       --speed-up          [%-7s] speed up audio by x2 (reduced accuracy)\n",        params.speed_up ? "true" : "false");
+    fprintf(stderr, "  -tr,       --translate         [%-7s] translate from source language to english\n",      params.translate ? "true" : "false");
+    fprintf(stderr, "  -di,       --diarize           [%-7s] stereo audio diarization\n",                       params.diarize ? "true" : "false");
+    fprintf(stderr, "  -otxt,     --output-txt        [%-7s] output result in a text file\n",                   params.output_txt ? "true" : "false");
+    fprintf(stderr, "  -ovtt,     --output-vtt        [%-7s] output result in a vtt file\n",                    params.output_vtt ? "true" : "false");
+    fprintf(stderr, "  -osrt,     --output-srt        [%-7s] output result in a srt file\n",                    params.output_srt ? "true" : "false");
+    fprintf(stderr, "  -owts,     --output-words      [%-7s] output script for generating karaoke video\n",     params.output_wts ? "true" : "false");
+    fprintf(stderr, "  -ocsv,     --output-csv        [%-7s] output result in a CSV file\n",                    params.output_csv ? "true" : "false");
+    fprintf(stderr, "  -of FNAME, --output-file FNAME [%-7s] output file path (without file extension)\n",      "");
+    fprintf(stderr, "  -ps,       --print-special     [%-7s] print special tokens\n",                           params.print_special ? "true" : "false");
+    fprintf(stderr, "  -pc,       --print-colors      [%-7s] print colors\n",                                   params.print_colors ? "true" : "false");
+    fprintf(stderr, "  -pp,       --print-progress    [%-7s] print progress\n",                                 params.print_progress ? "true" : "false");
+    fprintf(stderr, "  -nt,       --no-timestamps     [%-7s] do not print timestamps\n",                        params.no_timestamps ? "false" : "true");
+    fprintf(stderr, "  -l LANG,   --language LANG     [%-7s] spoken language ('auto' for auto-detect)\n",       params.language.c_str());
+    fprintf(stderr, "             --prompt PROMPT     [%-7s] initial prompt\n",                                 params.prompt.c_str());
+    fprintf(stderr, "  -m FNAME,  --model FNAME       [%-7s] model path\n",                                     params.model.c_str());
+    fprintf(stderr, "  -f FNAME,  --file FNAME        [%-7s] input WAV file path\n",                            "");
    fprintf(stderr, "\n");
 }

@ -514,6 +517,7 @@ int main(int argc, char ** argv) {

    for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
        const auto fname_inp = params.fname_inp[f];
+		const auto fname_outp = f < params.fname_outp.size() && !params.fname_outp[f].empty() ? params.fname_outp[f] : params.fname_inp[f];

        std::vector<float> pcmf32; // mono-channel F32 PCM
        std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
@ -654,7 +658,6 @@ int main(int argc, char ** argv) {

            wparams.greedy.best_of        = params.best_of;
            wparams.beam_search.beam_size = params.beam_size;
-            wparams.temperature_inc = -1;

            wparams.prompt_tokens     = prompt_tokens.empty() ? nullptr : prompt_tokens.data();
            wparams.prompt_n_tokens   = prompt_tokens.empty() ? 0       : prompt_tokens.size();
@ -692,31 +695,31 @@ int main(int argc, char ** argv) {

            // output to text file
            if (params.output_txt) {
-                const auto fname_txt = fname_inp + ".txt";
+                const auto fname_txt = fname_outp + ".txt";
                output_txt(ctx, fname_txt.c_str());
            }

            // output to VTT file
            if (params.output_vtt) {
-                const auto fname_vtt = fname_inp + ".vtt";
+                const auto fname_vtt = fname_outp + ".vtt";
                output_vtt(ctx, fname_vtt.c_str());
            }

            // output to SRT file
            if (params.output_srt) {
-                const auto fname_srt = fname_inp + ".srt";
+                const auto fname_srt = fname_outp + ".srt";
                output_srt(ctx, fname_srt.c_str(), params);
            }

            // output to WTS file
            if (params.output_wts) {
-                const auto fname_wts = fname_inp + ".wts";
+                const auto fname_wts = fname_outp + ".wts";
                output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
            }

 	    // output to CSV file
            if (params.output_csv) {
-                const auto fname_csv = fname_inp + ".csv";
+                const auto fname_csv = fname_outp + ".csv";
                output_csv(ctx, fname_csv.c_str());
            }

--- a/examples/stream/stream.cpp
+++ b/examples/stream/stream.cpp
@ -423,7 +423,8 @@ int main(int argc, char ** argv) {
        return 1;
    }

-    params.keep_ms = std::min(params.keep_ms, params.step_ms); // cannot be more than step_ms
+    params.keep_ms   = std::min(params.keep_ms,   params.step_ms);
+    params.length_ms = std::max(params.length_ms, params.step_ms);

    const int n_samples_step = (params.step_ms  *1e-3)*WHISPER_SAMPLE_RATE;
    const int n_samples_len  = (params.length_ms*1e-3)*WHISPER_SAMPLE_RATE;
@ -432,11 +433,11 @@ int main(int argc, char ** argv) {

    const bool use_vad = n_samples_step <= 0; // sliding window mode uses VAD

-    const int n_new_line = !use_vad ? params.length_ms / params.step_ms - 1 : 1; // number of steps to print new line
+    const int n_new_line = !use_vad ? std::max(1, params.length_ms / params.step_ms - 1) : 1; // number of steps to print new line

-    params.no_timestamps = !use_vad;
-    params.no_context    = use_vad;
-    params.max_tokens    = 0;
+    params.no_timestamps  = !use_vad;
+    params.no_context    |= use_vad;
+    params.max_tokens     = 0;

    // init audio

@ -486,7 +487,7 @@ int main(int argc, char ** argv) {
                params.no_timestamps ? 0 : 1);

        if (!use_vad) {
-            fprintf(stderr, "%s: n_new_line = %d\n", __func__, n_new_line);
+            fprintf(stderr, "%s: n_new_line = %d, no_context = %d\n", __func__, n_new_line, params.no_context);
        } else {
            fprintf(stderr, "%s: using VAD, will transcribe on speech activity\n", __func__);
        }
--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt
@ -73,8 +73,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
        printMessage("Loading model...\n")
        val models = application.assets.list("models/")
        if (models != null) {
-            val inputstream = application.assets.open("models/" + models[0])
-            whisperContext = WhisperContext.createContextFromInputStream(inputstream)
+            whisperContext = WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
            printMessage("Loaded model ${models[0]}.\n")
        }

--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt
@ -1,5 +1,6 @@
 package com.whispercppdemo.whisper

+import android.content.res.AssetManager
 import android.os.Build
 import android.util.Log
 import kotlinx.coroutines.*
@ -56,6 +57,15 @@ class WhisperContext private constructor(private var ptr: Long) {
            }
            return WhisperContext(ptr)
        }
+
+        fun createContextFromAsset(assetManager: AssetManager, assetPath: String): WhisperContext {
+            val ptr = WhisperLib.initContextFromAsset(assetManager, assetPath)
+
+            if (ptr == 0L) {
+                throw java.lang.RuntimeException("Couldn't create context from asset $assetPath")
+            }
+            return WhisperContext(ptr)
+        }
    }
 }

@ -87,6 +97,7 @@ private class WhisperLib {

        // JNI methods
        external fun initContextFromInputStream(inputStream: InputStream): Long
+        external fun initContextFromAsset(assetManager: AssetManager, assetPath: String): Long
        external fun initContext(modelPath: String): Long
        external fun freeContext(contextPtr: Long)
        external fun fullTranscribe(contextPtr: Long, audioData: FloatArray)
--- a/examples/whisper.android/app/src/main/jni/whisper/Whisper.mk
+++ b/examples/whisper.android/app/src/main/jni/whisper/Whisper.mk
@ -1,5 +1,5 @@
 WHISPER_LIB_DIR := $(LOCAL_PATH)/../../../../../../../
-LOCAL_LDLIBS    := -llog
+LOCAL_LDLIBS    := -landroid -llog

 # Make the final output library smaller by only keeping the symbols referenced from the app.
 ifneq ($(APP_OPTIM),debug)
--- a/examples/whisper.android/app/src/main/jni/whisper/jni.c
+++ b/examples/whisper.android/app/src/main/jni/whisper/jni.c
@ -1,4 +1,6 @@
 #include <jni.h>
+#include <android/asset_manager.h>
+#include <android/asset_manager_jni.h>
 #include <android/log.h>
 #include <stdlib.h>
 #include <sys/sysinfo.h>
@ -9,6 +11,7 @@
 #define TAG "JNI"

 #define LOGI(...) __android_log_print(ANDROID_LOG_INFO,     TAG, __VA_ARGS__)
+#define LOGW(...) __android_log_print(ANDROID_LOG_WARN,     TAG, __VA_ARGS__)

 static inline int min(int a, int b) {
    return (a < b) ? a : b;
@ -91,6 +94,52 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromInputSt
    return (jlong) context;
 }

+static size_t asset_read(void *ctx, void *output, size_t read_size) {
+    return AAsset_read((AAsset *) ctx, output, read_size);
+}
+
+static bool asset_is_eof(void *ctx) {
+    return AAsset_getRemainingLength64((AAsset *) ctx) <= 0;
+}
+
+static void asset_close(void *ctx) {
+    AAsset_close((AAsset *) ctx);
+}
+
+static struct whisper_context *whisper_init_from_asset(
+        JNIEnv *env,
+        jobject assetManager,
+        const char *asset_path
+) {
+    LOGI("Loading model from asset '%s'\n", asset_path);
+    AAssetManager *asset_manager = AAssetManager_fromJava(env, assetManager);
+    AAsset *asset = AAssetManager_open(asset_manager, asset_path, AASSET_MODE_STREAMING);
+    if (!asset) {
+        LOGW("Failed to open '%s'\n", asset_path);
+        return NULL;
+    }
+
+    whisper_model_loader loader = {
+            .context = asset,
+            .read = &asset_read,
+            .eof = &asset_is_eof,
+            .close = &asset_close
+    };
+
+    return whisper_init(&loader);
+}
+
+JNIEXPORT jlong JNICALL
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromAsset(
+        JNIEnv *env, jobject thiz, jobject assetManager, jstring asset_path_str) {
+    UNUSED(thiz);
+    struct whisper_context *context = NULL;
+    const char *asset_path_chars = (*env)->GetStringUTFChars(env, asset_path_str, NULL);
+    context = whisper_init_from_asset(env, assetManager, asset_path_chars);
+    (*env)->ReleaseStringUTFChars(env, asset_path_str, asset_path_chars);
+    return (jlong) context;
+}
+
 JNIEXPORT jlong JNICALL
 Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
        JNIEnv *env, jobject thiz, jstring model_path_str) {
--- a/examples/whisper.wasm/CMakeLists.txt
+++ b/examples/whisper.wasm/CMakeLists.txt
@ -32,8 +32,8 @@ set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE=8 \
-    -s INITIAL_MEMORY=1024MB \
-    -s TOTAL_MEMORY=1024MB \
+    -s INITIAL_MEMORY=1500MB \
+    -s TOTAL_MEMORY=1500MB \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
    ${EXTRA_FLAGS} \
--- a/examples/whisper.wasm/index-tmpl.html
+++ b/examples/whisper.wasm/index-tmpl.html
@ -46,10 +46,12 @@

            <div id="model">
                Whisper model: <span id="model-whisper-status"></span>
-                <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
-                <button id="fetch-whisper-tiny"    onclick="loadWhisper('tiny')">tiny (75 MB)</button>
-                <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
-                <button id="fetch-whisper-base"    onclick="loadWhisper('base')">base (142 MB)</button>
+                <button id="fetch-whisper-tiny-en"  onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
+                <button id="fetch-whisper-tiny"     onclick="loadWhisper('tiny')">tiny (75 MB)</button>
+                <button id="fetch-whisper-base-en"  onclick="loadWhisper('base.en')">base.en (142 MB)</button>
+                <button id="fetch-whisper-base"     onclick="loadWhisper('base')">base (142 MB)</button>
+                <button id="fetch-whisper-small-en" onclick="loadWhisper('small.en')">small.en (466 MB)</button>
+                <button id="fetch-whisper-small"    onclick="loadWhisper('small')">small (466 MB)</button>
                <span id="fetch-whisper-progress"></span>

                <input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
@ -284,27 +286,33 @@
                }
                reader.readAsArrayBuffer(file);

-                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
-                document.getElementById('fetch-whisper-base-en').style.display = 'none';
-                document.getElementById('fetch-whisper-tiny'   ).style.display = 'none';
-                document.getElementById('fetch-whisper-base'   ).style.display = 'none';
-                document.getElementById('whisper-file'         ).style.display = 'none';
-                document.getElementById('model-whisper-status' ).innerHTML = 'loaded model: ' + file.name;
+                document.getElementById('fetch-whisper-tiny-en' ).style.display = 'none';
+                document.getElementById('fetch-whisper-base-en' ).style.display = 'none';
+                document.getElementById('fetch-whisper-small-en').style.display = 'none';
+                document.getElementById('fetch-whisper-tiny'    ).style.display = 'none';
+                document.getElementById('fetch-whisper-base'    ).style.display = 'none';
+                document.getElementById('fetch-whisper-small'   ).style.display = 'none';
+                document.getElementById('whisper-file'          ).style.display = 'none';
+                document.getElementById('model-whisper-status'  ).innerHTML = 'loaded model: ' + file.name;
            }

            function loadWhisper(model) {
                let urls = {
-                    'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
-                    'tiny':    'https://whisper.ggerganov.com/ggml-model-whisper-tiny.bin',
-                    'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
-                    'base':    'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
+                    'tiny.en':  'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
+                    'tiny':     'https://whisper.ggerganov.com/ggml-model-whisper-tiny.bin',
+                    'base.en':  'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
+                    'base':     'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
+                    'small.en': 'https://whisper.ggerganov.com/ggml-model-whisper-small.en.bin',
+                    'small':    'https://whisper.ggerganov.com/ggml-model-whisper-small.bin',
                };

                let sizes = {
-                    'tiny.en': 75,
-                    'tiny':    75,
-                    'base.en': 142,
-                    'base':    142,
+                    'tiny.en':  75,
+                    'tiny':     75,
+                    'base.en':  142,
+                    'base':     142,
+                    'small.en': 466,
+                    'small':    466,
                };

                let url     = urls[model];
@ -313,12 +321,14 @@

                model_whisper = model;

-                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
-                document.getElementById('fetch-whisper-base-en').style.display = 'none';
-                document.getElementById('fetch-whisper-tiny'   ).style.display = 'none';
-                document.getElementById('fetch-whisper-base'   ).style.display = 'none';
-                document.getElementById('whisper-file'         ).style.display = 'none';
-                document.getElementById('model-whisper-status' ).innerHTML = 'loading model: ' + model;
+                document.getElementById('fetch-whisper-tiny-en' ).style.display = 'none';
+                document.getElementById('fetch-whisper-base-en' ).style.display = 'none';
+                document.getElementById('fetch-whisper-small-en').style.display = 'none';
+                document.getElementById('fetch-whisper-tiny'    ).style.display = 'none';
+                document.getElementById('fetch-whisper-base'    ).style.display = 'none';
+                document.getElementById('fetch-whisper-small'   ).style.display = 'none';
+                document.getElementById('whisper-file'          ).style.display = 'none';
+                document.getElementById('model-whisper-status'  ).innerHTML = 'loading model: ' + model;

                cbProgress = function(p) {
                    let el = document.getElementById('fetch-whisper-progress');
@ -327,12 +337,14 @@

                cbCancel = function() {
                    var el;
-                    el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-tiny'   ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('fetch-whisper-base'   ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('whisper-file'         ); if (el) el.style.display = 'inline-block';
-                    el = document.getElementById('model-whisper-status' ); if (el) el.innerHTML = '';
+                    el = document.getElementById('fetch-whisper-tiny-en' ); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('fetch-whisper-base-en' ); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('fetch-whisper-small-en'); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('fetch-whisper-tiny'    ); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('fetch-whisper-base'    ); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('fetch-whisper-small'   ); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('whisper-file'          ); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('model-whisper-status'  ); if (el) el.innerHTML = '';
                };

                loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
--- a/extra/bench-all.sh
+++ b/extra/bench-all.sh
@ -12,6 +12,18 @@ fi

 models=( "tiny" "base" "small" "medium" "large" )

+printf "\n"
+printf "Running memcpy benchmark with 1 thread\n"
+printf "\n"
+
+./bench -w 1 -t 1 2>&1
+
+printf "\n"
+printf "Running ggml_mul_mat benchmark with $n_threads threads\n"
+printf "\n"
+
+./bench -w 2 -t $n_threads 2>&1
+
 printf "\n"
 printf "Running benchmark for all models\n"
 printf "This can take a while!\n"
@ -56,4 +68,3 @@ for model in "${models[@]}"; do

    printf "| <todo> | <todo> | $config | $model | $n_threads | $load_time | $encode_time | $commit |\n"
 done
-
--- a/ggml.c
+++ b/ggml.c
@ -4373,7 +4373,9 @@ static void ggml_compute_forward_mul_mat_f32(
    if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
        GGML_ASSERT(nb10 == sizeof(float));

-        if (params->ith != 0) return;
+        if (params->ith != 0) {
+            return;
+        }

        if (params->type == GGML_TASK_INIT) {
            return;
@ -4616,7 +4618,9 @@ static void ggml_compute_forward_mul_mat_f16_f32(
    if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
        GGML_ASSERT(nb10 == sizeof(float));

-        if (params->ith != 0) return;
+        if (params->ith != 0) {
+            return;
+        }

        if (params->type == GGML_TASK_INIT) {
            return;
@ -7054,7 +7058,7 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
 #ifdef __APPLE__

 //#include <os/lock.h>
-
+//
 //typedef os_unfair_lock ggml_lock_t;
 //
 //#define ggml_lock_init(x)    UNUSED(x)
@ -7161,6 +7165,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
            if (state->params.ith < state->params.nth) {
                ggml_compute_forward(&state->params, state->node);
            }
+
            state->node = NULL;
        } else {
            break;
@ -7205,6 +7210,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                .node   = NULL,
                .shared = &state_shared,
            };
+
            int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
            assert(rc == 0);
            UNUSED(rc);
@ -7273,7 +7279,8 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                                node->src1->type == GGML_TYPE_F32) {
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
                                if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
-                                    node->n_tasks = 1;
+                                    node->n_tasks = 1; // TODO: this actually is doing nothing
+                                                       //       the threads are still spinning
                                    cur = sizeof(float)*(node->src0->ne[0]*node->src0->ne[1]);
                                } else {
                                    cur = sizeof(ggml_fp16_t)*ggml_nelements(node->src1);
--- a/whisper.cpp
+++ b/whisper.cpp
@ -474,6 +474,12 @@ struct whisper_context {
    int64_t t_decode_us = 0;
    int64_t t_start_us  = 0;

+    int32_t n_sample = 0; // number of tokens sampled
+    int32_t n_encode = 0; // number of encoder calls
+    int32_t n_decode = 0; // number of decoder calls
+    int32_t n_fail_p = 0; // number of logprob threshold failures
+    int32_t n_fail_h = 0; // number of entropy threshold failures
+
    ggml_type wtype; // weight type (FP32 or FP16)

    whisper_mel mel;
@ -1620,6 +1626,7 @@ static bool whisper_encode(
    ggml_free(ctx0);

    wctx.t_encode_us += ggml_time_us() - t_start_us;
+    wctx.n_encode++;

    return true;
 }
@ -1993,6 +2000,7 @@ static bool whisper_decode(
    ggml_free(ctx0);

    wctx.t_decode_us += ggml_time_us() - t_start_us;
+    wctx.n_decode++;

    return true;
 }
@ -2644,12 +2652,17 @@ whisper_token whisper_token_transcribe(void) {
 void whisper_print_timings(struct whisper_context * ctx) {
    const int64_t t_end_us = ggml_time_us();

+    const int32_t n_sample = std::max(1, ctx->n_sample);
+    const int32_t n_encode = std::max(1, ctx->n_encode);
+    const int32_t n_decode = std::max(1, ctx->n_decode);
+
    fprintf(stderr, "\n");
+    fprintf(stderr, "%s:     fallbacks = %3d p / %3d h\n", __func__, ctx->n_fail_p, ctx->n_fail_h);
    fprintf(stderr, "%s:     load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
    fprintf(stderr, "%s:      mel time = %8.2f ms\n", __func__, ctx->t_mel_us/1000.0f);
-    fprintf(stderr, "%s:   sample time = %8.2f ms\n", __func__, ctx->t_sample_us/1000.0f);
-    fprintf(stderr, "%s:   encode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_encode_us/1000.0f, ctx->t_encode_us/1000.0f/ctx->model.hparams.n_audio_layer);
-    fprintf(stderr, "%s:   decode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_decode_us/1000.0f, ctx->t_decode_us/1000.0f/ctx->model.hparams.n_text_layer);
+    fprintf(stderr, "%s:   sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_sample_us, n_sample, 1e-3f*ctx->t_sample_us/n_sample);
+    fprintf(stderr, "%s:   encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_encode_us, n_encode, 1e-3f*ctx->t_encode_us/n_encode);
+    fprintf(stderr, "%s:   decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_decode_us, n_decode, 1e-3f*ctx->t_decode_us/n_decode);
    fprintf(stderr, "%s:    total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
 }

@ -3004,7 +3017,7 @@ static void whisper_process_logits(
 }

 static whisper_token_data whisper_sample_token(
-      const whisper_context & ctx,
+            whisper_context & ctx,
      const whisper_decoder & decoder,
                       bool   best) {
    whisper_token_data result = {
@ -3059,6 +3072,8 @@ static whisper_token_data whisper_sample_token(
        result.pt  = result.p;
    }

+    ctx.n_sample++;
+
    return result;
 }

@ -3091,10 +3106,10 @@ static std::vector<whisper_token_data> whisper_sample_token_topk(
    std::vector<whisper_token_data> result;
    result.reserve(k);

-    whisper_token tid;
+    whisper_token tid = vocab.token_beg;

-    float pt;
-    float ptsum;
+    float pt    = 0.0;
+    float ptsum = 0.0;

    {
        double sum_ts = 0.0;
@ -3127,6 +3142,8 @@ static std::vector<whisper_token_data> whisper_sample_token_topk(
        }
    }

+    ctx.n_sample++;
+
    return result;
 }

@ -3432,7 +3449,7 @@ int whisper_full(
                prompt.clear();

                // if we have already generated some text, use it as a prompt to condition the next generation
-                if (!prompt_past.empty() && t_cur > 0.5f) {
+                if (!prompt_past.empty() && t_cur < 0.5f) {
                    int n_take = std::min(std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2), int(prompt_past.size()));

                    prompt = { whisper_token_prev(ctx) };
@ -3721,11 +3738,12 @@ int whisper_full(
                    WHISPER_PRINT_DEBUG("%s: decoder %2d: score = %8.5f, result_len = %3d, avg_logprobs = %8.5f, entropy = %8.5f\n",
                            __func__, j, decoder.sequence.score, decoder.sequence.result_len, decoder.sequence.avg_logprobs, decoder.sequence.entropy);

-                    if (decoder.sequence.result_len > 8 && decoder.sequence.entropy < params.entropy_thold) {
+                    if (decoder.sequence.result_len > 32 && decoder.sequence.entropy < params.entropy_thold) {
                        WHISPER_PRINT_DEBUG("%s: decoder %2d: failed due to entropy %8.5f < %8.5f\n",
                                __func__, j, decoder.sequence.entropy, params.entropy_thold);

                        decoder.failed = true;
+                        ctx->n_fail_h++;

                        continue;
                    }
@ -3747,6 +3765,7 @@ int whisper_full(

                if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
                    success = false;
+                    ctx->n_fail_p++;
                }

                if (success) {
@ -3801,6 +3820,7 @@ int whisper_full(

                    if (tokens_cur[i].id > whisper_token_beg(ctx) && !params.single_segment) {
                        const auto t1 = seek + 2*(tokens_cur[i].tid - whisper_token_beg(ctx));
+
                        if (!text.empty()) {
                            const auto tt0 = params.speed_up ? 2*t0 : t0;
                            const auto tt1 = params.speed_up ? 2*t1 : t1;
@ -4059,6 +4079,145 @@ float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int

 // =================================================================================================

+//
+// Temporary interface needed for exposing ggml interface
+// Will be removed in the future when ggml becomes a separate library
+//
+
+WHISPER_API int whisper_bench_memcpy(int n_threads) {
+    ggml_time_init();
+
+    size_t n    = 50;
+    size_t arr  = n_threads > 0 ? 1024 : n_threads; // trick to avoid compiler optimizations
+
+    // 1 GB array
+    const size_t size = arr*1024llu*1024llu;
+
+    char * src = (char *) malloc(size);
+    char * dst = (char *) malloc(size);
+
+    for (size_t i = 0; i < size; i++) src[i] = i;
+
+    memcpy(dst, src, size); // heat-up
+
+    double tsum = 0.0;
+
+    for (size_t i = 0; i < n; i++) {
+        const int64_t t0 = ggml_time_us();
+
+        memcpy(dst, src, size);
+
+        const int64_t t1 = ggml_time_us();
+
+        tsum += (t1 - t0)*1e-6;
+
+        src[0] = rand();
+    }
+
+    fprintf(stderr, "memcpy: %.2f GB/s\n", (double) (n*size)/(tsum*1024llu*1024llu*1024llu));
+
+    // needed to prevent the compile from optimizing the memcpy away
+    {
+        double sum = 0.0;
+
+        for (size_t i = 0; i < size; i++) sum += dst[i];
+
+        fprintf(stderr, "sum:    %s %f\n", sum == -536870910.00 ? "ok" : "error", sum);
+    }
+
+    free(src);
+    free(dst);
+
+    return 0;
+}
+
+WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
+    ggml_time_init();
+
+    const int n_max = 128;
+
+    const std::vector<size_t> sizes = {
+        64, 128, 256, 512, 1024, 2048, 4096,
+    };
+
+    const size_t N_max = sizes.back();
+
+    // a: N*N*sizeof(float)
+    // b: N*N*sizeof(float)
+    // c: N*N*sizeof(float)
+    // when F16 is used, there is an extra work buffer of size N*N*sizeof(float)
+    std::vector<char> buf(4llu*N_max*N_max*sizeof(float) + 4*256);
+
+    for (size_t i = 0; i < buf.size(); i++) buf[i] = i;
+
+    for (int j = 0; j < (int) sizes.size(); j++) {
+        int n_fp16 = 0;
+        int n_fp32 = 0;
+
+        // GFLOPS/s
+        double s_fp16 = 0.0;
+        double s_fp32 = 0.0;
+
+        const size_t N = sizes[j];
+
+        for (int k = 0; k < 2; ++k) {
+            const ggml_type wtype = k == 0 ? GGML_TYPE_F16 : GGML_TYPE_F32;
+
+            double & s = k == 0 ? s_fp16 : s_fp32;
+            int    & n = k == 0 ? n_fp16   : n_fp32;
+
+            struct ggml_init_params gparams = {
+                /*.mem_size   =*/ buf.size(),
+                /*.mem_buffer =*/ buf.data(),
+            };
+
+            struct ggml_context * ctx0 = ggml_init(gparams);
+
+            struct ggml_tensor * a = ggml_new_tensor_2d(ctx0, wtype,         N, N);
+            struct ggml_tensor * b = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, N, N);
+
+            struct ggml_tensor * c = ggml_mul_mat(ctx0, a, b);
+
+            struct ggml_cgraph gf = ggml_build_forward(c);
+
+            gf.n_threads = n_threads;
+
+            double tsum = 0.0;
+
+            // heat-up
+            ggml_graph_compute(ctx0, &gf);
+
+            for (int i = 0; i < n_max; ++i) {
+                const int64_t t0 = ggml_time_us();
+
+                ggml_graph_compute(ctx0, &gf);
+
+                const int64_t t1 = ggml_time_us();
+
+                tsum += (t1 - t0)*1e-6;
+                n++;
+
+                if (tsum > 1.0 && n >= 3) {
+                    break;
+                }
+            }
+
+            ggml_free(ctx0);
+
+            s = ((2.0*N*N*N*n)/tsum)*1e-9;
+        }
+
+        fprintf(stderr, "ggml_mul_mat: %5zu x %5zu: F16 %8.1f GFLOPS (%3d runs) / F32 %8.1f GFLOPS (%3d runs)\n",
+            N, N, s_fp16, n_fp16, s_fp32, n_fp32);
+    }
+
+    return 0;
+}
+
+// =================================================================================================
+
+// =================================================================================================
+
 //
 // Experimental stuff below
 //
--- a/whisper.h
+++ b/whisper.h
@ -245,7 +245,7 @@ extern "C" {
        int duration_ms;        // audio duration to process in ms

        bool translate;
-        bool no_context;        // do not use initial prompt for the decoder (if any)
+        bool no_context;        // do not use past transcription (if any) as initial prompt for the decoder
        bool single_segment;    // force single segment output (useful for streaming)
        bool print_special;     // print special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.)
        bool print_progress;    // print progress information
@ -350,6 +350,13 @@ extern "C" {
    // Get the probability of the specified token in the specified segment.
    WHISPER_API float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int i_token);

+    ////////////////////////////////////////////////////////////////////////////
+
+    // Temporary helpers needed for exposing ggml interface
+
+    WHISPER_API int whisper_bench_memcpy(int n_threads);
+    WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads);
+
 #ifdef __cplusplus
 }
 #endif
Author	SHA1	Message	Date
Georgi Gerganov	2c3f50a021	release : v1.1.1	2023-01-23 20:23:44 +02:00
Georgi Gerganov	9a65269a20	.gitignore : add arm_neon.h	2023-01-23 20:19:04 +02:00
Georgi Gerganov	78f166174f	whisper : fix condition for providing past prompt (critical) This bug has been present since v1.1.0. Effectively, the past transcribed text wasn't being used for following transcriptions, which likely significantly reduces the transcription quality. Likely related to #419	2023-01-22 10:47:01 +02:00
Georgi Gerganov	21c569ba4a	whisper : extend information in whisper_print_timings()	2023-01-19 18:50:33 +02:00
Georgi Gerganov	1a91c19af9	whisper : perform entropy check only when we have at least 32 tokens (#412 )	2023-01-18 22:52:18 +02:00
Georgi Gerganov	f583e2d2f5	main : we had accidentally disabled the temperature fallback .. (#291 )	2023-01-18 22:51:41 +02:00
Georgi Gerganov	206fc93396	whisper.wasm : add small and small.en models	2023-01-18 21:58:55 +02:00
Georgi Gerganov	a6cf6f4c4a	bench : minor fixes	2023-01-18 21:40:10 +02:00
Chia-Hsiang Cheng	472a473fd1	main : add an option to accept optional output filenames (#424 ) * Add an option to accept optional output filenames * Format the file Co-authored-by: Chia-Hsiang Cheng <gary.chiahsiang.cheng@gmail.com>	2023-01-18 21:26:31 +02:00
Georgi Gerganov	9ba66c2fad	stream : fix handling of --step == --length (#416 )	2023-01-18 21:22:52 +02:00
Georgi Gerganov	1ccb8a46a5	bench : fix Windows linkage by moving ggml benches in whisper lib ..	2023-01-18 21:19:50 +02:00
Georgi Gerganov	1290fc6457	bench : add memcpy and ggml_mul_mat benchmarks	2023-01-18 20:31:46 +02:00
Digipom	49b529ba74	whisper.android : add support for loading directly from asset in C (#415 )	2023-01-16 21:57:35 +02:00
Georgi Gerganov	8088a977af	whisper : fix possible uninitialized variables (#291 )	2023-01-16 21:44:40 +02:00
Georgi Gerganov	c9aeb33676	stream : fix --keep_context argument to be used correctly (#354 )	2023-01-16 19:37:40 +02:00
Damian Czaja	4a3f0d3fe9	go : remove sample_best and sample_timestamp bindings (#409 )	2023-01-16 19:18:10 +02:00
Georgi Gerganov	874bde887e	Update README.md	2023-01-16 18:47:31 +02:00