Mirror of https://github.com/ggerganov/whisper.cpp.git (synced 2025-07-04 16:30:58 +02:00)
Compare commits
7 Commits
SHA1
---
7aa1174315
1290fc6457
49b529ba74
8088a977af
c9aeb33676
4a3f0d3fe9
874bde887e
Makefile (4 lines changed)

@@ -133,8 +133,8 @@ ifdef WHISPER_OPENBLAS
 	LDFLAGS  += -lopenblas
 endif
 ifdef WHISPER_GPROF
 	CFLAGS   += -pg
 	CXXFLAGS += -pg
 endif
 ifneq ($(filter aarch64%,$(UNAME_M)),)
 endif
README.md

@@ -4,7 +4,7 @@
 [](https://opensource.org/licenses/MIT)
 [](https://www.npmjs.com/package/whisper.cpp/)
 
-Stable: [1.0.4](https://github.com/ggerganov/whisper.cpp/releases/tag/1.0.4) / Beta: [1.1.0](https://github.com/ggerganov/whisper.cpp/releases/tag/1.1.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Stable: [v1.0.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.0.4) / Beta: [v1.1.0](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.1.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
 
 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
 
Go bindings (bindings/go)

@@ -147,16 +147,6 @@ func (ctx *Context) Whisper_decode(tokens []Token, past, threads int) error {
 	}
 }
 
-// whisper_sample_best() returns the token with the highest probability
-func (ctx *Context) Whisper_sample_best() TokenData {
-	return TokenData(C.whisper_sample_best((*C.struct_whisper_context)(ctx)))
-}
-
-// whisper_sample_timestamp() returns the most probable timestamp token
-func (ctx *Context) Whisper_sample_timestamp(is_initial bool) TokenData {
-	return TokenData(C.whisper_sample_timestamp((*C.struct_whisper_context)(ctx), C.bool(is_initial)))
-}
-
 // Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens.
 // Returns the number of tokens on success
 func (ctx *Context) Whisper_tokenize(text string, tokens []Token) (int, error) {
examples/bench/bench.cpp

@@ -7,6 +7,7 @@
 // command-line parameters
 struct whisper_params {
     int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
+    int32_t what = 0; // what to benchmark: 0 - whisper ecoder, 1 - memcpy, 2 - ggml_mul_mat
 
     std::string model = "models/ggml-base.en.bin";
 };
@@ -23,6 +24,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         }
         else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
         else if (arg == "-m" || arg == "--model")   { params.model     = argv[++i]; }
+        else if (arg == "-w" || arg == "--what")    { params.what      = atoi(argv[++i]); }
         else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             whisper_print_usage(argc, argv, params);
@@ -41,16 +43,14 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, "  -h,       --help        [default] show this help message and exit\n");
     fprintf(stderr, "  -t N,     --threads N   [%-7d] number of threads to use during computation\n", params.n_threads);
     fprintf(stderr, "  -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
+    fprintf(stderr, "  -w N,     --what N      [%-7d] what to benchmark:\n", params.what);
+    fprintf(stderr, "                          %-7s  0 - whisper encoder\n", "");
+    fprintf(stderr, "                          %-7s  1 - memcpy\n", "");
+    fprintf(stderr, "                          %-7s  2 - ggml_mul_mat\n", "");
     fprintf(stderr, "\n");
 }
 
-int main(int argc, char ** argv) {
-    whisper_params params;
-
-    if (whisper_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
+int whisper_bench_encoder(const whisper_params & params) {
     // whisper init
 
     struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
@@ -92,3 +92,22 @@ int main(int argc, char ** argv) {
 
     return 0;
 }
+
+int main(int argc, char ** argv) {
+    whisper_params params;
+
+    if (whisper_params_parse(argc, argv, params) == false) {
+        return 1;
+    }
+
+    int ret = -1;
+
+    switch (params.what) {
+        case 0: ret = whisper_bench_encoder(params);                break;
+        case 1: ret = whisper_bench_memcpy(params.n_threads);       break;
+        case 2: ret = whisper_bench_ggml_mul_mat(params.n_threads); break;
+        default: fprintf(stderr, "error: unknown benchmark: %d\n", params.what); break;
+    }
+
+    return ret;
+}
examples/stream/stream.cpp

@@ -434,9 +434,9 @@ int main(int argc, char ** argv) {
 
     const int n_new_line = !use_vad ? params.length_ms / params.step_ms - 1 : 1; // number of steps to print new line
 
     params.no_timestamps  = !use_vad;
-    params.no_context     = use_vad;
+    params.no_context    |= use_vad;
     params.max_tokens     = 0;
 
     // init audio
 
@@ -486,7 +486,7 @@ int main(int argc, char ** argv) {
             params.no_timestamps ? 0 : 1);
 
     if (!use_vad) {
-        fprintf(stderr, "%s: n_new_line = %d\n", __func__, n_new_line);
+        fprintf(stderr, "%s: n_new_line = %d, no_context = %d\n", __func__, n_new_line, params.no_context);
     } else {
         fprintf(stderr, "%s: using VAD, will transcribe on speech activity\n", __func__);
     }
whisper.android example: MainScreenViewModel.kt

@@ -73,8 +73,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
         printMessage("Loading model...\n")
         val models = application.assets.list("models/")
         if (models != null) {
-            val inputstream = application.assets.open("models/" + models[0])
-            whisperContext = WhisperContext.createContextFromInputStream(inputstream)
+            whisperContext = WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
             printMessage("Loaded model ${models[0]}.\n")
         }
 
whisper.android example: LibWhisper.kt (WhisperContext / WhisperLib)

@@ -1,5 +1,6 @@
 package com.whispercppdemo.whisper
 
+import android.content.res.AssetManager
 import android.os.Build
 import android.util.Log
 import kotlinx.coroutines.*
@@ -56,6 +57,15 @@ class WhisperContext private constructor(private var ptr: Long) {
             }
             return WhisperContext(ptr)
         }
+
+        fun createContextFromAsset(assetManager: AssetManager, assetPath: String): WhisperContext {
+            val ptr = WhisperLib.initContextFromAsset(assetManager, assetPath)
+
+            if (ptr == 0L) {
+                throw java.lang.RuntimeException("Couldn't create context from asset $assetPath")
+            }
+            return WhisperContext(ptr)
+        }
     }
 }
 
@@ -87,6 +97,7 @@ private class WhisperLib {
 
         // JNI methods
        external fun initContextFromInputStream(inputStream: InputStream): Long
+       external fun initContextFromAsset(assetManager: AssetManager, assetPath: String): Long
        external fun initContext(modelPath: String): Long
        external fun freeContext(contextPtr: Long)
        external fun fullTranscribe(contextPtr: Long, audioData: FloatArray)
whisper.android example: jni Android.mk

@@ -1,5 +1,5 @@
 WHISPER_LIB_DIR := $(LOCAL_PATH)/../../../../../../../
-LOCAL_LDLIBS    := -llog
+LOCAL_LDLIBS    := -landroid -llog
 
 # Make the final output library smaller by only keeping the symbols referenced from the app.
 ifneq ($(APP_OPTIM),debug)
whisper.android example: jni.c

@@ -1,4 +1,6 @@
 #include <jni.h>
+#include <android/asset_manager.h>
+#include <android/asset_manager_jni.h>
 #include <android/log.h>
 #include <stdlib.h>
 #include <sys/sysinfo.h>
@@ -9,6 +11,7 @@
 #define TAG "JNI"
 
 #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
+#define LOGW(...) __android_log_print(ANDROID_LOG_WARN, TAG, __VA_ARGS__)
 
 static inline int min(int a, int b) {
     return (a < b) ? a : b;
@@ -91,6 +94,52 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromInputStream(
     return (jlong) context;
 }
 
+static size_t asset_read(void *ctx, void *output, size_t read_size) {
+    return AAsset_read((AAsset *) ctx, output, read_size);
+}
+
+static bool asset_is_eof(void *ctx) {
+    return AAsset_getRemainingLength64((AAsset *) ctx) <= 0;
+}
+
+static void asset_close(void *ctx) {
+    AAsset_close((AAsset *) ctx);
+}
+
+static struct whisper_context *whisper_init_from_asset(
+        JNIEnv *env,
+        jobject assetManager,
+        const char *asset_path
+) {
+    LOGI("Loading model from asset '%s'\n", asset_path);
+    AAssetManager *asset_manager = AAssetManager_fromJava(env, assetManager);
+    AAsset *asset = AAssetManager_open(asset_manager, asset_path, AASSET_MODE_STREAMING);
+    if (!asset) {
+        LOGW("Failed to open '%s'\n", asset_path);
+        return NULL;
+    }
+
+    whisper_model_loader loader = {
+            .context = asset,
+            .read = &asset_read,
+            .eof = &asset_is_eof,
+            .close = &asset_close
+    };
+
+    return whisper_init(&loader);
+}
+
+JNIEXPORT jlong JNICALL
+Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromAsset(
+        JNIEnv *env, jobject thiz, jobject assetManager, jstring asset_path_str) {
+    UNUSED(thiz);
+    struct whisper_context *context = NULL;
+    const char *asset_path_chars = (*env)->GetStringUTFChars(env, asset_path_str, NULL);
+    context = whisper_init_from_asset(env, assetManager, asset_path_chars);
+    (*env)->ReleaseStringUTFChars(env, asset_path_str, asset_path_chars);
+    return (jlong) context;
+}
+
 JNIEXPORT jlong JNICALL
 Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
         JNIEnv *env, jobject thiz, jstring model_path_str) {
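The asset callbacks above plug into whisper.cpp's generic streaming loader: a whisper_model_loader is just a context pointer plus read/eof/close callbacks handed to whisper_init(), which is exactly how the Android example streams the model straight out of the APK. Below is a minimal, hypothetical sketch of the same pattern over a plain FILE*; only the loader fields used in the diff above (context, read, eof, close) and the whisper_init() call are taken from this change, the FILE*-based helper itself is an illustration, not code from these commits.

```cpp
#include <stdio.h>
#include "whisper.h"

// Read/eof/close callbacks over a plain FILE*, mirroring the AAsset callbacks above.
static size_t file_read(void * ctx, void * output, size_t read_size) {
    return fread(output, 1, read_size, (FILE *) ctx);
}

static bool file_is_eof(void * ctx) {
    return feof((FILE *) ctx) != 0;
}

static void file_close(void * ctx) {
    fclose((FILE *) ctx);
}

// Hypothetical helper: load a model through the streaming loader
// instead of whisper_init_from_file().
static struct whisper_context * init_from_stream(const char * path) {
    FILE * f = fopen(path, "rb");
    if (!f) {
        return nullptr;
    }

    whisper_model_loader loader = {
        /* .context = */ f,
        /* .read    = */ file_read,
        /* .eof     = */ file_is_eof,
        /* .close   = */ file_close,
    };

    // The JNI code above does not close the asset itself, so whisper_init()
    // is presumably expected to invoke .close once loading is finished.
    return whisper_init(&loader);
}
```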
bench-all.sh

@@ -12,6 +12,18 @@ fi
 
 models=( "tiny" "base" "small" "medium" "large" )
 
+printf "\n"
+printf "Running memcpy benchmark with 1 thread\n"
+printf "\n"
+
+./bench -w 1 -t 1 2>&1
+
+printf "\n"
+printf "Running ggml_mul_mat benchmark with " $n_threads " threads\n"
+printf "\n"
+
+./bench -w 2 -t $n_threads 2>&1
+
 printf "\n"
 printf "Running benchmark for all models\n"
 printf "This can take a while!\n"
@@ -56,4 +68,3 @@ for model in "${models[@]}"; do
 
   printf "| <todo> | <todo> | $config | $model | $n_threads | $load_time | $encode_time | $commit |\n"
 done
-
ggml.c (15 lines changed)

@@ -4373,7 +4373,9 @@ static void ggml_compute_forward_mul_mat_f32(
     if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
         GGML_ASSERT(nb10 == sizeof(float));
 
-        if (params->ith != 0) return;
+        if (params->ith != 0) {
+            return;
+        }
 
         if (params->type == GGML_TASK_INIT) {
             return;
@@ -4616,7 +4618,9 @@ static void ggml_compute_forward_mul_mat_f16_f32(
     if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
         GGML_ASSERT(nb10 == sizeof(float));
 
-        if (params->ith != 0) return;
+        if (params->ith != 0) {
+            return;
+        }
 
         if (params->type == GGML_TASK_INIT) {
             return;
@@ -7054,7 +7058,7 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep) {
 #ifdef __APPLE__
 
 //#include <os/lock.h>
 //
 //typedef os_unfair_lock ggml_lock_t;
 //
 //#define ggml_lock_init(x) UNUSED(x)
@@ -7161,6 +7165,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             if (state->params.ith < state->params.nth) {
                 ggml_compute_forward(&state->params, state->node);
             }
+
             state->node = NULL;
         } else {
             break;
@@ -7205,6 +7210,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
             .node   = NULL,
             .shared = &state_shared,
         };
+
         int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
         assert(rc == 0);
         UNUSED(rc);
@@ -7273,7 +7279,8 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
                            node->src1->type == GGML_TYPE_F32) {
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
                     if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
-                        node->n_tasks = 1;
+                        node->n_tasks = 1; // TODO: this actually is doing nothing
+                                           //       the threads are still spinning
                         cur = sizeof(float)*(node->src0->ne[0]*node->src0->ne[1]);
                     } else {
                         cur = sizeof(ggml_fp16_t)*ggml_nelements(node->src1);
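For context on the early-return change above: ggml compute kernels are invoked once per worker thread, with params->ith identifying the thread and params->nth the total thread count, and each kernel is expected to carve out its own slice of the work. The sketch below is a hypothetical illustration of that row-partitioning convention (not ggml source); in the patched BLAS branch the whole product is computed by a single library call, so every thread other than ith == 0 simply returns.

```cpp
#include <algorithm>

// Hypothetical sketch of the ggml worker convention: nth threads call the same
// kernel, and thread ith processes only its own contiguous slice of rows.
void compute_rows(int ith, int nth, int nrows) {
    const int dr  = (nrows + nth - 1)/nth;      // rows per thread (rounded up)
    const int ir0 = dr*ith;                     // first row for this thread
    const int ir1 = std::min(ir0 + dr, nrows);  // one past the last row

    for (int ir = ir0; ir < ir1; ++ir) {
        // ... process row ir ...
    }
}
```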
whisper.cpp (146 lines changed)

@@ -3091,10 +3091,10 @@ static std::vector<whisper_token_data> whisper_sample_token_topk(
     std::vector<whisper_token_data> result;
     result.reserve(k);
 
-    whisper_token tid;
+    whisper_token tid = vocab.token_beg;
 
-    float pt;
-    float ptsum;
+    float pt    = 0.0;
+    float ptsum = 0.0;
 
     {
         double sum_ts = 0.0;
@@ -3801,6 +3801,7 @@ int whisper_full(
 
                 if (tokens_cur[i].id > whisper_token_beg(ctx) && !params.single_segment) {
                     const auto t1 = seek + 2*(tokens_cur[i].tid - whisper_token_beg(ctx));
 
                     if (!text.empty()) {
                         const auto tt0 = params.speed_up ? 2*t0 : t0;
                         const auto tt1 = params.speed_up ? 2*t1 : t1;
@@ -4059,6 +4060,145 @@ float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int i_token) {
 
 // =================================================================================================
 
+//
+// Temporary interface needed for exposing ggml interface
+// Will be removed in the future when ggml becomes a separate library
+//
+
+WHISPER_API int whisper_bench_memcpy(int n_threads) {
+    ggml_time_init();
+
+    size_t n   = 50;
+    size_t arr = n_threads > 0 ? 1024 : n_threads; // trick to avoid compiler optimizations
+
+    // 1 GB array
+    const size_t size = arr*1024llu*1024llu;
+
+    char * src = (char *) malloc(size);
+    char * dst = (char *) malloc(size);
+
+    for (size_t i = 0; i < size; i++) src[i] = i;
+
+    memcpy(dst, src, size); // heat-up
+
+    double tsum = 0.0;
+
+    for (size_t i = 0; i < n; i++) {
+        const int64_t t0 = ggml_time_us();
+
+        memcpy(dst, src, size);
+
+        const int64_t t1 = ggml_time_us();
+
+        tsum += (t1 - t0)*1e-6;
+
+        src[0] = rand();
+    }
+
+    fprintf(stderr, "memcpy: %.2f GB/s\n", (double) (n*size)/(tsum*1024llu*1024llu*1024llu));
+
+    // needed to prevent the compile from optimizing the memcpy away
+    {
+        double sum = 0.0;
+
+        for (size_t i = 0; i < size; i++) sum += dst[i];
+
+        fprintf(stderr, "sum: %s\n", sum == -536870910.00 ? "ok" : "error");
+    }
+
+    free(src);
+    free(dst);
+
+    return 0;
+}
+
+WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
+    ggml_time_init();
+
+    const int n_max = 128;
+
+    const std::vector<size_t> sizes = {
+        64, 128, 256, 512, 1024, 2048, 4096,
+    };
+
+    const size_t N_max = sizes.back();
+
+    // a: N*N*sizeof(float)
+    // b: N*N*sizeof(float)
+    // c: N*N*sizeof(float)
+    // when F16 is used, there is an extra work buffer of size N*N*sizeof(float)
+    std::vector<char> buf(4llu*N_max*N_max*sizeof(float) + 4*256);
+
+    for (size_t i = 0; i < buf.size(); i++) buf[i] = i;
+
+    for (int j = 0; j < (int) sizes.size(); j++) {
+        int n_fp16 = 0;
+        int n_fp32 = 0;
+
+        // GFLOPS/s
+        double s_fp16 = 0.0;
+        double s_fp32 = 0.0;
+
+        const size_t N = sizes[j];
+
+        for (int k = 0; k < 2; ++k) {
+            const ggml_type wtype = k == 0 ? GGML_TYPE_F16 : GGML_TYPE_F32;
+
+            double & s = k == 0 ? s_fp16 : s_fp32;
+            int    & n = k == 0 ? n_fp16 : n_fp32;
+
+            struct ggml_init_params gparams = {
+                /*.mem_size   =*/ buf.size(),
+                /*.mem_buffer =*/ buf.data(),
+            };
+
+            struct ggml_context * ctx0 = ggml_init(gparams);
+
+            struct ggml_tensor * a = ggml_new_tensor_2d(ctx0, wtype,         N, N);
+            struct ggml_tensor * b = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, N, N);
+
+            struct ggml_tensor * c = ggml_mul_mat(ctx0, a, b);
+
+            struct ggml_cgraph gf = ggml_build_forward(c);
+
+            gf.n_threads = n_threads;
+
+            double tsum = 0.0;
+
+            // heat-up
+            ggml_graph_compute(ctx0, &gf);
+
+            for (int i = 0; i < n_max; ++i) {
+                const int64_t t0 = ggml_time_us();
+
+                ggml_graph_compute(ctx0, &gf);
+
+                const int64_t t1 = ggml_time_us();
+
+                tsum += (t1 - t0)*1e-6;
+                n++;
+
+                if (tsum > 1.0 && n >= 3) {
+                    break;
+                }
+            }
+
+            ggml_free(ctx0);
+
+            s = ((2.0*N*N*N*n)/tsum)*1e-9;
+        }
+
+        fprintf(stderr, "ggml_mul_mat: %5zu x %5zu: F16 %8.1f GFLOPS (%3d runs) / F32 %8.1f GFLOPS (%3d runs)\n",
+                N, N, s_fp16, n_fp16, s_fp32, n_fp32);
+    }
+
+    return 0;
+}
+
+// =================================================================================================
+
+// =================================================================================================
+
 //
 // Experimental stuff below
 //
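The throughput reported by whisper_bench_ggml_mul_mat above comes from the line s = ((2.0*N*N*N*n)/tsum)*1e-9: an N x N by N x N matrix product costs roughly 2*N^3 floating-point operations (one multiply and one add per inner-product term), so n timed runs completing in tsum seconds give a rate of

    GFLOPS = 2 * N^3 * n / (tsum * 10^9)
    e.g. (illustrative numbers, not a measurement): N = 512, n = 100, tsum = 0.5 s
         2 * 512^3 * 100 / (0.5 * 10^9) ~= 53.7 GFLOPS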
whisper.h

@@ -245,7 +245,7 @@ extern "C" {
         int duration_ms;      // audio duration to process in ms
 
         bool translate;
-        bool no_context;      // do not use initial prompt for the decoder (if any)
+        bool no_context;      // do not use past transcription (if any) as initial prompt for the decoder
         bool single_segment;  // force single segment output (useful for streaming)
         bool print_special;   // print special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.)
        bool print_progress;  // print progress information
@@ -350,6 +350,13 @@ extern "C" {
     // Get the probability of the specified token in the specified segment.
     WHISPER_API float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int i_token);
 
+    ////////////////////////////////////////////////////////////////////////////
+
+    // Temporary helpers needed for exposing ggml interface
+
+    WHISPER_API int whisper_bench_memcpy(int n_threads);
+    WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads);
+
 #ifdef __cplusplus
 }
 #endif
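Because the two helpers are exported through whisper.h, they can be driven from any program that links against whisper.cpp, not only from the bench example. A minimal sketch follows; the file name and build setup are assumptions, not part of this change, and the results are printed to stderr exactly as when running the bench tool.

```cpp
// bench_host.cpp: hypothetical standalone driver for the exported bench helpers.
#include "whisper.h"

int main() {
    const int n_threads = 4;

    whisper_bench_memcpy(n_threads);        // equivalent of: ./bench -w 1
    whisper_bench_ggml_mul_mat(n_threads);  // equivalent of: ./bench -w 2

    return 0;
}
```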