mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-07-03 16:01:03 +02:00
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
bf5d4c81b9 |
6
Makefile
6
Makefile
@ -34,6 +34,12 @@ CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
|
||||
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
|
||||
LDFLAGS =
|
||||
|
||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/37
|
||||
ifneq (,$(findstring musl,$(CCV)))
|
||||
CFLAGS += -D_POSIX_SOURCE -D_GNU_SOURCE
|
||||
CXXFLAGS += -D_POSIX_SOURCE -D_GNU_SOURCE
|
||||
endif
|
||||
|
||||
# OS specific
|
||||
# TODO: support Windows
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
|
13
README.md
13
README.md
@ -433,19 +433,6 @@ https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a
|
||||
|
||||
---
|
||||
|
||||
## Video comparison of different models
|
||||
|
||||
Use the [extra/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/extra/bench-wts.sh) script to generate a video in the following format:
|
||||
|
||||
```java
|
||||
./extra/bench-wts.sh samples/jfk.wav
|
||||
ffplay ./samples/jfk.wav.all.mp4
|
||||
```
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4
|
||||
|
||||
---
|
||||
|
||||
## Benchmarks
|
||||
|
||||
In order to have an objective comparison of the performance of the inference across different system configurations,
|
||||
|
@ -9,4 +9,4 @@ To use:
|
||||
5. Select the "release" active build variant, and use Android Studio to run and deploy to your device.
|
||||
[^1]: I recommend the tiny or base models for running on an Android device.
|
||||
|
||||
<img width="300" alt="image" src="https://user-images.githubusercontent.com/1670775/221613663-a17bf770-27ef-45ab-9a46-a5f99ba65d2a.jpg">
|
||||
<img width="300" alt="image" src="https://user-images.githubusercontent.com/1991296/208154256-82d972dc-221b-48c4-bfcb-36ce68602f93.png">
|
||||
|
@ -2,7 +2,6 @@ package com.whispercppdemo.ui.main
|
||||
|
||||
import androidx.compose.foundation.layout.*
|
||||
import androidx.compose.foundation.rememberScrollState
|
||||
import androidx.compose.foundation.text.selection.SelectionContainer
|
||||
import androidx.compose.foundation.verticalScroll
|
||||
import androidx.compose.material3.*
|
||||
import androidx.compose.runtime.Composable
|
||||
@ -20,7 +19,6 @@ fun MainScreen(viewModel: MainScreenViewModel) {
|
||||
canTranscribe = viewModel.canTranscribe,
|
||||
isRecording = viewModel.isRecording,
|
||||
messageLog = viewModel.dataLog,
|
||||
onBenchmarkTapped = viewModel::benchmark,
|
||||
onTranscribeSampleTapped = viewModel::transcribeSample,
|
||||
onRecordTapped = viewModel::toggleRecord
|
||||
)
|
||||
@ -32,7 +30,6 @@ private fun MainScreen(
|
||||
canTranscribe: Boolean,
|
||||
isRecording: Boolean,
|
||||
messageLog: String,
|
||||
onBenchmarkTapped: () -> Unit,
|
||||
onTranscribeSampleTapped: () -> Unit,
|
||||
onRecordTapped: () -> Unit
|
||||
) {
|
||||
@ -48,11 +45,8 @@ private fun MainScreen(
|
||||
.padding(innerPadding)
|
||||
.padding(16.dp)
|
||||
) {
|
||||
Column(verticalArrangement = Arrangement.SpaceBetween) {
|
||||
Row(horizontalArrangement = Arrangement.SpaceBetween, modifier = Modifier.fillMaxWidth()) {
|
||||
BenchmarkButton(enabled = canTranscribe, onClick = onBenchmarkTapped)
|
||||
Row(horizontalArrangement = Arrangement.SpaceBetween) {
|
||||
TranscribeSampleButton(enabled = canTranscribe, onClick = onTranscribeSampleTapped)
|
||||
}
|
||||
RecordButton(
|
||||
enabled = canTranscribe,
|
||||
isRecording = isRecording,
|
||||
@ -66,17 +60,8 @@ private fun MainScreen(
|
||||
|
||||
@Composable
|
||||
private fun MessageLog(log: String) {
|
||||
SelectionContainer() {
|
||||
Text(modifier = Modifier.verticalScroll(rememberScrollState()), text = log)
|
||||
}
|
||||
}
|
||||
|
||||
@Composable
|
||||
private fun BenchmarkButton(enabled: Boolean, onClick: () -> Unit) {
|
||||
Button(onClick = onClick, enabled = enabled) {
|
||||
Text("Benchmark")
|
||||
}
|
||||
}
|
||||
|
||||
@Composable
|
||||
private fun TranscribeSampleButton(enabled: Boolean, onClick: () -> Unit) {
|
||||
|
@ -41,15 +41,10 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
||||
|
||||
init {
|
||||
viewModelScope.launch {
|
||||
printSystemInfo()
|
||||
loadData()
|
||||
}
|
||||
}
|
||||
|
||||
private suspend fun printSystemInfo() {
|
||||
printMessage(String.format("System Info: %s\n", WhisperContext.getSystemInfo()));
|
||||
}
|
||||
|
||||
private suspend fun loadData() {
|
||||
printMessage("Loading data...\n")
|
||||
try {
|
||||
@ -86,29 +81,10 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
||||
//whisperContext = WhisperContext.createContextFromFile(firstModel.absolutePath)
|
||||
}
|
||||
|
||||
fun benchmark() = viewModelScope.launch {
|
||||
runBenchmark(6)
|
||||
}
|
||||
|
||||
fun transcribeSample() = viewModelScope.launch {
|
||||
transcribeAudio(getFirstSample())
|
||||
}
|
||||
|
||||
private suspend fun runBenchmark(nthreads: Int) {
|
||||
if (!canTranscribe) {
|
||||
return
|
||||
}
|
||||
|
||||
canTranscribe = false
|
||||
|
||||
printMessage("Running benchmark. This will take minutes...\n")
|
||||
whisperContext?.benchMemory(nthreads)?.let{ printMessage(it) }
|
||||
printMessage("\n")
|
||||
whisperContext?.benchGgmlMulMat(nthreads)?.let{ printMessage(it) }
|
||||
|
||||
canTranscribe = true
|
||||
}
|
||||
|
||||
private suspend fun getFirstSample(): File = withContext(Dispatchers.IO) {
|
||||
samplesPath.listFiles()!!.first()
|
||||
}
|
||||
@ -138,14 +114,11 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
||||
canTranscribe = false
|
||||
|
||||
try {
|
||||
printMessage("Reading wave samples... ")
|
||||
printMessage("Reading wave samples...\n")
|
||||
val data = readAudioSamples(file)
|
||||
printMessage("${data.size / (16000 / 1000)} ms\n")
|
||||
printMessage("Transcribing data...\n")
|
||||
val start = System.currentTimeMillis()
|
||||
val text = whisperContext?.transcribeData(data)
|
||||
val elapsed = System.currentTimeMillis() - start
|
||||
printMessage("Done ($elapsed ms): $text\n")
|
||||
printMessage("Done: $text\n")
|
||||
} catch (e: Exception) {
|
||||
Log.w(LOG_TAG, e)
|
||||
printMessage("${e.localizedMessage}\n")
|
||||
|
@ -27,14 +27,6 @@ class WhisperContext private constructor(private var ptr: Long) {
|
||||
}
|
||||
}
|
||||
|
||||
suspend fun benchMemory(nthreads: Int): String = withContext(scope.coroutineContext) {
|
||||
return@withContext WhisperLib.benchMemcpy(nthreads)
|
||||
}
|
||||
|
||||
suspend fun benchGgmlMulMat(nthreads: Int): String = withContext(scope.coroutineContext) {
|
||||
return@withContext WhisperLib.benchGgmlMulMat(nthreads)
|
||||
}
|
||||
|
||||
suspend fun release() = withContext(scope.coroutineContext) {
|
||||
if (ptr != 0L) {
|
||||
WhisperLib.freeContext(ptr)
|
||||
@ -74,10 +66,6 @@ class WhisperContext private constructor(private var ptr: Long) {
|
||||
}
|
||||
return WhisperContext(ptr)
|
||||
}
|
||||
|
||||
fun getSystemInfo(): String {
|
||||
return WhisperLib.getSystemInfo()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -129,9 +117,6 @@ private class WhisperLib {
|
||||
external fun fullTranscribe(contextPtr: Long, audioData: FloatArray)
|
||||
external fun getTextSegmentCount(contextPtr: Long): Int
|
||||
external fun getTextSegment(contextPtr: Long, index: Int): String
|
||||
external fun getSystemInfo(): String
|
||||
external fun benchMemcpy(nthread: Int): String
|
||||
external fun benchGgmlMulMat(nthread: Int): String
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <sys/sysinfo.h>
|
||||
#include <string.h>
|
||||
#include "whisper.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#define UNUSED(x) (void)(x)
|
||||
#define TAG "JNI"
|
||||
@ -215,29 +214,3 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegment(
|
||||
jstring string = (*env)->NewStringUTF(env, text);
|
||||
return string;
|
||||
}
|
||||
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getSystemInfo(
|
||||
JNIEnv *env, jobject thiz
|
||||
) {
|
||||
UNUSED(thiz);
|
||||
const char *sysinfo = whisper_print_system_info();
|
||||
jstring string = (*env)->NewStringUTF(env, sysinfo);
|
||||
return string;
|
||||
}
|
||||
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
|
||||
jint n_threads) {
|
||||
UNUSED(thiz);
|
||||
const char *bench_ggml_memcpy = whisper_bench_memcpy_str(n_threads);
|
||||
jstring string = (*env)->NewStringUTF(env, bench_ggml_memcpy);
|
||||
}
|
||||
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz,
|
||||
jint n_threads) {
|
||||
UNUSED(thiz);
|
||||
const char *bench_ggml_mul_mat = whisper_bench_ggml_mul_mat_str(n_threads);
|
||||
jstring string = (*env)->NewStringUTF(env, bench_ggml_mul_mat);
|
||||
}
|
||||
|
2
ggml.c
2
ggml.c
@ -79,7 +79,7 @@ typedef void* thread_ret_t;
|
||||
#define static_assert(cond, msg) _Static_assert(cond, msg)
|
||||
#endif
|
||||
|
||||
#define GGML_PERF
|
||||
/*#define GGML_PERF*/
|
||||
#define GGML_DEBUG 0
|
||||
#define GGML_GELU_FP16
|
||||
|
||||
|
37
whisper.cpp
37
whisper.cpp
@ -2160,12 +2160,6 @@ static bool whisper_decode_internal(
|
||||
ggml_graph_compute (ctx0, &gf);
|
||||
}
|
||||
|
||||
// print the time for computing the last ggml_mul_mat that computes logits
|
||||
// also print the total decoder time
|
||||
// these need to be called after ggml_graph_compute()
|
||||
printf("logits t = %7.3f ms (%2d runs, N = %3d, ggml_mul_mat: [%d x %d] * [%d x %d])\n", 1e-3*double(logits->perf_time_us)/logits->perf_runs, logits->perf_runs, N, logits->ne[0], logits->ne[1], cur->ne[1], cur->ne[0]);
|
||||
printf("total t = %7.3f ms (%2d runs)\n", 1e-3*double(gf.perf_time_us)/gf.perf_runs, gf.perf_runs);
|
||||
|
||||
// extract logits for all N tokens
|
||||
//logits_out.resize(N*n_vocab);
|
||||
//memcpy(logits_out.data(), ggml_get_data(logits), sizeof(float)*N*n_vocab);
|
||||
@ -4557,15 +4551,6 @@ float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int
|
||||
//
|
||||
|
||||
WHISPER_API int whisper_bench_memcpy(int n_threads) {
|
||||
fputs(whisper_bench_memcpy_str(n_threads), stderr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
|
||||
static std::string s;
|
||||
s = "";
|
||||
char strbuf[256];
|
||||
|
||||
ggml_time_init();
|
||||
|
||||
size_t n = 50;
|
||||
@ -4595,8 +4580,7 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
|
||||
src[0] = rand();
|
||||
}
|
||||
|
||||
snprintf(strbuf, sizeof(strbuf), "memcpy: %.2f GB/s\n", (double) (n*size)/(tsum*1024llu*1024llu*1024llu));
|
||||
s += strbuf;
|
||||
fprintf(stderr, "memcpy: %.2f GB/s\n", (double) (n*size)/(tsum*1024llu*1024llu*1024llu));
|
||||
|
||||
// needed to prevent the compile from optimizing the memcpy away
|
||||
{
|
||||
@ -4604,26 +4588,16 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
|
||||
|
||||
for (size_t i = 0; i < size; i++) sum += dst[i];
|
||||
|
||||
snprintf(strbuf, sizeof(strbuf), "sum: %s %f\n", sum == -536870910.00 ? "ok" : "error", sum);
|
||||
s += strbuf;
|
||||
fprintf(stderr, "sum: %s %f\n", sum == -536870910.00 ? "ok" : "error", sum);
|
||||
}
|
||||
|
||||
free(src);
|
||||
free(dst);
|
||||
|
||||
return s.c_str();
|
||||
}
|
||||
|
||||
WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
|
||||
fputs(whisper_bench_ggml_mul_mat_str(n_threads), stderr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
|
||||
static std::string s;
|
||||
s = "";
|
||||
char strbuf[256];
|
||||
|
||||
WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
|
||||
ggml_time_init();
|
||||
|
||||
const int n_max = 128;
|
||||
@ -4699,12 +4673,11 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
|
||||
s = ((2.0*N*N*N*n)/tsum)*1e-9;
|
||||
}
|
||||
|
||||
snprintf(strbuf, sizeof(strbuf), "ggml_mul_mat: %5zu x %5zu: F16 %8.1f GFLOPS (%3d runs) / F32 %8.1f GFLOPS (%3d runs)\n",
|
||||
fprintf(stderr, "ggml_mul_mat: %5zu x %5zu: F16 %8.1f GFLOPS (%3d runs) / F32 %8.1f GFLOPS (%3d runs)\n",
|
||||
N, N, s_fp16, n_fp16, s_fp32, n_fp32);
|
||||
s += strbuf;
|
||||
}
|
||||
|
||||
return s.c_str();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
@ -462,9 +462,7 @@ extern "C" {
|
||||
// Temporary helpers needed for exposing ggml interface
|
||||
|
||||
WHISPER_API int whisper_bench_memcpy(int n_threads);
|
||||
WHISPER_API const char * whisper_bench_memcpy_str(int n_threads);
|
||||
WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads);
|
||||
WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
Reference in New Issue
Block a user