forked from extern/whisper.cpp
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
bf5d4c81b9 |
6
Makefile
6
Makefile
@ -34,6 +34,12 @@ CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
|
|||||||
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
|
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
|
||||||
LDFLAGS =
|
LDFLAGS =
|
||||||
|
|
||||||
|
# ref: https://github.com/ggerganov/whisper.cpp/issues/37
|
||||||
|
ifneq (,$(findstring musl,$(CCV)))
|
||||||
|
CFLAGS += -D_POSIX_SOURCE -D_GNU_SOURCE
|
||||||
|
CXXFLAGS += -D_POSIX_SOURCE -D_GNU_SOURCE
|
||||||
|
endif
|
||||||
|
|
||||||
# OS specific
|
# OS specific
|
||||||
# TODO: support Windows
|
# TODO: support Windows
|
||||||
ifeq ($(UNAME_S),Linux)
|
ifeq ($(UNAME_S),Linux)
|
||||||
|
13
README.md
13
README.md
@ -433,19 +433,6 @@ https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Video comparison of different models
|
|
||||||
|
|
||||||
Use the [extra/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/extra/bench-wts.sh) script to generate a video in the following format:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
./extra/bench-wts.sh samples/jfk.wav
|
|
||||||
ffplay ./samples/jfk.wav.all.mp4
|
|
||||||
```
|
|
||||||
|
|
||||||
https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Benchmarks
|
## Benchmarks
|
||||||
|
|
||||||
In order to have an objective comparison of the performance of the inference across different system configurations,
|
In order to have an objective comparison of the performance of the inference across different system configurations,
|
||||||
|
@ -9,4 +9,4 @@ To use:
|
|||||||
5. Select the "release" active build variant, and use Android Studio to run and deploy to your device.
|
5. Select the "release" active build variant, and use Android Studio to run and deploy to your device.
|
||||||
[^1]: I recommend the tiny or base models for running on an Android device.
|
[^1]: I recommend the tiny or base models for running on an Android device.
|
||||||
|
|
||||||
<img width="300" alt="image" src="https://user-images.githubusercontent.com/1670775/221613663-a17bf770-27ef-45ab-9a46-a5f99ba65d2a.jpg">
|
<img width="300" alt="image" src="https://user-images.githubusercontent.com/1991296/208154256-82d972dc-221b-48c4-bfcb-36ce68602f93.png">
|
||||||
|
@ -2,7 +2,6 @@ package com.whispercppdemo.ui.main
|
|||||||
|
|
||||||
import androidx.compose.foundation.layout.*
|
import androidx.compose.foundation.layout.*
|
||||||
import androidx.compose.foundation.rememberScrollState
|
import androidx.compose.foundation.rememberScrollState
|
||||||
import androidx.compose.foundation.text.selection.SelectionContainer
|
|
||||||
import androidx.compose.foundation.verticalScroll
|
import androidx.compose.foundation.verticalScroll
|
||||||
import androidx.compose.material3.*
|
import androidx.compose.material3.*
|
||||||
import androidx.compose.runtime.Composable
|
import androidx.compose.runtime.Composable
|
||||||
@ -20,7 +19,6 @@ fun MainScreen(viewModel: MainScreenViewModel) {
|
|||||||
canTranscribe = viewModel.canTranscribe,
|
canTranscribe = viewModel.canTranscribe,
|
||||||
isRecording = viewModel.isRecording,
|
isRecording = viewModel.isRecording,
|
||||||
messageLog = viewModel.dataLog,
|
messageLog = viewModel.dataLog,
|
||||||
onBenchmarkTapped = viewModel::benchmark,
|
|
||||||
onTranscribeSampleTapped = viewModel::transcribeSample,
|
onTranscribeSampleTapped = viewModel::transcribeSample,
|
||||||
onRecordTapped = viewModel::toggleRecord
|
onRecordTapped = viewModel::toggleRecord
|
||||||
)
|
)
|
||||||
@ -32,7 +30,6 @@ private fun MainScreen(
|
|||||||
canTranscribe: Boolean,
|
canTranscribe: Boolean,
|
||||||
isRecording: Boolean,
|
isRecording: Boolean,
|
||||||
messageLog: String,
|
messageLog: String,
|
||||||
onBenchmarkTapped: () -> Unit,
|
|
||||||
onTranscribeSampleTapped: () -> Unit,
|
onTranscribeSampleTapped: () -> Unit,
|
||||||
onRecordTapped: () -> Unit
|
onRecordTapped: () -> Unit
|
||||||
) {
|
) {
|
||||||
@ -48,11 +45,8 @@ private fun MainScreen(
|
|||||||
.padding(innerPadding)
|
.padding(innerPadding)
|
||||||
.padding(16.dp)
|
.padding(16.dp)
|
||||||
) {
|
) {
|
||||||
Column(verticalArrangement = Arrangement.SpaceBetween) {
|
Row(horizontalArrangement = Arrangement.SpaceBetween) {
|
||||||
Row(horizontalArrangement = Arrangement.SpaceBetween, modifier = Modifier.fillMaxWidth()) {
|
|
||||||
BenchmarkButton(enabled = canTranscribe, onClick = onBenchmarkTapped)
|
|
||||||
TranscribeSampleButton(enabled = canTranscribe, onClick = onTranscribeSampleTapped)
|
TranscribeSampleButton(enabled = canTranscribe, onClick = onTranscribeSampleTapped)
|
||||||
}
|
|
||||||
RecordButton(
|
RecordButton(
|
||||||
enabled = canTranscribe,
|
enabled = canTranscribe,
|
||||||
isRecording = isRecording,
|
isRecording = isRecording,
|
||||||
@ -66,17 +60,8 @@ private fun MainScreen(
|
|||||||
|
|
||||||
@Composable
|
@Composable
|
||||||
private fun MessageLog(log: String) {
|
private fun MessageLog(log: String) {
|
||||||
SelectionContainer() {
|
|
||||||
Text(modifier = Modifier.verticalScroll(rememberScrollState()), text = log)
|
Text(modifier = Modifier.verticalScroll(rememberScrollState()), text = log)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Button labelled "Benchmark".
 *
 * @param enabled forwarded to [Button] as its `enabled` flag.
 * @param onClick forwarded to [Button] as its click callback.
 */
@Composable
private fun BenchmarkButton(enabled: Boolean, onClick: () -> Unit) =
    Button(enabled = enabled, onClick = onClick) { Text("Benchmark") }
|
|
||||||
|
|
||||||
@Composable
|
@Composable
|
||||||
private fun TranscribeSampleButton(enabled: Boolean, onClick: () -> Unit) {
|
private fun TranscribeSampleButton(enabled: Boolean, onClick: () -> Unit) {
|
||||||
|
@ -41,15 +41,10 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
|||||||
|
|
||||||
init {
|
init {
|
||||||
viewModelScope.launch {
|
viewModelScope.launch {
|
||||||
printSystemInfo()
|
|
||||||
loadData()
|
loadData()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Passes a "System Info: ..." line, built from [WhisperContext.getSystemInfo], to [printMessage]. */
private suspend fun printSystemInfo() {
    val systemInfo = WhisperContext.getSystemInfo()
    printMessage("System Info: $systemInfo\n")
}
|
|
||||||
|
|
||||||
private suspend fun loadData() {
|
private suspend fun loadData() {
|
||||||
printMessage("Loading data...\n")
|
printMessage("Loading data...\n")
|
||||||
try {
|
try {
|
||||||
@ -86,29 +81,10 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
|||||||
//whisperContext = WhisperContext.createContextFromFile(firstModel.absolutePath)
|
//whisperContext = WhisperContext.createContextFromFile(firstModel.absolutePath)
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Launches [runBenchmark] with 6 threads in [viewModelScope] and returns the launched job. */
fun benchmark() = viewModelScope.launch { runBenchmark(nthreads = 6) }
|
|
||||||
|
|
||||||
fun transcribeSample() = viewModelScope.launch {
|
fun transcribeSample() = viewModelScope.launch {
|
||||||
transcribeAudio(getFirstSample())
|
transcribeAudio(getFirstSample())
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Runs the memory benchmark followed by the ggml matrix-multiplication benchmark and
 * prints each report via [printMessage].
 *
 * Does nothing when [canTranscribe] is false. Otherwise [canTranscribe] is cleared for the
 * duration of the run and restored when the run ends, whether it completes or fails.
 *
 * @param nthreads thread count passed to both benchmark calls.
 */
private suspend fun runBenchmark(nthreads: Int) {
    if (!canTranscribe) {
        return
    }

    canTranscribe = false
    try {
        printMessage("Running benchmark. This will take minutes...\n")
        whisperContext?.benchMemory(nthreads)?.let { printMessage(it) }
        printMessage("\n")
        whisperContext?.benchGgmlMulMat(nthreads)?.let { printMessage(it) }
    } finally {
        // Restore the flag even if a benchmark call throws or the coroutine is cancelled;
        // otherwise every later call would hit the early return above.
        canTranscribe = true
    }
}
|
|
||||||
|
|
||||||
private suspend fun getFirstSample(): File = withContext(Dispatchers.IO) {
|
private suspend fun getFirstSample(): File = withContext(Dispatchers.IO) {
|
||||||
samplesPath.listFiles()!!.first()
|
samplesPath.listFiles()!!.first()
|
||||||
}
|
}
|
||||||
@ -138,14 +114,11 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
|||||||
canTranscribe = false
|
canTranscribe = false
|
||||||
|
|
||||||
try {
|
try {
|
||||||
printMessage("Reading wave samples... ")
|
printMessage("Reading wave samples...\n")
|
||||||
val data = readAudioSamples(file)
|
val data = readAudioSamples(file)
|
||||||
printMessage("${data.size / (16000 / 1000)} ms\n")
|
|
||||||
printMessage("Transcribing data...\n")
|
printMessage("Transcribing data...\n")
|
||||||
val start = System.currentTimeMillis()
|
|
||||||
val text = whisperContext?.transcribeData(data)
|
val text = whisperContext?.transcribeData(data)
|
||||||
val elapsed = System.currentTimeMillis() - start
|
printMessage("Done: $text\n")
|
||||||
printMessage("Done ($elapsed ms): $text\n")
|
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
Log.w(LOG_TAG, e)
|
Log.w(LOG_TAG, e)
|
||||||
printMessage("${e.localizedMessage}\n")
|
printMessage("${e.localizedMessage}\n")
|
||||||
|
@ -27,14 +27,6 @@ class WhisperContext private constructor(private var ptr: Long) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Calls [WhisperLib.benchMemcpy] inside `scope`'s coroutine context.
 *
 * @param nthreads thread count forwarded to the native call.
 * @return the string returned by the native call.
 */
suspend fun benchMemory(nthreads: Int): String =
    withContext(scope.coroutineContext) { WhisperLib.benchMemcpy(nthreads) }
|
|
||||||
|
|
||||||
/**
 * Calls [WhisperLib.benchGgmlMulMat] inside `scope`'s coroutine context.
 *
 * @param nthreads thread count forwarded to the native call.
 * @return the string returned by the native call.
 */
suspend fun benchGgmlMulMat(nthreads: Int): String =
    withContext(scope.coroutineContext) { WhisperLib.benchGgmlMulMat(nthreads) }
|
|
||||||
|
|
||||||
suspend fun release() = withContext(scope.coroutineContext) {
|
suspend fun release() = withContext(scope.coroutineContext) {
|
||||||
if (ptr != 0L) {
|
if (ptr != 0L) {
|
||||||
WhisperLib.freeContext(ptr)
|
WhisperLib.freeContext(ptr)
|
||||||
@ -74,10 +66,6 @@ class WhisperContext private constructor(private var ptr: Long) {
|
|||||||
}
|
}
|
||||||
return WhisperContext(ptr)
|
return WhisperContext(ptr)
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the string produced by the native [WhisperLib.getSystemInfo] call. */
fun getSystemInfo(): String = WhisperLib.getSystemInfo()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -129,9 +117,6 @@ private class WhisperLib {
|
|||||||
external fun fullTranscribe(contextPtr: Long, audioData: FloatArray)
|
external fun fullTranscribe(contextPtr: Long, audioData: FloatArray)
|
||||||
external fun getTextSegmentCount(contextPtr: Long): Int
|
external fun getTextSegmentCount(contextPtr: Long): Int
|
||||||
external fun getTextSegment(contextPtr: Long, index: Int): String
|
external fun getTextSegment(contextPtr: Long, index: Int): String
|
||||||
external fun getSystemInfo(): String
|
|
||||||
external fun benchMemcpy(nthread: Int): String
|
|
||||||
external fun benchGgmlMulMat(nthread: Int): String
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
#include <sys/sysinfo.h>
|
#include <sys/sysinfo.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "whisper.h"
|
#include "whisper.h"
|
||||||
#include "ggml.h"
|
|
||||||
|
|
||||||
#define UNUSED(x) (void)(x)
|
#define UNUSED(x) (void)(x)
|
||||||
#define TAG "JNI"
|
#define TAG "JNI"
|
||||||
@ -215,29 +214,3 @@ Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegment(
|
|||||||
jstring string = (*env)->NewStringUTF(env, text);
|
jstring string = (*env)->NewStringUTF(env, text);
|
||||||
return string;
|
return string;
|
||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT jstring JNICALL
|
|
||||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getSystemInfo(
|
|
||||||
JNIEnv *env, jobject thiz
|
|
||||||
) {
|
|
||||||
UNUSED(thiz);
|
|
||||||
const char *sysinfo = whisper_print_system_info();
|
|
||||||
jstring string = (*env)->NewStringUTF(env, sysinfo);
|
|
||||||
return string;
|
|
||||||
}
|
|
||||||
|
|
||||||
JNIEXPORT jstring JNICALL
|
|
||||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
|
|
||||||
jint n_threads) {
|
|
||||||
UNUSED(thiz);
|
|
||||||
const char *bench_ggml_memcpy = whisper_bench_memcpy_str(n_threads);
|
|
||||||
jstring string = (*env)->NewStringUTF(env, bench_ggml_memcpy);
|
|
||||||
}
|
|
||||||
|
|
||||||
JNIEXPORT jstring JNICALL
|
|
||||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz,
|
|
||||||
jint n_threads) {
|
|
||||||
UNUSED(thiz);
|
|
||||||
const char *bench_ggml_mul_mat = whisper_bench_ggml_mul_mat_str(n_threads);
|
|
||||||
jstring string = (*env)->NewStringUTF(env, bench_ggml_mul_mat);
|
|
||||||
}
|
|
||||||
|
31
whisper.cpp
31
whisper.cpp
@ -4551,15 +4551,6 @@ float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int
|
|||||||
//
|
//
|
||||||
|
|
||||||
WHISPER_API int whisper_bench_memcpy(int n_threads) {
|
WHISPER_API int whisper_bench_memcpy(int n_threads) {
|
||||||
fputs(whisper_bench_memcpy_str(n_threads), stderr);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
|
|
||||||
static std::string s;
|
|
||||||
s = "";
|
|
||||||
char strbuf[256];
|
|
||||||
|
|
||||||
ggml_time_init();
|
ggml_time_init();
|
||||||
|
|
||||||
size_t n = 50;
|
size_t n = 50;
|
||||||
@ -4589,8 +4580,7 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
|
|||||||
src[0] = rand();
|
src[0] = rand();
|
||||||
}
|
}
|
||||||
|
|
||||||
snprintf(strbuf, sizeof(strbuf), "memcpy: %.2f GB/s\n", (double) (n*size)/(tsum*1024llu*1024llu*1024llu));
|
fprintf(stderr, "memcpy: %.2f GB/s\n", (double) (n*size)/(tsum*1024llu*1024llu*1024llu));
|
||||||
s += strbuf;
|
|
||||||
|
|
||||||
// needed to prevent the compiler from optimizing the memcpy away
|
// needed to prevent the compiler from optimizing the memcpy away
|
||||||
{
|
{
|
||||||
@ -4598,26 +4588,16 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
|
|||||||
|
|
||||||
for (size_t i = 0; i < size; i++) sum += dst[i];
|
for (size_t i = 0; i < size; i++) sum += dst[i];
|
||||||
|
|
||||||
snprintf(strbuf, sizeof(strbuf), "sum: %s %f\n", sum == -536870910.00 ? "ok" : "error", sum);
|
fprintf(stderr, "sum: %s %f\n", sum == -536870910.00 ? "ok" : "error", sum);
|
||||||
s += strbuf;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
free(src);
|
free(src);
|
||||||
free(dst);
|
free(dst);
|
||||||
|
|
||||||
return s.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
|
|
||||||
fputs(whisper_bench_ggml_mul_mat_str(n_threads), stderr);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
|
WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
|
||||||
static std::string s;
|
|
||||||
s = "";
|
|
||||||
char strbuf[256];
|
|
||||||
|
|
||||||
ggml_time_init();
|
ggml_time_init();
|
||||||
|
|
||||||
const int n_max = 128;
|
const int n_max = 128;
|
||||||
@ -4693,12 +4673,11 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
|
|||||||
s = ((2.0*N*N*N*n)/tsum)*1e-9;
|
s = ((2.0*N*N*N*n)/tsum)*1e-9;
|
||||||
}
|
}
|
||||||
|
|
||||||
snprintf(strbuf, sizeof(strbuf), "ggml_mul_mat: %5zu x %5zu: F16 %8.1f GFLOPS (%3d runs) / F32 %8.1f GFLOPS (%3d runs)\n",
|
fprintf(stderr, "ggml_mul_mat: %5zu x %5zu: F16 %8.1f GFLOPS (%3d runs) / F32 %8.1f GFLOPS (%3d runs)\n",
|
||||||
N, N, s_fp16, n_fp16, s_fp32, n_fp32);
|
N, N, s_fp16, n_fp16, s_fp32, n_fp32);
|
||||||
s += strbuf;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return s.c_str();
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
@ -462,9 +462,7 @@ extern "C" {
|
|||||||
// Temporary helpers needed for exposing ggml interface
|
// Temporary helpers needed for exposing ggml interface
|
||||||
|
|
||||||
WHISPER_API int whisper_bench_memcpy(int n_threads);
|
WHISPER_API int whisper_bench_memcpy(int n_threads);
|
||||||
WHISPER_API const char * whisper_bench_memcpy_str(int n_threads);
|
|
||||||
WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads);
|
WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads);
|
||||||
WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user