mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-07-01 15:00:31 +02:00
Compare commits
1 Commits
ggml-backe
...
try-fix-ab
Author | SHA1 | Date | |
---|---|---|---|
aaa3b5e5f6 |
20
.github/workflows/build.yml
vendored
20
.github/workflows/build.yml
vendored
@ -88,7 +88,7 @@ jobs:
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
apt update
|
||||
apt install -y build-essential cmake libsdl2-dev
|
||||
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
make
|
||||
ctest -L gh --output-on-failure'
|
||||
|
||||
@ -115,7 +115,7 @@ jobs:
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
apt update
|
||||
apt install -y build-essential cmake libsdl2-dev
|
||||
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
|
||||
cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
|
||||
make
|
||||
ctest -L gh --output-on-failure'
|
||||
|
||||
@ -182,7 +182,7 @@ jobs:
|
||||
run: >
|
||||
cmake -S . -B ./build -A ${{ matrix.arch }}
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
||||
-DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
@ -217,10 +217,10 @@ jobs:
|
||||
sdl2: [ON]
|
||||
include:
|
||||
- arch: Win32
|
||||
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.24/OpenBLAS-0.3.24-x86.zip
|
||||
obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
|
||||
s2arc: x86
|
||||
- arch: x64
|
||||
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.24/OpenBLAS-0.3.24-x64.zip
|
||||
obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
|
||||
s2arc: x64
|
||||
- sdl2: ON
|
||||
s2ver: 2.26.0
|
||||
@ -239,7 +239,7 @@ jobs:
|
||||
7z x blas.zip -oblas -y
|
||||
copy blas/include/cblas.h .
|
||||
copy blas/include/openblas_config.h .
|
||||
echo "OPENBLAS_PATH=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
|
||||
echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
|
||||
|
||||
- name: Fetch SDL2 and set SDL2_DIR
|
||||
if: matrix.sdl2 == 'ON'
|
||||
@ -252,9 +252,9 @@ jobs:
|
||||
run: >
|
||||
cmake -S . -B ./build -A ${{ matrix.arch }}
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
-DWHISPER_OPENBLAS=${{ matrix.blas }}
|
||||
-DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
|
||||
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
||||
-DWHISPER_SUPPORT_OPENBLAS=${{ matrix.blas }}
|
||||
-DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
|
||||
-DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
@ -263,7 +263,7 @@ jobs:
|
||||
|
||||
- name: Copy libopenblas.dll
|
||||
if: matrix.blas == 'ON'
|
||||
run: copy "$env:OPENBLAS_PATH/bin/libopenblas.dll" build/bin/${{ matrix.build }}
|
||||
run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
|
||||
|
||||
- name: Copy SDL2.dll
|
||||
if: matrix.sdl2 == 'ON'
|
||||
|
5
.gitignore
vendored
5
.gitignore
vendored
@ -18,11 +18,6 @@ build-no-accel/
|
||||
build-sanitize-addr/
|
||||
build-sanitize-thread/
|
||||
|
||||
# SPM
|
||||
.build/
|
||||
.swiftpm
|
||||
*.metallib
|
||||
|
||||
/main
|
||||
/stream
|
||||
/command
|
||||
|
@ -1,6 +1,6 @@
|
||||
cmake_minimum_required (VERSION 3.5)
|
||||
|
||||
project(whisper.cpp VERSION 1.4.3)
|
||||
project(whisper.cpp VERSION 1.4.2)
|
||||
|
||||
# Add path to modules
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||
|
3
Makefile
3
Makefile
@ -417,10 +417,9 @@ samples:
|
||||
.PHONY: medium.en
|
||||
.PHONY: medium
|
||||
.PHONY: large-v1
|
||||
.PHONY: large-v2
|
||||
.PHONY: large
|
||||
|
||||
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large: main
|
||||
tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
|
||||
bash ./models/download-ggml-model.sh $@
|
||||
@echo ""
|
||||
@echo "==============================================="
|
||||
|
@ -1,77 +0,0 @@
|
||||
// swift-tools-version:5.5
|
||||
|
||||
import PackageDescription
|
||||
|
||||
#if arch(arm) || arch(arm64)
|
||||
let platforms: [SupportedPlatform]? = [
|
||||
.macOS(.v12),
|
||||
.iOS(.v14),
|
||||
.watchOS(.v4),
|
||||
.tvOS(.v14)
|
||||
]
|
||||
let exclude: [String] = []
|
||||
let resources: [Resource] = [
|
||||
.process("ggml-metal.metal")
|
||||
]
|
||||
let additionalSources: [String] = ["ggml-metal.m"]
|
||||
let additionalSettings: [CSetting] = [
|
||||
.unsafeFlags(["-fno-objc-arc"]),
|
||||
.define("GGML_USE_METAL")
|
||||
]
|
||||
#else
|
||||
let platforms: [SupportedPlatform]? = nil
|
||||
let exclude: [String] = ["ggml-metal.metal"]
|
||||
let resources: [Resource] = []
|
||||
let additionalSources: [String] = []
|
||||
let additionalSettings: [CSetting] = []
|
||||
#endif
|
||||
|
||||
let package = Package(
|
||||
name: "whisper",
|
||||
platforms: platforms,
|
||||
products: [
|
||||
.library(name: "whisper", targets: ["whisper"]),
|
||||
],
|
||||
targets: [
|
||||
.target(
|
||||
name: "whisper",
|
||||
path: ".",
|
||||
exclude: exclude + [
|
||||
"bindings",
|
||||
"cmake",
|
||||
"coreml",
|
||||
"examples",
|
||||
"extra",
|
||||
"models",
|
||||
"samples",
|
||||
"tests",
|
||||
"CMakeLists.txt",
|
||||
"ggml-cuda.cu",
|
||||
"ggml-cuda.h",
|
||||
"Makefile"
|
||||
],
|
||||
sources: [
|
||||
"ggml.c",
|
||||
"whisper.cpp",
|
||||
"ggml-alloc.c",
|
||||
"ggml-backend.c",
|
||||
"ggml-quants.c"
|
||||
] + additionalSources,
|
||||
resources: resources,
|
||||
publicHeadersPath: "spm-headers",
|
||||
cSettings: [
|
||||
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
|
||||
.define("GGML_USE_ACCELERATE")
|
||||
// NOTE: NEW_LAPACK will required iOS version 16.4+
|
||||
// We should consider add this in the future when we drop support for iOS 14
|
||||
// (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
|
||||
// .define("ACCELERATE_NEW_LAPACK"),
|
||||
// .define("ACCELERATE_LAPACK_ILP64")
|
||||
] + additionalSettings,
|
||||
linkerSettings: [
|
||||
.linkedFramework("Accelerate")
|
||||
]
|
||||
)
|
||||
],
|
||||
cxxLanguageStandard: .cxx11
|
||||
)
|
@ -6,7 +6,7 @@
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://www.npmjs.com/package/whisper.cpp/)
|
||||
|
||||
Beta: [v1.4.3](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.3) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
||||
Beta: [v1.4.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.2) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
||||
|
||||
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
||||
|
||||
@ -234,7 +234,6 @@ make small
|
||||
make medium.en
|
||||
make medium
|
||||
make large-v1
|
||||
make large-v2
|
||||
make large
|
||||
```
|
||||
|
||||
@ -246,7 +245,7 @@ make large
|
||||
| base | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
|
||||
| small | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
|
||||
| medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
|
||||
| large | 2.9 GB | ~3.3 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
|
||||
| large | 2.9 GB | ~3.3 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
|
||||
|
||||
## Quantization
|
||||
|
||||
|
@ -24,7 +24,7 @@ const (
|
||||
|
||||
var (
|
||||
// The models which will be downloaded, if no model is specified as an argument
|
||||
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large"}
|
||||
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large"}
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -83,6 +83,7 @@ const (
|
||||
SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second
|
||||
SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
|
||||
NumFFT = C.WHISPER_N_FFT
|
||||
NumMEL = C.WHISPER_N_MEL
|
||||
HopLength = C.WHISPER_HOP_LENGTH
|
||||
ChunkSize = C.WHISPER_CHUNK_SIZE
|
||||
)
|
||||
@ -102,7 +103,7 @@ var (
|
||||
func Whisper_init(path string) *Context {
|
||||
cPath := C.CString(path)
|
||||
defer C.free(unsafe.Pointer(cPath))
|
||||
if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
|
||||
if ctx := C.whisper_init_from_file(cPath); ctx != nil {
|
||||
return (*Context)(ctx)
|
||||
} else {
|
||||
return nil
|
||||
|
Submodule bindings/ios updated: 9752de4100...44b39fd4ec
@ -4,7 +4,6 @@ import com.sun.jna.Structure;
|
||||
import com.sun.jna.ptr.PointerByReference;
|
||||
import io.github.ggerganov.whispercpp.ggml.GgmlType;
|
||||
import io.github.ggerganov.whispercpp.WhisperModel;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@ -24,9 +23,8 @@ public class WhisperContext extends Structure {
|
||||
public PointerByReference vocab;
|
||||
public PointerByReference state;
|
||||
|
||||
/** populated by whisper_init_from_file_with_params() */
|
||||
/** populated by whisper_init_from_file() */
|
||||
String path_model;
|
||||
WhisperContextParams params;
|
||||
|
||||
// public static class ByReference extends WhisperContext implements Structure.ByReference {
|
||||
// }
|
||||
|
@ -2,7 +2,6 @@ package io.github.ggerganov.whispercpp;
|
||||
|
||||
import com.sun.jna.Native;
|
||||
import com.sun.jna.Pointer;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
|
||||
|
||||
@ -16,9 +15,8 @@ import java.io.IOException;
|
||||
public class WhisperCpp implements AutoCloseable {
|
||||
private WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
|
||||
private Pointer ctx = null;
|
||||
private Pointer paramsPointer = null;
|
||||
private Pointer greedyParamsPointer = null;
|
||||
private Pointer beamParamsPointer = null;
|
||||
private Pointer greedyPointer = null;
|
||||
private Pointer beamPointer = null;
|
||||
|
||||
public File modelDir() {
|
||||
String modelDirPath = System.getenv("XDG_CACHE_HOME");
|
||||
@ -33,18 +31,6 @@ public class WhisperCpp implements AutoCloseable {
|
||||
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
|
||||
*/
|
||||
public void initContext(String modelPath) throws FileNotFoundException {
|
||||
initContextImpl(modelPath, getContextDefaultParams());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
|
||||
* @param params - params to use when initialising the context
|
||||
*/
|
||||
public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
|
||||
initContextImpl(modelPath, params);
|
||||
}
|
||||
|
||||
private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
|
||||
if (ctx != null) {
|
||||
lib.whisper_free(ctx);
|
||||
}
|
||||
@ -57,26 +43,13 @@ public class WhisperCpp implements AutoCloseable {
|
||||
modelPath = new File(modelDir(), modelPath).getAbsolutePath();
|
||||
}
|
||||
|
||||
ctx = lib.whisper_init_from_file_with_params(modelPath, params);
|
||||
ctx = lib.whisper_init_from_file(modelPath);
|
||||
|
||||
if (ctx == null) {
|
||||
throw new FileNotFoundException(modelPath);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
|
||||
* Because this function allocates memory for the params, the caller must call either:
|
||||
* - call `whisper_free_context_params()`
|
||||
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||
*/
|
||||
public WhisperContextParams getContextDefaultParams() {
|
||||
paramsPointer = lib.whisper_context_default_params_by_ref();
|
||||
WhisperContextParams params = new WhisperContextParams(paramsPointer);
|
||||
params.read();
|
||||
return params;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides default params which can be used with `whisper_full()` etc.
|
||||
* Because this function allocates memory for the params, the caller must call either:
|
||||
@ -90,15 +63,15 @@ public class WhisperCpp implements AutoCloseable {
|
||||
|
||||
// whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
|
||||
if (strategy == WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY) {
|
||||
if (greedyParamsPointer == null) {
|
||||
greedyParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
|
||||
if (greedyPointer == null) {
|
||||
greedyPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
|
||||
}
|
||||
pointer = greedyParamsPointer;
|
||||
pointer = greedyPointer;
|
||||
} else {
|
||||
if (beamParamsPointer == null) {
|
||||
beamParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
|
||||
if (beamPointer == null) {
|
||||
beamPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
|
||||
}
|
||||
pointer = beamParamsPointer;
|
||||
pointer = beamPointer;
|
||||
}
|
||||
|
||||
WhisperFullParams params = new WhisperFullParams(pointer);
|
||||
@ -120,17 +93,13 @@ public class WhisperCpp implements AutoCloseable {
|
||||
}
|
||||
|
||||
private void freeParams() {
|
||||
if (paramsPointer != null) {
|
||||
Native.free(Pointer.nativeValue(paramsPointer));
|
||||
paramsPointer = null;
|
||||
if (greedyPointer != null) {
|
||||
Native.free(Pointer.nativeValue(greedyPointer));
|
||||
greedyPointer = null;
|
||||
}
|
||||
if (greedyParamsPointer != null) {
|
||||
Native.free(Pointer.nativeValue(greedyParamsPointer));
|
||||
greedyParamsPointer = null;
|
||||
}
|
||||
if (beamParamsPointer != null) {
|
||||
Native.free(Pointer.nativeValue(beamParamsPointer));
|
||||
beamParamsPointer = null;
|
||||
if (beamPointer != null) {
|
||||
Native.free(Pointer.nativeValue(beamPointer));
|
||||
beamPointer = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,7 +5,6 @@ import com.sun.jna.Native;
|
||||
import com.sun.jna.Pointer;
|
||||
import io.github.ggerganov.whispercpp.model.WhisperModelLoader;
|
||||
import io.github.ggerganov.whispercpp.model.WhisperTokenData;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
||||
|
||||
public interface WhisperCppJnaLibrary extends Library {
|
||||
@ -14,31 +13,12 @@ public interface WhisperCppJnaLibrary extends Library {
|
||||
String whisper_print_system_info();
|
||||
|
||||
/**
|
||||
* DEPRECATED. Allocate (almost) all memory needed for the model by loading from a file.
|
||||
* Allocate (almost) all memory needed for the model by loading from a file.
|
||||
*
|
||||
* @param path_model Path to the model file
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_from_file(String path_model);
|
||||
|
||||
/**
|
||||
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
|
||||
* Because this function allocates memory for the params, the caller must call either:
|
||||
* - call `whisper_free_context_params()`
|
||||
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||
*/
|
||||
Pointer whisper_context_default_params_by_ref();
|
||||
|
||||
void whisper_free_context_params(Pointer params);
|
||||
|
||||
/**
|
||||
* Allocate (almost) all memory needed for the model by loading from a file.
|
||||
*
|
||||
* @param path_model Path to the model file
|
||||
* @param params Pointer to whisper_context_params
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
|
||||
|
||||
/**
|
||||
* Allocate (almost) all memory needed for the model by loading from a buffer.
|
||||
|
@ -1,31 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
import com.sun.jna.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Parameters for the whisper_init_from_file_with_params() function.
|
||||
* If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
|
||||
* whisper_context_default_params()
|
||||
*/
|
||||
public class WhisperContextParams extends Structure {
|
||||
|
||||
public WhisperContextParams(Pointer p) {
|
||||
super(p);
|
||||
}
|
||||
|
||||
/** Use GPU for inference Number (default = true) */
|
||||
public CBool use_gpu;
|
||||
|
||||
/** Use GPU for inference Number (default = true) */
|
||||
public void useGpu(boolean enable) {
|
||||
use_gpu = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("use_gpu");
|
||||
}
|
||||
}
|
@ -20,7 +20,7 @@ struct whisper_context * g_context;
|
||||
EMSCRIPTEN_BINDINGS(whisper) {
|
||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||
if (g_context == nullptr) {
|
||||
g_context = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
|
||||
g_context = whisper_init_from_file(path_model.c_str());
|
||||
if (g_context != nullptr) {
|
||||
return true;
|
||||
} else {
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "whisper.cpp",
|
||||
"version": "1.4.3",
|
||||
"version": "1.4.2",
|
||||
"description": "Whisper speech recognition",
|
||||
"main": "whisper.js",
|
||||
"scripts": {
|
||||
|
@ -87,7 +87,7 @@ static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
|
||||
if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
|
||||
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
|
||||
}
|
||||
rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
|
||||
rw->context = whisper_init_from_file(StringValueCStr(whisper_model_file_path));
|
||||
if (rw->context == nullptr) {
|
||||
rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
|
||||
}
|
||||
|
@ -11,7 +11,6 @@ const whisperParamsMock = {
|
||||
language: "en",
|
||||
model: path.join(__dirname, "../../../models/ggml-base.en.bin"),
|
||||
fname_inp: path.join(__dirname, "../../../samples/jfk.wav"),
|
||||
use_gpu: true,
|
||||
};
|
||||
|
||||
describe("Run whisper.node", () => {
|
||||
|
@ -36,7 +36,6 @@ struct whisper_params {
|
||||
bool print_colors = false;
|
||||
bool print_progress = false;
|
||||
bool no_timestamps = false;
|
||||
bool use_gpu = true;
|
||||
|
||||
std::string language = "en";
|
||||
std::string prompt;
|
||||
@ -154,9 +153,7 @@ int run(whisper_params ¶ms, std::vector<std::vector<std::string>> &result) {
|
||||
|
||||
// whisper init
|
||||
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||
struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
|
||||
|
||||
if (ctx == nullptr) {
|
||||
fprintf(stderr, "error: failed to initialize whisper context\n");
|
||||
@ -318,12 +315,10 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
|
||||
std::string language = whisper_params.Get("language").As<Napi::String>();
|
||||
std::string model = whisper_params.Get("model").As<Napi::String>();
|
||||
std::string input = whisper_params.Get("fname_inp").As<Napi::String>();
|
||||
bool use_gpu = whisper_params.Get("use_gpu").As<Napi::Boolean>();
|
||||
|
||||
params.language = language;
|
||||
params.model = model;
|
||||
params.fname_inp.emplace_back(input);
|
||||
params.use_gpu = use_gpu;
|
||||
|
||||
Napi::Function callback = info[1].As<Napi::Function>();
|
||||
Worker* worker = new Worker(callback, params);
|
||||
|
@ -11,7 +11,6 @@ const whisperParams = {
|
||||
language: "en",
|
||||
model: path.join(__dirname, "../../models/ggml-base.en.bin"),
|
||||
fname_inp: "../../samples/jfk.wav",
|
||||
use_gpu: true,
|
||||
};
|
||||
|
||||
const arguments = process.argv.slice(2);
|
||||
|
@ -23,9 +23,7 @@ void bench_main(size_t index) {
|
||||
|
||||
fprintf(stderr, "%s: running benchmark with %d threads - please wait...\n", __func__, n_threads);
|
||||
|
||||
const int n_mels = whisper_model_n_mels(ctx);
|
||||
|
||||
if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
|
||||
if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
|
||||
fprintf(stderr, "error: failed to set mel: %d\n", ret);
|
||||
return;
|
||||
}
|
||||
@ -59,7 +57,7 @@ EMSCRIPTEN_BINDINGS(bench) {
|
||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||
if (g_contexts[i] == nullptr) {
|
||||
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
|
||||
g_contexts[i] = whisper_init_from_file(path_model.c_str());
|
||||
if (g_contexts[i] != nullptr) {
|
||||
if (g_worker.joinable()) {
|
||||
g_worker.join();
|
||||
|
@ -11,8 +11,6 @@ struct whisper_params {
|
||||
int32_t what = 0; // what to benchmark: 0 - whisper ecoder, 1 - memcpy, 2 - ggml_mul_mat
|
||||
|
||||
std::string model = "models/ggml-base.en.bin";
|
||||
|
||||
bool use_gpu = true;
|
||||
};
|
||||
|
||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||
@ -25,10 +23,9 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
whisper_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
}
|
||||
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
|
||||
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
||||
else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
|
||||
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
||||
else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); }
|
||||
else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
whisper_print_usage(argc, argv, params);
|
||||
@ -48,7 +45,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
|
||||
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
||||
fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
|
||||
fprintf(stderr, " %-7s 0 - whisper\n", "");
|
||||
fprintf(stderr, " %-7s 1 - memcpy\n", "");
|
||||
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
|
||||
@ -58,10 +54,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
int whisper_bench_full(const whisper_params & params) {
|
||||
// whisper init
|
||||
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
|
||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||
struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
|
||||
|
||||
{
|
||||
fprintf(stderr, "\n");
|
||||
@ -73,9 +66,7 @@ int whisper_bench_full(const whisper_params & params) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
const int n_mels = whisper_model_n_mels(ctx);
|
||||
|
||||
if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
|
||||
if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
|
||||
fprintf(stderr, "error: failed to set mel: %d\n", ret);
|
||||
return 3;
|
||||
}
|
||||
|
@ -243,7 +243,7 @@ EMSCRIPTEN_BINDINGS(command) {
|
||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||
if (g_contexts[i] == nullptr) {
|
||||
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
|
||||
g_contexts[i] = whisper_init_from_file(path_model.c_str());
|
||||
if (g_contexts[i] != nullptr) {
|
||||
g_running = true;
|
||||
if (g_worker.joinable()) {
|
||||
|
@ -38,7 +38,6 @@ struct whisper_params {
|
||||
bool print_special = false;
|
||||
bool print_energy = false;
|
||||
bool no_timestamps = true;
|
||||
bool use_gpu = true;
|
||||
|
||||
std::string language = "en";
|
||||
std::string model = "models/ggml-base.en.bin";
|
||||
@ -69,7 +68,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
||||
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
||||
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
||||
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
||||
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
|
||||
@ -103,7 +101,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
|
||||
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
||||
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
|
||||
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
|
||||
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
||||
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
|
||||
@ -613,10 +610,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
// whisper init
|
||||
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
|
||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||
struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
|
||||
|
||||
// print some info about the processing
|
||||
{
|
||||
|
@ -48,7 +48,7 @@ if [ -n "$3" ]; then
|
||||
fi
|
||||
|
||||
# Whisper models
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
|
||||
|
||||
# list available models
|
||||
function list_models {
|
||||
|
@ -30,7 +30,6 @@ struct whisper_params {
|
||||
bool translate = false;
|
||||
bool print_special = false;
|
||||
bool print_energy = false;
|
||||
bool use_gpu = true;
|
||||
|
||||
std::string language = "en";
|
||||
std::string model = "models/ggml-base.en.bin";
|
||||
@ -73,7 +72,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
||||
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
||||
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
||||
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
||||
else {
|
||||
@ -104,7 +102,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
|
||||
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
||||
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
|
||||
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
|
||||
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
||||
fprintf(stderr, "\n");
|
||||
@ -435,9 +432,7 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
|
||||
// whisper init
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||
struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
|
||||
// init audio
|
||||
|
||||
audio_async audio(30*1000);
|
||||
|
@ -90,7 +90,6 @@ struct whisper_params {
|
||||
bool print_progress = false;
|
||||
bool no_timestamps = false;
|
||||
bool log_score = false;
|
||||
bool use_gpu = true;
|
||||
|
||||
std::string language = "en";
|
||||
std::string prompt;
|
||||
@ -166,7 +165,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
|
||||
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
|
||||
else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
whisper_print_usage(argc, argv, params);
|
||||
@ -223,7 +221,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
|
||||
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
|
||||
fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
@ -880,10 +877,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
// whisper init
|
||||
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
|
||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||
struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
|
||||
|
||||
if (ctx == nullptr) {
|
||||
fprintf(stderr, "error: failed to initialize whisper context\n");
|
||||
|
@ -132,7 +132,7 @@ EMSCRIPTEN_BINDINGS(stream) {
|
||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||
if (g_contexts[i] == nullptr) {
|
||||
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
|
||||
g_contexts[i] = whisper_init_from_file(path_model.c_str());
|
||||
if (g_contexts[i] != nullptr) {
|
||||
g_running = true;
|
||||
if (g_worker.joinable()) {
|
||||
|
@ -48,12 +48,11 @@ struct whisper_params {
|
||||
bool no_context = true;
|
||||
bool no_timestamps = false;
|
||||
bool tinydiarize = false;
|
||||
bool save_audio = false; // save audio to wav file
|
||||
bool use_gpu = true;
|
||||
|
||||
std::string language = "en";
|
||||
std::string model = "models/ggml-base.en.bin";
|
||||
std::string fname_out;
|
||||
bool save_audio = false; // save audio to wav file
|
||||
};
|
||||
|
||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||
@ -66,26 +65,25 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
whisper_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
}
|
||||
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
|
||||
else if ( arg == "--step") { params.step_ms = std::stoi(argv[++i]); }
|
||||
else if ( arg == "--length") { params.length_ms = std::stoi(argv[++i]); }
|
||||
else if ( arg == "--keep") { params.keep_ms = std::stoi(argv[++i]); }
|
||||
else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
|
||||
else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
|
||||
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
|
||||
else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
|
||||
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
||||
else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
|
||||
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
||||
else if (arg == "-kc" || arg == "--keep-context") { params.no_context = false; }
|
||||
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
||||
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
||||
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
|
||||
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
|
||||
else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
|
||||
else if ( arg == "--step") { params.step_ms = std::stoi(argv[++i]); }
|
||||
else if ( arg == "--length") { params.length_ms = std::stoi(argv[++i]); }
|
||||
else if ( arg == "--keep") { params.keep_ms = std::stoi(argv[++i]); }
|
||||
else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
|
||||
else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
|
||||
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
|
||||
else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
|
||||
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
||||
else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
|
||||
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
||||
else if (arg == "-kc" || arg == "--keep-context") { params.no_context = false; }
|
||||
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
||||
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
||||
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
|
||||
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
|
||||
else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
|
||||
|
||||
else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
@ -120,9 +118,8 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
|
||||
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
||||
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
|
||||
fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
|
||||
fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
|
||||
fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false");
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU inference\n", params.use_gpu ? "false" : "true");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
@ -166,10 +163,7 @@ int main(int argc, char ** argv) {
|
||||
exit(0);
|
||||
}
|
||||
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
|
||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||
struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
|
||||
|
||||
std::vector<float> pcmf32 (n_samples_30s, 0.0f);
|
||||
std::vector<float> pcmf32_old;
|
||||
@ -430,4 +424,4 @@ int main(int argc, char ** argv) {
|
||||
whisper_free(ctx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
@ -14,8 +14,6 @@ if (WHISPER_SDL2)
|
||||
../common-sdl.cpp
|
||||
../../ggml.c
|
||||
../../ggml-alloc.c
|
||||
../../ggml-backend.c
|
||||
../../ggml-quants.c
|
||||
../../whisper.cpp)
|
||||
|
||||
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS} ../../)
|
||||
|
@ -63,7 +63,6 @@ struct whisper_params {
|
||||
bool print_energy = false;
|
||||
bool no_timestamps = true;
|
||||
bool verbose_prompt = false;
|
||||
bool use_gpu = true;
|
||||
|
||||
std::string person = "Georgi";
|
||||
std::string language = "en";
|
||||
@ -85,26 +84,25 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
whisper_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
}
|
||||
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
|
||||
else if (arg == "-vms" || arg == "--voice-ms") { params.voice_ms = std::stoi(argv[++i]); }
|
||||
else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
|
||||
else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
|
||||
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
|
||||
else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
|
||||
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
||||
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
||||
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
|
||||
else if (arg == "-vp" || arg == "--verbose-prompt") { params.verbose_prompt = true; }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
|
||||
else if (arg == "--session") { params.path_session = argv[++i];}
|
||||
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
||||
else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
|
||||
else if (arg == "-ml" || arg == "--model-llama") { params.model_llama = argv[++i]; }
|
||||
else if (arg == "-s" || arg == "--speak") { params.speak = argv[++i]; }
|
||||
else if (arg == "--prompt-file") {
|
||||
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
|
||||
else if (arg == "-vms" || arg == "--voice-ms") { params.voice_ms = std::stoi(argv[++i]); }
|
||||
else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
|
||||
else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
|
||||
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
|
||||
else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
|
||||
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
||||
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
||||
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
|
||||
else if (arg == "--verbose-prompt") { params.verbose_prompt = true; }
|
||||
else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
|
||||
else if (arg == "--session") { params.path_session = argv[++i];}
|
||||
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
||||
else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
|
||||
else if (arg == "-ml" || arg == "--model-llama") { params.model_llama = argv[++i]; }
|
||||
else if (arg == "-s" || arg == "--speak") { params.speak = argv[++i]; }
|
||||
else if (arg == "--prompt-file") {
|
||||
std::ifstream file(argv[++i]);
|
||||
std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
|
||||
if (params.prompt.back() == '\n') {
|
||||
@ -112,7 +110,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
}
|
||||
}
|
||||
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
whisper_print_usage(argc, argv, params);
|
||||
@ -128,28 +125,27 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "options:\n");
|
||||
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
|
||||
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
|
||||
fprintf(stderr, " -vms N, --voice-ms N [%-7d] voice duration in milliseconds\n", params.voice_ms);
|
||||
fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
|
||||
fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
|
||||
fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
|
||||
fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
|
||||
fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
|
||||
fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
|
||||
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
|
||||
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
||||
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
|
||||
fprintf(stderr, " -vp, --verbose-prompt [%-7s] print prompt at start\n", params.verbose_prompt ? "true" : "false");
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
|
||||
fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
|
||||
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
|
||||
fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
|
||||
fprintf(stderr, " -ml FILE, --model-llama [%-7s] llama model file\n", params.model_llama.c_str());
|
||||
fprintf(stderr, " -s FILE, --speak TEXT [%-7s] command for TTS\n", params.speak.c_str());
|
||||
fprintf(stderr, " --prompt-file FNAME [%-7s] file with custom prompt to start dialog\n", "");
|
||||
fprintf(stderr, " --session FNAME file to cache model state in (may be large!) (default: none)\n");
|
||||
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
|
||||
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
|
||||
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
|
||||
fprintf(stderr, " -vms N, --voice-ms N [%-7d] voice duration in milliseconds\n", params.voice_ms);
|
||||
fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
|
||||
fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
|
||||
fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
|
||||
fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
|
||||
fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
|
||||
fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
|
||||
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
|
||||
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
||||
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
|
||||
fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
|
||||
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
|
||||
fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
|
||||
fprintf(stderr, " -ml FILE, --model-llama [%-7s] llama model file\n", params.model_llama.c_str());
|
||||
fprintf(stderr, " -s FILE, --speak TEXT [%-7s] command for TTS\n", params.speak.c_str());
|
||||
fprintf(stderr, " --prompt-file FNAME [%-7s] file with custom prompt to start dialog\n", "");
|
||||
fprintf(stderr, " --session FNAME file to cache model state in (may be large!) (default: none)\n");
|
||||
fprintf(stderr, " --verbose-prompt [%-7s] print prompt at start\n", params.verbose_prompt ? "true" : "false");
|
||||
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
@ -256,19 +252,13 @@ int main(int argc, char ** argv) {
|
||||
|
||||
// whisper init
|
||||
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
|
||||
struct whisper_context * ctx_wsp = whisper_init_from_file_with_params(params.model_wsp.c_str(), cparams);
|
||||
struct whisper_context * ctx_wsp = whisper_init_from_file(params.model_wsp.c_str());
|
||||
|
||||
// llama init
|
||||
|
||||
llama_backend_init(true);
|
||||
|
||||
auto lmparams = llama_model_default_params();
|
||||
if (!params.use_gpu) {
|
||||
lmparams.n_gpu_layers = 0;
|
||||
}
|
||||
|
||||
struct llama_model * model_llama = llama_load_model_from_file(params.model_llama.c_str(), lmparams);
|
||||
|
||||
|
@ -271,7 +271,7 @@ EMSCRIPTEN_BINDINGS(talk) {
|
||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||
if (g_contexts[i] == nullptr) {
|
||||
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
|
||||
g_contexts[i] = whisper_init_from_file(path_model.c_str());
|
||||
if (g_contexts[i] != nullptr) {
|
||||
g_running = true;
|
||||
if (g_worker.joinable()) {
|
||||
|
@ -31,7 +31,6 @@ struct whisper_params {
|
||||
bool print_special = false;
|
||||
bool print_energy = false;
|
||||
bool no_timestamps = true;
|
||||
bool use_gpu = true;
|
||||
|
||||
std::string person = "Santa";
|
||||
std::string language = "en";
|
||||
@ -62,7 +61,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
||||
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
||||
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
|
||||
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
||||
else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
|
||||
@ -96,7 +94,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
|
||||
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
||||
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
|
||||
fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
|
||||
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
|
||||
fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
|
||||
@ -184,10 +181,8 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
|
||||
// whisper init
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
|
||||
struct whisper_context * ctx_wsp = whisper_init_from_file_with_params(params.model_wsp.c_str(), cparams);
|
||||
struct whisper_context * ctx_wsp = whisper_init_from_file(params.model_wsp.c_str());
|
||||
|
||||
// gpt init
|
||||
|
||||
|
@ -21,7 +21,7 @@ help()
|
||||
echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
|
||||
echo "options:"
|
||||
echo "-s Step in seconds (default is $step)."
|
||||
echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large' (default is '$model')."
|
||||
echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large' (default is '$model')."
|
||||
echo "-t Number of threads to use."
|
||||
echo "-h Print this help page."
|
||||
echo
|
||||
|
@ -18,7 +18,9 @@ android {
|
||||
vectorDrawables {
|
||||
useSupportLibrary true
|
||||
}
|
||||
|
||||
ndk {
|
||||
abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
|
||||
}
|
||||
}
|
||||
|
||||
buildTypes {
|
||||
@ -41,10 +43,20 @@ android {
|
||||
composeOptions {
|
||||
kotlinCompilerExtensionVersion '1.5.0'
|
||||
}
|
||||
ndkVersion "25.2.9519653"
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
path = file("src/main/jni/whisper/CMakeLists.txt")
|
||||
}
|
||||
}
|
||||
packagingOptions {
|
||||
resources {
|
||||
excludes += '/META-INF/{AL2.0,LGPL2.1}'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation project(':lib')
|
||||
implementation 'androidx.activity:activity-compose:1.7.2'
|
||||
implementation 'androidx.compose.material:material-icons-core:1.5.0'
|
||||
implementation 'androidx.compose.material3:material3:1.1.1'
|
||||
|
@ -15,7 +15,7 @@ import androidx.lifecycle.viewmodel.initializer
|
||||
import androidx.lifecycle.viewmodel.viewModelFactory
|
||||
import com.whispercppdemo.media.decodeWaveFile
|
||||
import com.whispercppdemo.recorder.Recorder
|
||||
import com.whispercpp.whisper.WhisperContext
|
||||
import com.whispercppdemo.whisper.WhisperContext
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.runBlocking
|
||||
@ -35,7 +35,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
||||
private val modelsPath = File(application.filesDir, "models")
|
||||
private val samplesPath = File(application.filesDir, "samples")
|
||||
private var recorder: Recorder = Recorder()
|
||||
private var whisperContext: com.whispercpp.whisper.WhisperContext? = null
|
||||
private var whisperContext: WhisperContext? = null
|
||||
private var mediaPlayer: MediaPlayer? = null
|
||||
private var recordedFile: File? = null
|
||||
|
||||
@ -47,7 +47,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
||||
}
|
||||
|
||||
private suspend fun printSystemInfo() {
|
||||
printMessage(String.format("System Info: %s\n", com.whispercpp.whisper.WhisperContext.getSystemInfo()))
|
||||
printMessage(String.format("System Info: %s\n", WhisperContext.getSystemInfo()))
|
||||
}
|
||||
|
||||
private suspend fun loadData() {
|
||||
@ -78,7 +78,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
||||
printMessage("Loading model...\n")
|
||||
val models = application.assets.list("models/")
|
||||
if (models != null) {
|
||||
whisperContext = com.whispercpp.whisper.WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
|
||||
whisperContext = WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
|
||||
printMessage("Loaded model ${models[0]}.\n")
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
package com.whispercpp.whisper
|
||||
package com.whispercppdemo.whisper
|
||||
|
||||
import android.content.res.AssetManager
|
||||
import android.os.Build
|
@ -1,4 +1,4 @@
|
||||
package com.whispercpp.whisper
|
||||
package com.whispercppdemo.whisper
|
||||
|
||||
import android.util.Log
|
||||
import java.io.BufferedReader
|
@ -127,11 +127,11 @@ static struct whisper_context *whisper_init_from_asset(
|
||||
.close = &asset_close
|
||||
};
|
||||
|
||||
return whisper_init_with_params(&loader, whisper_context_default_params());
|
||||
return whisper_init(&loader);
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContextFromAsset(
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromAsset(
|
||||
JNIEnv *env, jobject thiz, jobject assetManager, jstring asset_path_str) {
|
||||
UNUSED(thiz);
|
||||
struct whisper_context *context = NULL;
|
||||
@ -142,18 +142,18 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContextFromAsset(
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContext(
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
|
||||
JNIEnv *env, jobject thiz, jstring model_path_str) {
|
||||
UNUSED(thiz);
|
||||
struct whisper_context *context = NULL;
|
||||
const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL);
|
||||
context = whisper_init_from_file_with_params(model_path_chars, whisper_context_default_params());
|
||||
context = whisper_init_from_file(model_path_chars);
|
||||
(*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars);
|
||||
return (jlong) context;
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL
|
||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_freeContext(
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_freeContext(
|
||||
JNIEnv *env, jobject thiz, jlong context_ptr) {
|
||||
UNUSED(env);
|
||||
UNUSED(thiz);
|
||||
@ -162,7 +162,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_freeContext(
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL
|
||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe(
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_fullTranscribe(
|
||||
JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) {
|
||||
UNUSED(thiz);
|
||||
struct whisper_context *context = (struct whisper_context *) context_ptr;
|
||||
@ -194,7 +194,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe(
|
||||
}
|
||||
|
||||
JNIEXPORT jint JNICALL
|
||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegmentCount(
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegmentCount(
|
||||
JNIEnv *env, jobject thiz, jlong context_ptr) {
|
||||
UNUSED(env);
|
||||
UNUSED(thiz);
|
||||
@ -203,7 +203,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegmentCount(
|
||||
}
|
||||
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegment(
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegment(
|
||||
JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
|
||||
UNUSED(thiz);
|
||||
struct whisper_context *context = (struct whisper_context *) context_ptr;
|
||||
@ -213,7 +213,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegment(
|
||||
}
|
||||
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_getSystemInfo(
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getSystemInfo(
|
||||
JNIEnv *env, jobject thiz
|
||||
) {
|
||||
UNUSED(thiz);
|
||||
@ -223,7 +223,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getSystemInfo(
|
||||
}
|
||||
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
|
||||
jint n_threads) {
|
||||
UNUSED(thiz);
|
||||
const char *bench_ggml_memcpy = whisper_bench_memcpy_str(n_threads);
|
||||
@ -231,7 +231,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, j
|
||||
}
|
||||
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz,
|
||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz,
|
||||
jint n_threads) {
|
||||
UNUSED(thiz);
|
||||
const char *bench_ggml_mul_mat = whisper_bench_ggml_mul_mat_str(n_threads);
|
1
examples/whisper.android/lib/.gitignore
vendored
1
examples/whisper.android/lib/.gitignore
vendored
@ -1 +0,0 @@
|
||||
/build
|
@ -1,51 +0,0 @@
|
||||
plugins {
|
||||
id 'com.android.library'
|
||||
id 'org.jetbrains.kotlin.android'
|
||||
}
|
||||
|
||||
android {
|
||||
namespace 'com.whispercpp'
|
||||
compileSdk 34
|
||||
|
||||
defaultConfig {
|
||||
minSdk 26
|
||||
targetSdk 34
|
||||
versionCode 1
|
||||
versionName "1.0"
|
||||
|
||||
ndk {
|
||||
abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
|
||||
}
|
||||
}
|
||||
|
||||
buildTypes {
|
||||
release {
|
||||
minifyEnabled false
|
||||
}
|
||||
}
|
||||
compileOptions {
|
||||
sourceCompatibility JavaVersion.VERSION_1_8
|
||||
targetCompatibility JavaVersion.VERSION_1_8
|
||||
}
|
||||
kotlinOptions {
|
||||
jvmTarget = '1.8'
|
||||
}
|
||||
|
||||
ndkVersion "25.2.9519653"
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
path = file("src/main/jni/whisper/CMakeLists.txt")
|
||||
}
|
||||
}
|
||||
packagingOptions {
|
||||
resources {
|
||||
excludes += '/META-INF/{AL2.0,LGPL2.1}'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation 'androidx.core:core-ktx:1.9.0'
|
||||
implementation 'androidx.appcompat:appcompat:1.6.1'
|
||||
implementation 'com.google.android.material:material:1.8.0'
|
||||
}
|
@ -1,4 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
|
||||
|
||||
</manifest>
|
@ -14,4 +14,3 @@ dependencyResolutionManagement {
|
||||
}
|
||||
rootProject.name = "WhisperCppDemo"
|
||||
include ':app'
|
||||
include ':lib'
|
||||
|
@ -17,8 +17,8 @@
|
||||
18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; };
|
||||
18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; };
|
||||
18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; };
|
||||
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK -DGGML_USE_METAL"; }; };
|
||||
18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL"; }; };
|
||||
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML"; }; };
|
||||
18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE"; }; };
|
||||
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
|
||||
18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.c */; };
|
||||
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
|
||||
|
@ -61,13 +61,7 @@ void AudioInputCallback(void * inUserData,
|
||||
NSLog(@"Loading model from %@", modelPath);
|
||||
|
||||
// create ggml context
|
||||
|
||||
struct whisper_context_params cparams = whisper_context_default_params();
|
||||
#if TARGET_OS_SIMULATOR
|
||||
cparams.use_gpu = false;
|
||||
NSLog(@"Running on simulator, using CPU");
|
||||
#endif
|
||||
stateInp.ctx = whisper_init_from_file_with_params([modelPath UTF8String], cparams);
|
||||
stateInp.ctx = whisper_init_from_file([modelPath UTF8String]);
|
||||
|
||||
// check if the model was loaded successfully
|
||||
if (stateInp.ctx == NULL) {
|
||||
|
@ -1,5 +1,4 @@
|
||||
import Foundation
|
||||
import whisper
|
||||
|
||||
enum WhisperError: Error {
|
||||
case couldNotInitializeContext
|
||||
@ -56,12 +55,7 @@ actor WhisperContext {
|
||||
}
|
||||
|
||||
static func createContext(path: String) throws -> WhisperContext {
|
||||
var params = whisper_context_default_params()
|
||||
#if targetEnvironment(simulator)
|
||||
params.use_gpu = false
|
||||
print("Running on the simulator, using CPU")
|
||||
#endif
|
||||
let context = whisper_init_from_file_with_params(path, params)
|
||||
let context = whisper_init_from_file(path)
|
||||
if let context {
|
||||
return WhisperContext(context: context)
|
||||
} else {
|
||||
|
@ -0,0 +1,4 @@
|
||||
//
|
||||
// Use this file to import your target's public headers that you would like to expose to Swift.
|
||||
//
|
||||
#import "whisper.h"
|
@ -15,9 +15,14 @@
|
||||
0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9A29539CCF003032C3 /* WhisperCppDemoApp.swift */; };
|
||||
0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; };
|
||||
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
|
||||
0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */; };
|
||||
0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC729539EB0003032C3 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-Wno-shorten-64-to-32"; }; };
|
||||
0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC929539EB0003032C3 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -Wno-shorten-64-to-32"; }; };
|
||||
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
|
||||
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
|
||||
E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */ = {isa = PBXBuildFile; productRef = E3F92DC42AFA8E3800A6A9D4 /* whisper */; };
|
||||
18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE14C2AF555FA0044A204 /* ggml-backend.c */; };
|
||||
18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1512AF555FA0044A204 /* ggml-quants.c */; };
|
||||
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 18AED47F2AB21F2B009D854F /* ggml-alloc.c */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
@ -31,9 +36,22 @@
|
||||
0AAC5D9C29539CCF003032C3 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
||||
0AAC5D9E29539CD0003032C3 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
|
||||
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
|
||||
0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
|
||||
0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "WhisperCppDemo-Bridging-Header.h"; sourceTree = "<group>"; };
|
||||
0AAC5DC729539EB0003032C3 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = whisper.cpp; sourceTree = "<group>"; };
|
||||
0AAC5DC829539EB0003032C3 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whisper.h; sourceTree = "<group>"; };
|
||||
0AAC5DC929539EB0003032C3 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ggml.c; sourceTree = "<group>"; };
|
||||
0AAC5DCA29539EB0003032C3 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ggml.h; sourceTree = "<group>"; };
|
||||
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
|
||||
0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
|
||||
E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = whisper.cpp; path = ../..; sourceTree = "<group>"; };
|
||||
18ABE14C2AF555FA0044A204 /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-backend.c"; sourceTree = "<group>"; };
|
||||
18ABE14D2AF555FA0044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-backend.h"; sourceTree = "<group>"; };
|
||||
18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-backend-impl.h"; sourceTree = "<group>"; };
|
||||
18ABE14F2AF555FA0044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-quants.h"; sourceTree = "<group>"; };
|
||||
18ABE1502AF555FA0044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-impl.h"; sourceTree = "<group>"; };
|
||||
18ABE1512AF555FA0044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-quants.c"; sourceTree = "<group>"; };
|
||||
18AED47F2AB21F2B009D854F /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-alloc.c"; sourceTree = "<group>"; };
|
||||
18AED4802AB21F2B009D854F /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-alloc.h"; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
@ -41,7 +59,6 @@
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@ -77,12 +94,11 @@
|
||||
0AAC5D8E29539CCF003032C3 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */,
|
||||
0A8E48FF2954B3F100704C1B /* README.md */,
|
||||
0AAC5DC529539E89003032C3 /* whisper.cpp */,
|
||||
0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */,
|
||||
0AAC5D9929539CCF003032C3 /* whisper.swiftui.demo */,
|
||||
0AAC5D9829539CCF003032C3 /* Products */,
|
||||
E3F92DC32AFA8E3800A6A9D4 /* Frameworks */,
|
||||
);
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
@ -107,9 +123,39 @@
|
||||
path = whisper.swiftui.demo;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
0AAC5DA129539CD0003032C3 /* Preview Content */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */,
|
||||
);
|
||||
name = "Preview Content";
|
||||
path = "../Preview Content";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
0AAC5DC529539E89003032C3 /* whisper.cpp */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */,
|
||||
18ABE14C2AF555FA0044A204 /* ggml-backend.c */,
|
||||
18ABE14D2AF555FA0044A204 /* ggml-backend.h */,
|
||||
18ABE1502AF555FA0044A204 /* ggml-impl.h */,
|
||||
18ABE1512AF555FA0044A204 /* ggml-quants.c */,
|
||||
18ABE14F2AF555FA0044A204 /* ggml-quants.h */,
|
||||
18AED47F2AB21F2B009D854F /* ggml-alloc.c */,
|
||||
18AED4802AB21F2B009D854F /* ggml-alloc.h */,
|
||||
0AAC5DC929539EB0003032C3 /* ggml.c */,
|
||||
0AAC5DCA29539EB0003032C3 /* ggml.h */,
|
||||
0AAC5DC729539EB0003032C3 /* whisper.cpp */,
|
||||
0AAC5DC829539EB0003032C3 /* whisper.h */,
|
||||
);
|
||||
name = whisper.cpp;
|
||||
path = ../..;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */,
|
||||
0AAC5DD02953A394003032C3 /* LibWhisper.swift */,
|
||||
);
|
||||
path = whisper.cpp.swift;
|
||||
@ -128,17 +174,11 @@
|
||||
children = (
|
||||
0AAC5D9E29539CD0003032C3 /* Assets.xcassets */,
|
||||
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */,
|
||||
0AAC5DA129539CD0003032C3 /* Preview Content */,
|
||||
);
|
||||
path = "Supporting files";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
E3F92DC32AFA8E3800A6A9D4 /* Frameworks */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
);
|
||||
name = Frameworks;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
@ -155,9 +195,6 @@
|
||||
dependencies = (
|
||||
);
|
||||
name = whisper.swiftui;
|
||||
packageProductDependencies = (
|
||||
E3F92DC42AFA8E3800A6A9D4 /* whisper */,
|
||||
);
|
||||
productName = WhisperCppDemo;
|
||||
productReference = 0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */;
|
||||
productType = "com.apple.product-type.application";
|
||||
@ -202,6 +239,7 @@
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
0AA751482953AC2E001EE061 /* samples in Resources */,
|
||||
0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */,
|
||||
0A8E49002954B3F100704C1B /* README.md in Resources */,
|
||||
0AA751492953AC2E001EE061 /* models in Resources */,
|
||||
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */,
|
||||
@ -217,10 +255,15 @@
|
||||
files = (
|
||||
0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */,
|
||||
0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */,
|
||||
0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */,
|
||||
18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */,
|
||||
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
|
||||
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
|
||||
0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
|
||||
0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */,
|
||||
0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
|
||||
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */,
|
||||
18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@ -348,7 +391,7 @@
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
CURRENT_PROJECT_VERSION = 1;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = "";
|
||||
DEVELOPMENT_TEAM = P8JZH34X63;
|
||||
ENABLE_HARDENED_RUNTIME = YES;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
GENERATE_INFOPLIST_FILE = YES;
|
||||
@ -372,6 +415,7 @@
|
||||
SDKROOT = auto;
|
||||
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
|
||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
@ -388,7 +432,7 @@
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
CURRENT_PROJECT_VERSION = 1;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = "";
|
||||
DEVELOPMENT_TEAM = P8JZH34X63;
|
||||
ENABLE_HARDENED_RUNTIME = YES;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
GENERATE_INFOPLIST_FILE = YES;
|
||||
@ -417,6 +461,7 @@
|
||||
SDKROOT = auto;
|
||||
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
};
|
||||
@ -444,13 +489,6 @@
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
|
||||
/* Begin XCSwiftPackageProductDependency section */
|
||||
E3F92DC42AFA8E3800A6A9D4 /* whisper */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
productName = whisper;
|
||||
};
|
||||
/* End XCSwiftPackageProductDependency section */
|
||||
};
|
||||
rootObject = 0AAC5D8F29539CCF003032C3 /* Project object */;
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
|
||||
|
||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||
if (g_contexts[i] == nullptr) {
|
||||
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
|
||||
g_contexts[i] = whisper_init_from_file(path_model.c_str());
|
||||
if (g_contexts[i] != nullptr) {
|
||||
return i + 1;
|
||||
} else {
|
||||
|
@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
|
||||
|
||||
for model in "${models[@]}"; do
|
||||
python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
|
||||
|
@ -1030,7 +1030,7 @@ void ggml_metal_graph_compute(
|
||||
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2];
|
||||
[encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3];
|
||||
[encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4];
|
||||
[encoder setThreadgroupMemoryLength:GGML_PAD(nth/32*sizeof(float), 16) atIndex:0];
|
||||
[encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0];
|
||||
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
|
||||
} break;
|
||||
@ -1342,7 +1342,7 @@ void ggml_metal_graph_compute(
|
||||
[encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
|
||||
[encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3];
|
||||
[encoder setBytes:&eps length:sizeof( float) atIndex:4];
|
||||
[encoder setThreadgroupMemoryLength:GGML_PAD(nth/32*sizeof(float), 16) atIndex:0];
|
||||
[encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0];
|
||||
|
||||
const int64_t nrows = ggml_nrows(src0);
|
||||
|
||||
@ -1361,7 +1361,7 @@ void ggml_metal_graph_compute(
|
||||
[encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
|
||||
[encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3];
|
||||
[encoder setBytes:&eps length:sizeof( float) atIndex:4];
|
||||
[encoder setThreadgroupMemoryLength:GGML_PAD(nth*sizeof(float), 16) atIndex:0];
|
||||
[encoder setThreadgroupMemoryLength:nth*sizeof(float) atIndex:0];
|
||||
|
||||
const int64_t nrows = ggml_nrows(src0);
|
||||
|
||||
|
21
ggml.c
21
ggml.c
@ -143,6 +143,12 @@ void ggml_print_backtrace(void) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/*#define GGML_PERF*/
|
||||
#define GGML_DEBUG 0
|
||||
#define GGML_GELU_FP16
|
||||
@ -271,12 +277,6 @@ inline static void * ggml_aligned_malloc(size_t size) {
|
||||
// floating point type used to accumulate sums
|
||||
typedef double ggml_float;
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
//
|
||||
// global data
|
||||
//
|
||||
@ -16391,9 +16391,6 @@ struct ggml_compute_state_shared {
|
||||
// synchronization primitives
|
||||
atomic_int n_active; // num active threads
|
||||
atomic_int node_n; // active graph node
|
||||
|
||||
bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
|
||||
void * abort_callback_data;
|
||||
};
|
||||
|
||||
struct ggml_compute_state {
|
||||
@ -16744,6 +16741,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
||||
|
||||
node_n = atomic_load(&state->shared->node_n);
|
||||
if (node_n != last) break;
|
||||
|
||||
if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
|
||||
break;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@ -17037,8 +17038,6 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
||||
/*.n_threads =*/ n_threads,
|
||||
/*.n_active =*/ n_threads,
|
||||
/*.node_n =*/ -1,
|
||||
/*.abort_callback =*/ NULL,
|
||||
/*.abort_callback_data =*/ NULL,
|
||||
};
|
||||
struct ggml_compute_state * workers = alloca(sizeof(struct ggml_compute_state)*n_threads);
|
||||
|
||||
|
@ -50,8 +50,7 @@ https://huggingface.co/ggerganov/whisper.cpp/tree/main
|
||||
| medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
|
||||
| medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
|
||||
| large-v1 | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
|
||||
| large-v2 | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
|
||||
| large | 2.9 GB | ~4.7 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
|
||||
| large | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
|
||||
|
||||
## Model files for testing purposes
|
||||
|
||||
|
@ -78,14 +78,14 @@ def convert_hf_whisper(hf_model_name_or_path: str, whisper_state_path: str):
|
||||
# Ported from models/convert-whisper-to-coreml.py
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
|
||||
parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
|
||||
parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True)
|
||||
parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
|
||||
parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
|
||||
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
|
||||
if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
|
||||
raise ValueError("Invalid model name")
|
||||
|
||||
pt_target_path = f"models/hf-{args.model_name}.pt"
|
||||
|
@ -228,7 +228,7 @@ with np.load(dir_whisper / "whisper" / "assets" / "mel_filters.npz") as f:
|
||||
# for backwards compatibility, also check for older hf_transformers format tokenizer files
|
||||
# old format: dir_whisper/whisper/assets/[multilingual/gpt2]/vocab.json
|
||||
# new format: dir_whisper/whisper/assets/[multilingual/gpt2].tiktoken
|
||||
multilingual = hparams["n_vocab"] >= 51865
|
||||
multilingual = hparams["n_vocab"] == 51865
|
||||
tokenizer = dir_whisper / "whisper" / "assets" / (multilingual and "multilingual.tiktoken" or "gpt2.tiktoken")
|
||||
tokenizer_type = "tiktoken"
|
||||
if not tokenizer.is_file():
|
||||
|
@ -194,7 +194,7 @@ class TextDecoderANE(TextDecoder):
|
||||
x = x.permute(0,2,3,1).squeeze(0)
|
||||
|
||||
# ANE can only load tensors with dim size of at most 16,384 - whisper uses 51,864 (en) or 51,865 (multi-lang) tokens so we need to compute in chunks
|
||||
if self.token_embedding.weight.shape[0] >= 51865:
|
||||
if self.token_embedding.weight.shape[0] == 51865:
|
||||
# split in 11 chunks - 4715 each
|
||||
splits = self.token_embedding.weight.split(self.token_embedding.weight.shape[0]//11, dim=0)
|
||||
logits = torch.cat([torch.einsum('bid,jd->bij', x, split) for split in splits]).view(*x.shape[:2], -1)
|
||||
@ -296,13 +296,13 @@ def convert_decoder(hparams, model, quantize=False):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
|
||||
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
|
||||
parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
|
||||
parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
|
||||
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
|
||||
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
|
||||
raise ValueError("Invalid model name")
|
||||
|
||||
whisper = load_model(args.model).cpu()
|
||||
|
@ -38,10 +38,10 @@ def convert_encoder(hparams, encoder, mname):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
|
||||
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
|
||||
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
|
||||
raise ValueError("Invalid model name")
|
||||
|
||||
whisper = load_model(args.model).cpu()
|
||||
|
@ -19,7 +19,7 @@ function get_script_path() {
|
||||
models_path="$(get_script_path)"
|
||||
|
||||
# Whisper models
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
|
||||
|
||||
# list available models
|
||||
function list_models {
|
||||
|
@ -8,7 +8,7 @@ popd
|
||||
set argc=0
|
||||
for %%x in (%*) do set /A argc+=1
|
||||
|
||||
set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large
|
||||
set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large
|
||||
|
||||
if %argc% neq 1 (
|
||||
echo.
|
||||
@ -57,8 +57,8 @@ goto :eof
|
||||
:list_models
|
||||
echo.
|
||||
echo Available models:
|
||||
(for %%a in (%models%) do (
|
||||
echo %%a
|
||||
(for %%a in (%models%) do (
|
||||
echo %%a
|
||||
))
|
||||
echo.
|
||||
exit /b
|
||||
|
@ -41,7 +41,6 @@ models=(
|
||||
"medium-q5_0"
|
||||
"medium.en-q5_0"
|
||||
"large-v1"
|
||||
"large-v2"
|
||||
"large"
|
||||
"large-q5_0"
|
||||
)
|
||||
|
@ -1 +0,0 @@
|
||||
../ggml.h
|
@ -1 +0,0 @@
|
||||
../whisper.h
|
@ -19,7 +19,7 @@
|
||||
cd `dirname $0`
|
||||
|
||||
# Whisper models
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
|
||||
|
||||
# list available models
|
||||
function list_models {
|
||||
|
1242
whisper.cpp
1242
whisper.cpp
File diff suppressed because it is too large
Load Diff
60
whisper.h
60
whisper.h
@ -1,20 +1,10 @@
|
||||
#ifndef WHISPER_H
|
||||
#define WHISPER_H
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifdef __GNUC__
|
||||
# define WHISPER_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
|
||||
#elif defined(_MSC_VER)
|
||||
# define WHISPER_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
|
||||
#else
|
||||
# define WHISPER_DEPRECATED(func, hint) func
|
||||
#endif
|
||||
|
||||
#ifdef WHISPER_SHARED
|
||||
# ifdef _WIN32
|
||||
# ifdef WHISPER_BUILD
|
||||
@ -31,6 +21,7 @@
|
||||
|
||||
#define WHISPER_SAMPLE_RATE 16000
|
||||
#define WHISPER_N_FFT 400
|
||||
#define WHISPER_N_MEL 80
|
||||
#define WHISPER_HOP_LENGTH 160
|
||||
#define WHISPER_CHUNK_SIZE 30
|
||||
|
||||
@ -80,10 +71,6 @@ extern "C" {
|
||||
|
||||
typedef int whisper_token;
|
||||
|
||||
struct whisper_context_params {
|
||||
bool use_gpu;
|
||||
};
|
||||
|
||||
typedef struct whisper_token_data {
|
||||
whisper_token id; // token id
|
||||
whisper_token tid; // forced timestamp token id
|
||||
@ -112,40 +99,15 @@ extern "C" {
|
||||
// Various functions for loading a ggml whisper model.
|
||||
// Allocate (almost) all memory needed for the model.
|
||||
// Return NULL on failure
|
||||
WHISPER_API struct whisper_context * whisper_init_from_file_with_params (const char * path_model, struct whisper_context_params params);
|
||||
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct whisper_context_params params);
|
||||
WHISPER_API struct whisper_context * whisper_init_with_params (struct whisper_model_loader * loader, struct whisper_context_params params);
|
||||
WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model);
|
||||
WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size);
|
||||
WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader);
|
||||
|
||||
// These are the same as the above, but the internal state of the context is not allocated automatically
|
||||
// It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
|
||||
WHISPER_API struct whisper_context * whisper_init_from_file_with_params_no_state (const char * path_model, struct whisper_context_params params);
|
||||
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct whisper_context_params params);
|
||||
WHISPER_API struct whisper_context * whisper_init_with_params_no_state (struct whisper_model_loader * loader, struct whisper_context_params params);
|
||||
|
||||
WHISPER_DEPRECATED(
|
||||
WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model),
|
||||
"use whisper_init_from_file_with_params instead"
|
||||
);
|
||||
WHISPER_DEPRECATED(
|
||||
WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size),
|
||||
"use whisper_init_from_buffer_with_params instead"
|
||||
);
|
||||
WHISPER_DEPRECATED(
|
||||
WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader),
|
||||
"use whisper_init_with_params instead"
|
||||
);
|
||||
WHISPER_DEPRECATED(
|
||||
WHISPER_API struct whisper_context * whisper_init_from_file_no_state(const char * path_model),
|
||||
"use whisper_init_from_file_with_params_no_state instead"
|
||||
);
|
||||
WHISPER_DEPRECATED(
|
||||
WHISPER_API struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size),
|
||||
"use whisper_init_from_buffer_with_params_no_state instead"
|
||||
);
|
||||
WHISPER_DEPRECATED(
|
||||
WHISPER_API struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader),
|
||||
"use whisper_init_with_params_no_state instead"
|
||||
);
|
||||
WHISPER_API struct whisper_context * whisper_init_from_file_no_state(const char * path_model);
|
||||
WHISPER_API struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size);
|
||||
WHISPER_API struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader);
|
||||
|
||||
WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
|
||||
|
||||
@ -170,7 +132,6 @@ extern "C" {
|
||||
WHISPER_API void whisper_free (struct whisper_context * ctx);
|
||||
WHISPER_API void whisper_free_state(struct whisper_state * state);
|
||||
WHISPER_API void whisper_free_params(struct whisper_full_params * params);
|
||||
WHISPER_API void whisper_free_context_params(struct whisper_context_params * params);
|
||||
|
||||
// Convert RAW PCM audio to log mel spectrogram.
|
||||
// The resulting spectrogram is stored inside the default state of the provided whisper context.
|
||||
@ -481,9 +442,7 @@ extern "C" {
|
||||
void * logits_filter_callback_user_data;
|
||||
};
|
||||
|
||||
// NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
|
||||
WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref();
|
||||
WHISPER_API struct whisper_context_params whisper_context_default_params(void);
|
||||
// NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_params()
|
||||
WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
|
||||
WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
|
||||
|
||||
@ -572,7 +531,8 @@ extern "C" {
|
||||
|
||||
// Control logging output; default behavior is to print to stderr
|
||||
|
||||
WHISPER_API void whisper_log_set(ggml_log_callback log_callback, void * user_data);
|
||||
typedef void (*whisper_log_callback)(const char * line);
|
||||
WHISPER_API void whisper_set_log_callback(whisper_log_callback callback);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
Reference in New Issue
Block a user