Compare commits


1 commit

Author SHA1 Message Date
aaa3b5e5f6 ggml : try to fix the abort mechanism 2023-11-05 20:02:24 +02:00
74 changed files with 1786 additions and 1571 deletions

.github/workflows/build.yml

@@ -88,7 +88,7 @@ jobs:
  -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
  apt update
  apt install -y build-essential cmake libsdl2-dev
- cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+ cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
  make
  ctest -L gh --output-on-failure'

@@ -115,7 +115,7 @@ jobs:
  -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
  apt update
  apt install -y build-essential cmake libsdl2-dev
- cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
+ cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
  make
  ctest -L gh --output-on-failure'

@@ -182,7 +182,7 @@ jobs:
  run: >
  cmake -S . -B ./build -A ${{ matrix.arch }}
  -DCMAKE_BUILD_TYPE=${{ matrix.build }}
- -DWHISPER_SDL2=${{ matrix.sdl2 }}
+ -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
  - name: Build
  run: |

@@ -217,10 +217,10 @@ jobs:
  sdl2: [ON]
  include:
  - arch: Win32
- obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.24/OpenBLAS-0.3.24-x86.zip
+ obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
  s2arc: x86
  - arch: x64
- obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.24/OpenBLAS-0.3.24-x64.zip
+ obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
  s2arc: x64
  - sdl2: ON
  s2ver: 2.26.0

@@ -239,7 +239,7 @@ jobs:
  7z x blas.zip -oblas -y
  copy blas/include/cblas.h .
  copy blas/include/openblas_config.h .
- echo "OPENBLAS_PATH=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
+ echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
  - name: Fetch SDL2 and set SDL2_DIR
  if: matrix.sdl2 == 'ON'

@@ -252,9 +252,9 @@ jobs:
  run: >
  cmake -S . -B ./build -A ${{ matrix.arch }}
  -DCMAKE_BUILD_TYPE=${{ matrix.build }}
- -DWHISPER_OPENBLAS=${{ matrix.blas }}
- -DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
- -DWHISPER_SDL2=${{ matrix.sdl2 }}
+ -DWHISPER_SUPPORT_OPENBLAS=${{ matrix.blas }}
+ -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
+ -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
  - name: Build
  run: |

@@ -263,7 +263,7 @@ jobs:
  - name: Copy libopenblas.dll
  if: matrix.blas == 'ON'
- run: copy "$env:OPENBLAS_PATH/bin/libopenblas.dll" build/bin/${{ matrix.build }}
+ run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
  - name: Copy SDL2.dll
  if: matrix.sdl2 == 'ON'

.gitignore

@@ -18,11 +18,6 @@ build-no-accel/
  build-sanitize-addr/
  build-sanitize-thread/
- # SPM
- .build/
- .swiftpm
- *.metallib
  /main
  /stream
  /command

CMakeLists.txt

@@ -1,6 +1,6 @@
  cmake_minimum_required (VERSION 3.5)
- project(whisper.cpp VERSION 1.4.3)
+ project(whisper.cpp VERSION 1.4.2)
  # Add path to modules
  list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")

Makefile

@@ -307,7 +307,7 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
  ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
  $(CC) $(CFLAGS) -c $< -o $@
- WHISPER_OBJ += ggml.o ggml-alloc.o ggml-backend.o ggml-quants.o
+ WHISPER_OBJ += ggml-alloc.o ggml-backend.o ggml-quants.o
  whisper.o: whisper.cpp whisper.h ggml.h ggml-cuda.h
  $(CXX) $(CXXFLAGS) -c $< -o $@

@@ -331,11 +331,11 @@ ggml-metal.o: ggml-metal.m ggml-metal.h
  WHISPER_OBJ += ggml-metal.o
  endif
- libwhisper.a: $(WHISPER_OBJ)
+ libwhisper.a: ggml.o $(WHISPER_OBJ)
- $(AR) rcs libwhisper.a $(WHISPER_OBJ)
+ $(AR) rcs libwhisper.a ggml.o $(WHISPER_OBJ)
- libwhisper.so: $(WHISPER_OBJ)
+ libwhisper.so: ggml.o $(WHISPER_OBJ)
- $(CXX) $(CXXFLAGS) -shared -o libwhisper.so $(WHISPER_OBJ) $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o $(WHISPER_OBJ) $(LDFLAGS)
  clean:
  rm -f *.o main stream command talk talk-llama bench quantize lsp libwhisper.a libwhisper.so

@@ -349,30 +349,30 @@ CC_SDL=`sdl2-config --cflags --libs`
  SRC_COMMON = examples/common.cpp examples/common-ggml.cpp
  SRC_COMMON_SDL = examples/common-sdl.cpp
- main: examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ)
+ main: examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ)
- $(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o main $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o main $(LDFLAGS)
  ./main -h
- bench: examples/bench/bench.cpp $(WHISPER_OBJ)
+ bench: examples/bench/bench.cpp ggml.o $(WHISPER_OBJ)
- $(CXX) $(CXXFLAGS) examples/bench/bench.cpp $(WHISPER_OBJ) -o bench $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o $(WHISPER_OBJ) -o bench $(LDFLAGS)
- quantize: examples/quantize/quantize.cpp $(WHISPER_OBJ) $(SRC_COMMON)
+ quantize: examples/quantize/quantize.cpp ggml.o $(WHISPER_OBJ) $(SRC_COMMON)
- $(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o quantize $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o quantize $(LDFLAGS)
- stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+ stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
- $(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
- command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+ command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
- $(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
- lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+ lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
- $(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)
- talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+ talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
- $(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
- talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+ talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
- $(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
  #
  # Audio samples

@@ -417,10 +417,9 @@ samples:
  .PHONY: medium.en
  .PHONY: medium
  .PHONY: large-v1
- .PHONY: large-v2
  .PHONY: large
- tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large: main
+ tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
  bash ./models/download-ggml-model.sh $@
  @echo ""
  @echo "==============================================="

Package.swift

@@ -1,77 +0,0 @@
- // swift-tools-version:5.5
- import PackageDescription
- #if arch(arm) || arch(arm64)
- let platforms: [SupportedPlatform]? = [
- .macOS(.v12),
- .iOS(.v14),
- .watchOS(.v4),
- .tvOS(.v14)
- ]
- let exclude: [String] = []
- let resources: [Resource] = [
- .process("ggml-metal.metal")
- ]
- let additionalSources: [String] = ["ggml-metal.m"]
- let additionalSettings: [CSetting] = [
- .unsafeFlags(["-fno-objc-arc"]),
- .define("GGML_USE_METAL")
- ]
- #else
- let platforms: [SupportedPlatform]? = nil
- let exclude: [String] = ["ggml-metal.metal"]
- let resources: [Resource] = []
- let additionalSources: [String] = []
- let additionalSettings: [CSetting] = []
- #endif
- let package = Package(
- name: "whisper",
- platforms: platforms,
- products: [
- .library(name: "whisper", targets: ["whisper"]),
- ],
- targets: [
- .target(
- name: "whisper",
- path: ".",
- exclude: exclude + [
- "bindings",
- "cmake",
- "coreml",
- "examples",
- "extra",
- "models",
- "samples",
- "tests",
- "CMakeLists.txt",
- "ggml-cuda.cu",
- "ggml-cuda.h",
- "Makefile"
- ],
- sources: [
- "ggml.c",
- "whisper.cpp",
- "ggml-alloc.c",
- "ggml-backend.c",
- "ggml-quants.c"
- ] + additionalSources,
- resources: resources,
- publicHeadersPath: "spm-headers",
- cSettings: [
- .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
- .define("GGML_USE_ACCELERATE")
- // NOTE: NEW_LAPACK will required iOS version 16.4+
- // We should consider add this in the future when we drop support for iOS 14
- // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
- // .define("ACCELERATE_NEW_LAPACK"),
- // .define("ACCELERATE_LAPACK_ILP64")
- ] + additionalSettings,
- linkerSettings: [
- .linkedFramework("Accelerate")
- ]
- )
- ],
- cxxLanguageStandard: .cxx11
- )

README.md

@@ -6,7 +6,7 @@
  [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
  [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
- Beta: [v1.4.3](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.3) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+ Beta: [v1.4.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.2) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
  High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:

@@ -234,7 +234,6 @@ make small
  make medium.en
  make medium
  make large-v1
- make large-v2
  make large
  ```

@@ -246,7 +245,7 @@ make large
  | base | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
  | small | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
  | medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
- | large | 2.9 GB | ~3.3 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
+ | large | 2.9 GB | ~3.3 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
  ## Quantization

bindings/go/examples/go-model-download/main.go

@@ -24,7 +24,7 @@ const (
  var (
  // The models which will be downloaded, if no model is specified as an argument
- modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large"}
+ modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large"}
  )
  var (

bindings/go/whisper.go

@@ -83,6 +83,7 @@ const (
  SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second
  SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
  NumFFT = C.WHISPER_N_FFT
+ NumMEL = C.WHISPER_N_MEL
  HopLength = C.WHISPER_HOP_LENGTH
  ChunkSize = C.WHISPER_CHUNK_SIZE
  )

@@ -102,7 +103,7 @@ var (
  func Whisper_init(path string) *Context {
  cPath := C.CString(path)
  defer C.free(unsafe.Pointer(cPath))
- if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
+ if ctx := C.whisper_init_from_file(cPath); ctx != nil {
  return (*Context)(ctx)
  } else {
  return nil

bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java

@@ -4,7 +4,6 @@ import com.sun.jna.Structure;
  import com.sun.jna.ptr.PointerByReference;
  import io.github.ggerganov.whispercpp.ggml.GgmlType;
  import io.github.ggerganov.whispercpp.WhisperModel;
- import io.github.ggerganov.whispercpp.params.WhisperContextParams;
  import java.util.List;

@@ -24,9 +23,8 @@ public class WhisperContext extends Structure {
  public PointerByReference vocab;
  public PointerByReference state;
- /** populated by whisper_init_from_file_with_params() */
+ /** populated by whisper_init_from_file() */
  String path_model;
- WhisperContextParams params;
  // public static class ByReference extends WhisperContext implements Structure.ByReference {
  // }

bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java

@@ -2,7 +2,6 @@ package io.github.ggerganov.whispercpp;
  import com.sun.jna.Native;
  import com.sun.jna.Pointer;
- import io.github.ggerganov.whispercpp.params.WhisperContextParams;
  import io.github.ggerganov.whispercpp.params.WhisperFullParams;
  import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;

@@ -16,9 +15,8 @@ import java.io.IOException;
  public class WhisperCpp implements AutoCloseable {
  private WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
  private Pointer ctx = null;
- private Pointer paramsPointer = null;
- private Pointer greedyParamsPointer = null;
- private Pointer beamParamsPointer = null;
+ private Pointer greedyPointer = null;
+ private Pointer beamPointer = null;
  public File modelDir() {
  String modelDirPath = System.getenv("XDG_CACHE_HOME");

@@ -33,18 +31,6 @@ public class WhisperCpp implements AutoCloseable {
  * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
  */
  public void initContext(String modelPath) throws FileNotFoundException {
- initContextImpl(modelPath, getContextDefaultParams());
- }
- /**
- * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
- * @param params - params to use when initialising the context
- */
- public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
- initContextImpl(modelPath, params);
- }
- private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
  if (ctx != null) {
  lib.whisper_free(ctx);
  }

@@ -57,26 +43,13 @@ public class WhisperCpp implements AutoCloseable {
  modelPath = new File(modelDir(), modelPath).getAbsolutePath();
  }
- ctx = lib.whisper_init_from_file_with_params(modelPath, params);
+ ctx = lib.whisper_init_from_file(modelPath);
  if (ctx == null) {
  throw new FileNotFoundException(modelPath);
  }
  }
- /**
- * Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
- * Because this function allocates memory for the params, the caller must call either:
- * - call `whisper_free_context_params()`
- * - `Native.free(Pointer.nativeValue(pointer));`
- */
- public WhisperContextParams getContextDefaultParams() {
- paramsPointer = lib.whisper_context_default_params_by_ref();
- WhisperContextParams params = new WhisperContextParams(paramsPointer);
- params.read();
- return params;
- }
  /**
  * Provides default params which can be used with `whisper_full()` etc.
  * Because this function allocates memory for the params, the caller must call either:

@@ -90,15 +63,15 @@ public class WhisperCpp implements AutoCloseable {
  // whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
  if (strategy == WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY) {
- if (greedyParamsPointer == null) {
+ if (greedyPointer == null) {
- greedyParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
+ greedyPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
  }
- pointer = greedyParamsPointer;
+ pointer = greedyPointer;
  } else {
- if (beamParamsPointer == null) {
+ if (beamPointer == null) {
- beamParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
+ beamPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
  }
- pointer = beamParamsPointer;
+ pointer = beamPointer;
  }
  WhisperFullParams params = new WhisperFullParams(pointer);

@@ -120,17 +93,13 @@ public class WhisperCpp implements AutoCloseable {
  }
  private void freeParams() {
- if (paramsPointer != null) {
+ if (greedyPointer != null) {
- Native.free(Pointer.nativeValue(paramsPointer));
+ Native.free(Pointer.nativeValue(greedyPointer));
- paramsPointer = null;
+ greedyPointer = null;
  }
- if (greedyParamsPointer != null) {
+ if (beamPointer != null) {
- Native.free(Pointer.nativeValue(greedyParamsPointer));
+ Native.free(Pointer.nativeValue(beamPointer));
- greedyParamsPointer = null;
+ beamPointer = null;
- }
- if (beamParamsPointer != null) {
- Native.free(Pointer.nativeValue(beamParamsPointer));
- beamParamsPointer = null;
  }
  }

bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java

@@ -5,7 +5,6 @@ import com.sun.jna.Native;
  import com.sun.jna.Pointer;
  import io.github.ggerganov.whispercpp.model.WhisperModelLoader;
  import io.github.ggerganov.whispercpp.model.WhisperTokenData;
- import io.github.ggerganov.whispercpp.params.WhisperContextParams;
  import io.github.ggerganov.whispercpp.params.WhisperFullParams;
  public interface WhisperCppJnaLibrary extends Library {

@@ -14,32 +13,13 @@ public interface WhisperCppJnaLibrary extends Library {
  String whisper_print_system_info();
  /**
- * DEPRECATED. Allocate (almost) all memory needed for the model by loading from a file.
+ * Allocate (almost) all memory needed for the model by loading from a file.
  *
  * @param path_model Path to the model file
  * @return Whisper context on success, null on failure
  */
  Pointer whisper_init_from_file(String path_model);
- /**
- * Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
- * Because this function allocates memory for the params, the caller must call either:
- * - call `whisper_free_context_params()`
- * - `Native.free(Pointer.nativeValue(pointer));`
- */
- Pointer whisper_context_default_params_by_ref();
- void whisper_free_context_params(Pointer params);
- /**
- * Allocate (almost) all memory needed for the model by loading from a file.
- *
- * @param path_model Path to the model file
- * @param params Pointer to whisper_context_params
- * @return Whisper context on success, null on failure
- */
- Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
  /**
  * Allocate (almost) all memory needed for the model by loading from a buffer.
  *
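
The API difference driving most of this comparison is spelled out in the hunk above: one side of the diff creates a context with whisper_init_from_file_with_params(), the other with the plain (and, on that side, deprecated) whisper_init_from_file(). A minimal C sketch of the two call styles, built only from names that appear in these hunks (the model path is illustrative):

```c
#include <stdio.h>
#include "whisper.h"

int main(void) {
    // Style on the "-" side of this diff: explicit context params.
    // whisper_context_default_params() and the use_gpu field both appear
    // in the hunks of this comparison.
    struct whisper_context_params cparams = whisper_context_default_params();
    cparams.use_gpu = true;
    struct whisper_context * ctx =
        whisper_init_from_file_with_params("models/ggml-base.en.bin", cparams);

    // Style on the "+" side (kept as DEPRECATED on the "-" side):
    // struct whisper_context * ctx = whisper_init_from_file("models/ggml-base.en.bin");

    if (ctx == NULL) {
        fprintf(stderr, "error: failed to initialize whisper context\n");
        return 1;
    }
    whisper_free(ctx);
    return 0;
}
```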

bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java

@@ -1,31 +0,0 @@
- package io.github.ggerganov.whispercpp.params;
- import com.sun.jna.*;
- import java.util.Arrays;
- import java.util.List;
- /**
- * Parameters for the whisper_init_from_file_with_params() function.
- * If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
- * whisper_context_default_params()
- */
- public class WhisperContextParams extends Structure {
- public WhisperContextParams(Pointer p) {
- super(p);
- }
- /** Use GPU for inference Number (default = true) */
- public CBool use_gpu;
- /** Use GPU for inference Number (default = true) */
- public void useGpu(boolean enable) {
- use_gpu = enable ? CBool.TRUE : CBool.FALSE;
- }
- @Override
- protected List<String> getFieldOrder() {
- return Arrays.asList("use_gpu");
- }
- }

bindings/javascript/emscripten.cpp

@@ -20,7 +20,7 @@ struct whisper_context * g_context;
  EMSCRIPTEN_BINDINGS(whisper) {
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
  if (g_context == nullptr) {
- g_context = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
+ g_context = whisper_init_from_file(path_model.c_str());
  if (g_context != nullptr) {
  return true;
  } else {

bindings/javascript/package.json

@@ -1,6 +1,6 @@
  {
  "name": "whisper.cpp",
- "version": "1.4.3",
+ "version": "1.4.2",
  "description": "Whisper speech recognition",
  "main": "whisper.js",
  "scripts": {

bindings/ruby/ext/ruby_whisper.cpp

@@ -87,7 +87,7 @@ static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
  if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
  rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
  }
- rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
+ rw->context = whisper_init_from_file(StringValueCStr(whisper_model_file_path));
  if (rw->context == nullptr) {
  rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
  }

coreml/whisper-encoder-impl.h

@@ -123,7 +123,7 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
  /**
  Make a prediction using the convenience interface
- @param logmel_data as 1 × n_mel × 3000 3-dimensional array of floats:
+ @param logmel_data as 1 × 80 × 3000 3-dimensional array of floats:
  @param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
  @return the prediction as whisper_encoder_implOutput
  */

coreml/whisper-encoder.h

@@ -3,8 +3,6 @@
  // Code is derived from the work of Github user @wangchou
  // ref: https://github.com/wangchou/callCoreMLFromCpp
- #include <stdint.h>
  #if __cplusplus
  extern "C" {
  #endif

@@ -16,8 +14,6 @@ void whisper_coreml_free(struct whisper_coreml_context * ctx);
  void whisper_coreml_encode(
  const whisper_coreml_context * ctx,
- int64_t n_ctx,
- int64_t n_mel,
  float * mel,
  float * out);

coreml/whisper-encoder.mm

@@ -48,15 +48,13 @@ void whisper_coreml_free(struct whisper_coreml_context * ctx) {
  void whisper_coreml_encode(
  const whisper_coreml_context * ctx,
- int64_t n_ctx,
- int64_t n_mel,
  float * mel,
  float * out) {
  MLMultiArray * inMultiArray = [
  [MLMultiArray alloc] initWithDataPointer: mel
- shape: @[@1, @(n_mel), @(n_ctx)]
+ shape: @[@1, @80, @3000]
  dataType: MLMultiArrayDataTypeFloat32
- strides: @[@(n_ctx*n_mel), @(n_ctx), @1]
+ strides: @[@(240000), @(3000), @1]
  deallocator: nil
  error: nil
  ];
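
The hard-coded values on the "+" side are just the row-major strides of a 1 × 80 × 3000 float array: 80 × 3000 = 240000 elements per batch, 3000 per mel row, 1 per sample. The "-" side computes the same layout from n_mel and n_ctx; a small check of the arithmetic:

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
    // Row-major strides of a [1, n_mel, n_ctx] array. With n_mel = 80 and
    // n_ctx = 3000 this reproduces the hard-coded @[@(240000), @(3000), @1].
    const int64_t n_mel = 80, n_ctx = 3000;
    printf("%lld %lld %lld\n", (long long)(n_mel * n_ctx), (long long) n_ctx, 1LL);
    return 0;
}
```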

examples/addon.node/__test__/whisper.spec.js

@@ -11,7 +11,6 @@ const whisperParamsMock = {
  language: "en",
  model: path.join(__dirname, "../../../models/ggml-base.en.bin"),
  fname_inp: path.join(__dirname, "../../../samples/jfk.wav"),
- use_gpu: true,
  };
  describe("Run whisper.node", () => {

examples/addon.node/addon.cpp

@@ -36,7 +36,6 @@ struct whisper_params {
  bool print_colors = false;
  bool print_progress = false;
  bool no_timestamps = false;
- bool use_gpu = true;
  std::string language = "en";
  std::string prompt;

@@ -154,9 +153,7 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
  // whisper init
- struct whisper_context_params cparams;
- cparams.use_gpu = params.use_gpu;
- struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
+ struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
  if (ctx == nullptr) {
  fprintf(stderr, "error: failed to initialize whisper context\n");

@@ -318,12 +315,10 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
  std::string language = whisper_params.Get("language").As<Napi::String>();
  std::string model = whisper_params.Get("model").As<Napi::String>();
  std::string input = whisper_params.Get("fname_inp").As<Napi::String>();
- bool use_gpu = whisper_params.Get("use_gpu").As<Napi::Boolean>();
  params.language = language;
  params.model = model;
  params.fname_inp.emplace_back(input);
- params.use_gpu = use_gpu;
  Napi::Function callback = info[1].As<Napi::Function>();
  Worker* worker = new Worker(callback, params);

examples/addon.node/index.js

@@ -11,7 +11,6 @@ const whisperParams = {
  language: "en",
  model: path.join(__dirname, "../../models/ggml-base.en.bin"),
  fname_inp: "../../samples/jfk.wav",
- use_gpu: true,
  };
  const arguments = process.argv.slice(2);

examples/bench.wasm/emscripten.cpp

@@ -23,9 +23,7 @@ void bench_main(size_t index) {
  fprintf(stderr, "%s: running benchmark with %d threads - please wait...\n", __func__, n_threads);
- const int n_mels = whisper_model_n_mels(ctx);
- if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
+ if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
  fprintf(stderr, "error: failed to set mel: %d\n", ret);
  return;
  }

@@ -59,7 +57,7 @@ EMSCRIPTEN_BINDINGS(bench) {
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
  for (size_t i = 0; i < g_contexts.size(); ++i) {
  if (g_contexts[i] == nullptr) {
- g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
+ g_contexts[i] = whisper_init_from_file(path_model.c_str());
  if (g_contexts[i] != nullptr) {
  if (g_worker.joinable()) {
  g_worker.join();

examples/bench/bench.cpp

@@ -11,8 +11,6 @@ struct whisper_params {
  int32_t what = 0; // what to benchmark: 0 - whisper ecoder, 1 - memcpy, 2 - ggml_mul_mat
  std::string model = "models/ggml-base.en.bin";
- bool use_gpu = true;
  };
  void whisper_print_usage(int argc, char ** argv, const whisper_params & params);

@@ -25,10 +23,9 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
  whisper_print_usage(argc, argv, params);
  exit(0);
  }
  else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
  else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
  else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); }
- else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
  else {
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
  whisper_print_usage(argc, argv, params);

@@ -48,7 +45,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
  fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
  fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
- fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
  fprintf(stderr, " %-7s 0 - whisper\n", "");
  fprintf(stderr, " %-7s 1 - memcpy\n", "");
  fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");

@@ -58,10 +54,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
  int whisper_bench_full(const whisper_params & params) {
  // whisper init
- struct whisper_context_params cparams;
- cparams.use_gpu = params.use_gpu;
- struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
+ struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
  {
  fprintf(stderr, "\n");

@@ -73,9 +66,7 @@ int whisper_bench_full(const whisper_params & params) {
  return 2;
  }
- const int n_mels = whisper_model_n_mels(ctx);
- if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
+ if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
  fprintf(stderr, "error: failed to set mel: %d\n", ret);
  return 3;
  }

examples/command.wasm/emscripten.cpp

@@ -243,7 +243,7 @@ EMSCRIPTEN_BINDINGS(command) {
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
  for (size_t i = 0; i < g_contexts.size(); ++i) {
  if (g_contexts[i] == nullptr) {
- g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
+ g_contexts[i] = whisper_init_from_file(path_model.c_str());
  if (g_contexts[i] != nullptr) {
  g_running = true;
  if (g_worker.joinable()) {

examples/command/command.cpp

@@ -38,7 +38,6 @@ struct whisper_params {
  bool print_special = false;
  bool print_energy = false;
  bool no_timestamps = true;
- bool use_gpu = true;
  std::string language = "en";
  std::string model = "models/ggml-base.en.bin";

@@ -69,7 +68,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
  else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
  else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
- else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
  else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
  else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
  else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }

@@ -103,7 +101,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
  fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
  fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
  fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
- fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());

@@ -613,10 +610,7 @@ int main(int argc, char ** argv) {
  // whisper init
- struct whisper_context_params cparams;
- cparams.use_gpu = params.use_gpu;
- struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
+ struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
  // print some info about the processing
  {

examples/common.h

@@ -181,7 +181,7 @@ private:
  // It is assumed that PCM data is normalized to a range from -1 to 1
  bool write_audio(const float * data, size_t length) {
  for (size_t i = 0; i < length; ++i) {
- const int16_t intSample = data[i] * 32767;
+ const auto intSample = static_cast<const int16_t>(data[i] * 32767);
  file.write(reinterpret_cast<const char *>(&intSample), sizeof(int16_t));
  dataSize += sizeof(int16_t);
  }
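
Both sides of the wav_writer hunk perform the same float-to-int16 PCM conversion; the static_cast form only makes the narrowing conversion explicit. A standalone sketch of that conversion (the helper name is illustrative):

```c
#include <stdint.h>
#include <stdio.h>

// Scale normalized float PCM in [-1, 1] to 16-bit PCM, as in write_audio() above.
static int16_t to_i16(float s) {
    return (int16_t)(s * 32767.0f);
}

int main(void) {
    printf("%d %d %d\n", to_i16(1.0f), to_i16(0.0f), to_i16(-1.0f)); // 32767 0 -32767
    return 0;
}
```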

examples/livestream.sh

@@ -48,7 +48,7 @@ if [ -n "$3" ]; then
  fi
  # Whisper models
- models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
+ models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
  # list available models
  function list_models {

examples/lsp/lsp.cpp

@@ -30,7 +30,6 @@ struct whisper_params {
  bool translate = false;
  bool print_special = false;
  bool print_energy = false;
- bool use_gpu = true;
  std::string language = "en";
  std::string model = "models/ggml-base.en.bin";

@@ -73,7 +72,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
  else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
  else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
- else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
  else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
  else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
  else {

@@ -104,7 +102,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
  fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
  fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
  fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
- fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
  fprintf(stderr, "\n");

@@ -435,9 +432,7 @@ int main(int argc, char ** argv) {
  }
  // whisper init
- struct whisper_context_params cparams;
- cparams.use_gpu = params.use_gpu;
- struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
+ struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
  // init audio
  audio_async audio(30*1000);

examples/main/main.cpp

@@ -90,7 +90,6 @@ struct whisper_params {
  bool print_progress = false;
  bool no_timestamps = false;
  bool log_score = false;
- bool use_gpu = true;
  std::string language = "en";
  std::string prompt;

@@ -166,7 +165,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
  else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
  else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
  else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
- else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
  else {
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
  whisper_print_usage(argc, argv, params);

@@ -223,7 +221,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
  fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
  fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
- fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
  fprintf(stderr, "\n");
  }

@@ -880,10 +877,7 @@ int main(int argc, char ** argv) {
  // whisper init
- struct whisper_context_params cparams;
- cparams.use_gpu = params.use_gpu;
- struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
+ struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
  if (ctx == nullptr) {
  fprintf(stderr, "error: failed to initialize whisper context\n");

examples/stream.wasm/emscripten.cpp

@@ -132,7 +132,7 @@ EMSCRIPTEN_BINDINGS(stream) {
  emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
  for (size_t i = 0; i < g_contexts.size(); ++i) {
  if (g_contexts[i] == nullptr) {
- g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
+ g_contexts[i] = whisper_init_from_file(path_model.c_str());
  if (g_contexts[i] != nullptr) {
  g_running = true;
  if (g_worker.joinable()) {

examples/stream/stream.cpp

@@ -48,12 +48,11 @@ struct whisper_params {
  bool no_context = true;
  bool no_timestamps = false;
  bool tinydiarize = false;
- bool save_audio = false; // save audio to wav file
- bool use_gpu = true;
  std::string language = "en";
  std::string model = "models/ggml-base.en.bin";
  std::string fname_out;
+ bool save_audio = false; // save audio to wav file
  };

@@ -66,26 +65,25 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
  whisper_print_usage(argc, argv, params);
  exit(0);
  }
  else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
  else if ( arg == "--step") { params.step_ms = std::stoi(argv[++i]); }
  else if ( arg == "--length") { params.length_ms = std::stoi(argv[++i]); }
  else if ( arg == "--keep") { params.keep_ms = std::stoi(argv[++i]); }
  else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
  else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
  else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
  else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
  else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
  else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
  else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
  else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
  else if (arg == "-kc" || arg == "--keep-context") { params.no_context = false; }
  else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
  else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
  else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
  else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
  else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
- else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
  else {
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());

@@ -120,9 +118,8 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
  fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
  fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false");
- fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU inference\n", params.use_gpu ? "false" : "true");
  fprintf(stderr, "\n");
  }

@@ -166,10 +163,7 @@ int main(int argc, char ** argv) {
  exit(0);
  }
- struct whisper_context_params cparams;
- cparams.use_gpu = params.use_gpu;
- struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
+ struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
  std::vector<float> pcmf32 (n_samples_30s, 0.0f);
  std::vector<float> pcmf32_old;

examples/talk-llama/CMakeLists.txt

@@ -14,8 +14,6 @@ if (WHISPER_SDL2)
  ../common-sdl.cpp
  ../../ggml.c
  ../../ggml-alloc.c
- ../../ggml-backend.c
- ../../ggml-quants.c
  ../../whisper.cpp)
  target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS} ../../)

View File

@ -63,7 +63,6 @@ struct whisper_params {
bool print_energy = false; bool print_energy = false;
bool no_timestamps = true; bool no_timestamps = true;
bool verbose_prompt = false; bool verbose_prompt = false;
bool use_gpu = true;
std::string person = "Georgi"; std::string person = "Georgi";
std::string language = "en"; std::string language = "en";
@ -85,26 +84,25 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
whisper_print_usage(argc, argv, params); whisper_print_usage(argc, argv, params);
exit(0); exit(0);
} }
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); } else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
else if (arg == "-vms" || arg == "--voice-ms") { params.voice_ms = std::stoi(argv[++i]); } else if (arg == "-vms" || arg == "--voice-ms") { params.voice_ms = std::stoi(argv[++i]); }
else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); } else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); } else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); } else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); } else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); } else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; } else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
else if (arg == "-tr" || arg == "--translate") { params.translate = true; } else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; } else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; } else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
else if (arg == "-vp" || arg == "--verbose-prompt") { params.verbose_prompt = true; } else if (arg == "--verbose-prompt") { params.verbose_prompt = true; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; } else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; } else if (arg == "--session") { params.path_session = argv[++i];}
else if (arg == "--session") { params.path_session = argv[++i];} else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; } else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; } else if (arg == "-ml" || arg == "--model-llama") { params.model_llama = argv[++i]; }
else if (arg == "-ml" || arg == "--model-llama") { params.model_llama = argv[++i]; } else if (arg == "-s" || arg == "--speak") { params.speak = argv[++i]; }
else if (arg == "-s" || arg == "--speak") { params.speak = argv[++i]; } else if (arg == "--prompt-file") {
else if (arg == "--prompt-file") {
std::ifstream file(argv[++i]); std::ifstream file(argv[++i]);
std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt)); std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
if (params.prompt.back() == '\n') { if (params.prompt.back() == '\n') {
@ -112,7 +110,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
} }
} }
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; } else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
else { else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params); whisper_print_usage(argc, argv, params);
@ -128,28 +125,27 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "options:\n"); fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help [default] show this help message and exit\n"); fprintf(stderr, " -h, --help [default] show this help message and exit\n");
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads); fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -vms N, --voice-ms N [%-7d] voice duration in milliseconds\n", params.voice_ms); fprintf(stderr, " -vms N, --voice-ms N [%-7d] voice duration in milliseconds\n", params.voice_ms);
fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id); fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens); fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx); fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold); fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold); fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false"); fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false"); fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false"); fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false"); fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
fprintf(stderr, " -vp, --verbose-prompt [%-7s] print prompt at start\n", params.verbose_prompt ? "true" : "false"); fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true"); fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str()); fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str()); fprintf(stderr, " -ml FILE, --model-llama [%-7s] llama model file\n", params.model_llama.c_str());
fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str()); fprintf(stderr, " -s FILE, --speak TEXT [%-7s] command for TTS\n", params.speak.c_str());
fprintf(stderr, " -ml FILE, --model-llama [%-7s] llama model file\n", params.model_llama.c_str()); fprintf(stderr, " --prompt-file FNAME [%-7s] file with custom prompt to start dialog\n", "");
fprintf(stderr, " -s FILE, --speak TEXT [%-7s] command for TTS\n", params.speak.c_str()); fprintf(stderr, " --session FNAME file to cache model state in (may be large!) (default: none)\n");
fprintf(stderr, " --prompt-file FNAME [%-7s] file with custom prompt to start dialog\n", ""); fprintf(stderr, " --verbose-prompt [%-7s] print prompt at start\n", params.verbose_prompt ? "true" : "false");
fprintf(stderr, " --session FNAME file to cache model state in (may be large!) (default: none)\n"); fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
fprintf(stderr, "\n"); fprintf(stderr, "\n");
} }
@ -248,7 +244,7 @@ int main(int argc, char ** argv) {
return 1; return 1;
} }
if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) { if (whisper_lang_id(params.language.c_str()) == -1) {
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str()); fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
whisper_print_usage(argc, argv, params); whisper_print_usage(argc, argv, params);
exit(0); exit(0);
@ -256,19 +252,13 @@ int main(int argc, char ** argv) {
// whisper init // whisper init
struct whisper_context_params cparams; struct whisper_context * ctx_wsp = whisper_init_from_file(params.model_wsp.c_str());
cparams.use_gpu = params.use_gpu;
struct whisper_context * ctx_wsp = whisper_init_from_file_with_params(params.model_wsp.c_str(), cparams);
// llama init // llama init
llama_backend_init(true); llama_backend_init(true);
auto lmparams = llama_model_default_params(); auto lmparams = llama_model_default_params();
if (!params.use_gpu) {
lmparams.n_gpu_layers = 0;
}
struct llama_model * model_llama = llama_load_model_from_file(params.model_llama.c_str(), lmparams); struct llama_model * model_llama = llama_load_model_from_file(params.model_llama.c_str(), lmparams);
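The hunk above swaps the one-argument whisper_init_from_file for the params-struct entry point and threads use_gpu through both whisper and llama. A minimal sketch of the new call shape, assuming the whisper.h/llama.h in this tree (init_whisper and init_llama are hypothetical helper names):

#include "whisper.h"
#include "llama.h"

// Sketch: initialize both models while honoring a --no-gpu style flag.
static struct whisper_context * init_whisper(const char * path, bool use_gpu) {
    struct whisper_context_params cparams = whisper_context_default_params();
    cparams.use_gpu = use_gpu;                    // false -> CPU-only inference
    return whisper_init_from_file_with_params(path, cparams);
}

static struct llama_model * init_llama(const char * path, bool use_gpu) {
    struct llama_model_params lmparams = llama_model_default_params();
    if (!use_gpu) {
        lmparams.n_gpu_layers = 0;                // keep every layer on the CPU
    }
    return llama_load_model_from_file(path, lmparams);
}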

View File

@ -271,7 +271,7 @@ EMSCRIPTEN_BINDINGS(talk) {
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) { emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
for (size_t i = 0; i < g_contexts.size(); ++i) { for (size_t i = 0; i < g_contexts.size(); ++i) {
if (g_contexts[i] == nullptr) { if (g_contexts[i] == nullptr) {
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params()); g_contexts[i] = whisper_init_from_file(path_model.c_str());
if (g_contexts[i] != nullptr) { if (g_contexts[i] != nullptr) {
g_running = true; g_running = true;
if (g_worker.joinable()) { if (g_worker.joinable()) {

View File

@ -121,13 +121,13 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
return false; return false;
} }
char word[129]; std::string word;
for (int i = 0; i < n_vocab; i++) { for (int i = 0; i < n_vocab; i++) {
uint32_t len; uint32_t len;
fin.read((char *) &len, sizeof(len)); fin.read((char *) &len, sizeof(len));
word[len] = '\0';
fin.read((char *) word, len); word.resize(len);
fin.read((char *) word.data(), len);
vocab.token_to_id[word] = i; vocab.token_to_id[word] = i;
vocab.id_to_token[i] = word; vocab.id_to_token[i] = word;
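The std::string form in the right-hand column is the bounds-safe way to read these length-prefixed vocab entries; the fixed char word[129] on the left overflows whenever a token is longer than 128 bytes. A self-contained sketch of the safe read (read_token is a hypothetical helper name):

#include <cstdint>
#include <fstream>
#include <string>

// Read one length-prefixed string: a u32 byte count, then the raw bytes.
static bool read_token(std::ifstream & fin, std::string & word) {
    uint32_t len = 0;
    fin.read((char *) &len, sizeof(len));
    if (!fin) {
        return false;
    }
    word.resize(len);                 // no fixed-size buffer to overflow
    fin.read(&word[0], len);
    return static_cast<bool>(fin);
}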

View File

@ -31,7 +31,6 @@ struct whisper_params {
bool print_special = false; bool print_special = false;
bool print_energy = false; bool print_energy = false;
bool no_timestamps = true; bool no_timestamps = true;
bool use_gpu = true;
std::string person = "Santa"; std::string person = "Santa";
std::string language = "en"; std::string language = "en";
@ -62,7 +61,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
else if (arg == "-tr" || arg == "--translate") { params.translate = true; } else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; } else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; } else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; } else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; } else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; } else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
@ -96,7 +94,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false"); fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false"); fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false"); fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str()); fprintf(stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n", params.person.c_str());
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str()); fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str()); fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
@ -184,10 +181,8 @@ int main(int argc, char ** argv) {
} }
// whisper init // whisper init
struct whisper_context_params cparams;
cparams.use_gpu = params.use_gpu;
struct whisper_context * ctx_wsp = whisper_init_from_file_with_params(params.model_wsp.c_str(), cparams); struct whisper_context * ctx_wsp = whisper_init_from_file(params.model_wsp.c_str());
// gpt init // gpt init

View File

@ -21,7 +21,7 @@ help()
echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]" echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
echo "options:" echo "options:"
echo "-s Step in seconds (default is $step)." echo "-s Step in seconds (default is $step)."
echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large' (default is '$model')." echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large' (default is '$model')."
echo "-t Number of threads to use." echo "-t Number of threads to use."
echo "-h Print this help page." echo "-h Print this help page."
echo echo

View File

@ -18,7 +18,9 @@ android {
vectorDrawables { vectorDrawables {
useSupportLibrary true useSupportLibrary true
} }
ndk {
abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
}
} }
buildTypes { buildTypes {
@ -41,10 +43,20 @@ android {
composeOptions { composeOptions {
kotlinCompilerExtensionVersion '1.5.0' kotlinCompilerExtensionVersion '1.5.0'
} }
ndkVersion "25.2.9519653"
externalNativeBuild {
cmake {
path = file("src/main/jni/whisper/CMakeLists.txt")
}
}
packagingOptions {
resources {
excludes += '/META-INF/{AL2.0,LGPL2.1}'
}
}
} }
dependencies { dependencies {
implementation project(':lib')
implementation 'androidx.activity:activity-compose:1.7.2' implementation 'androidx.activity:activity-compose:1.7.2'
implementation 'androidx.compose.material:material-icons-core:1.5.0' implementation 'androidx.compose.material:material-icons-core:1.5.0'
implementation 'androidx.compose.material3:material3:1.1.1' implementation 'androidx.compose.material3:material3:1.1.1'

View File

@ -15,7 +15,7 @@ import androidx.lifecycle.viewmodel.initializer
import androidx.lifecycle.viewmodel.viewModelFactory import androidx.lifecycle.viewmodel.viewModelFactory
import com.whispercppdemo.media.decodeWaveFile import com.whispercppdemo.media.decodeWaveFile
import com.whispercppdemo.recorder.Recorder import com.whispercppdemo.recorder.Recorder
import com.whispercpp.whisper.WhisperContext import com.whispercppdemo.whisper.WhisperContext
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.launch import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking import kotlinx.coroutines.runBlocking
@ -35,7 +35,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
private val modelsPath = File(application.filesDir, "models") private val modelsPath = File(application.filesDir, "models")
private val samplesPath = File(application.filesDir, "samples") private val samplesPath = File(application.filesDir, "samples")
private var recorder: Recorder = Recorder() private var recorder: Recorder = Recorder()
private var whisperContext: com.whispercpp.whisper.WhisperContext? = null private var whisperContext: WhisperContext? = null
private var mediaPlayer: MediaPlayer? = null private var mediaPlayer: MediaPlayer? = null
private var recordedFile: File? = null private var recordedFile: File? = null
@ -47,7 +47,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
} }
private suspend fun printSystemInfo() { private suspend fun printSystemInfo() {
printMessage(String.format("System Info: %s\n", com.whispercpp.whisper.WhisperContext.getSystemInfo())) printMessage(String.format("System Info: %s\n", WhisperContext.getSystemInfo()))
} }
private suspend fun loadData() { private suspend fun loadData() {
@ -78,7 +78,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
printMessage("Loading model...\n") printMessage("Loading model...\n")
val models = application.assets.list("models/") val models = application.assets.list("models/")
if (models != null) { if (models != null) {
whisperContext = com.whispercpp.whisper.WhisperContext.createContextFromAsset(application.assets, "models/" + models[0]) whisperContext = WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
printMessage("Loaded model ${models[0]}.\n") printMessage("Loaded model ${models[0]}.\n")
} }

View File

@ -1,4 +1,4 @@
package com.whispercpp.whisper package com.whispercppdemo.whisper
import android.content.res.AssetManager import android.content.res.AssetManager
import android.os.Build import android.os.Build

View File

@ -1,4 +1,4 @@
package com.whispercpp.whisper package com.whispercppdemo.whisper
import android.util.Log import android.util.Log
import java.io.BufferedReader import java.io.BufferedReader

View File

@ -127,11 +127,11 @@ static struct whisper_context *whisper_init_from_asset(
.close = &asset_close .close = &asset_close
}; };
return whisper_init_with_params(&loader, whisper_context_default_params()); return whisper_init(&loader);
} }
JNIEXPORT jlong JNICALL JNIEXPORT jlong JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContextFromAsset( Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromAsset(
JNIEnv *env, jobject thiz, jobject assetManager, jstring asset_path_str) { JNIEnv *env, jobject thiz, jobject assetManager, jstring asset_path_str) {
UNUSED(thiz); UNUSED(thiz);
struct whisper_context *context = NULL; struct whisper_context *context = NULL;
@ -142,18 +142,18 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContextFromAsset(
} }
JNIEXPORT jlong JNICALL JNIEXPORT jlong JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContext( Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
JNIEnv *env, jobject thiz, jstring model_path_str) { JNIEnv *env, jobject thiz, jstring model_path_str) {
UNUSED(thiz); UNUSED(thiz);
struct whisper_context *context = NULL; struct whisper_context *context = NULL;
const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL); const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL);
context = whisper_init_from_file_with_params(model_path_chars, whisper_context_default_params()); context = whisper_init_from_file(model_path_chars);
(*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars); (*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars);
return (jlong) context; return (jlong) context;
} }
JNIEXPORT void JNICALL JNIEXPORT void JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_freeContext( Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_freeContext(
JNIEnv *env, jobject thiz, jlong context_ptr) { JNIEnv *env, jobject thiz, jlong context_ptr) {
UNUSED(env); UNUSED(env);
UNUSED(thiz); UNUSED(thiz);
@ -162,7 +162,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_freeContext(
} }
JNIEXPORT void JNICALL JNIEXPORT void JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe( Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_fullTranscribe(
JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) { JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) {
UNUSED(thiz); UNUSED(thiz);
struct whisper_context *context = (struct whisper_context *) context_ptr; struct whisper_context *context = (struct whisper_context *) context_ptr;
@ -194,7 +194,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe(
} }
JNIEXPORT jint JNICALL JNIEXPORT jint JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegmentCount( Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegmentCount(
JNIEnv *env, jobject thiz, jlong context_ptr) { JNIEnv *env, jobject thiz, jlong context_ptr) {
UNUSED(env); UNUSED(env);
UNUSED(thiz); UNUSED(thiz);
@ -203,7 +203,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegmentCount(
} }
JNIEXPORT jstring JNICALL JNIEXPORT jstring JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegment( Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegment(
JNIEnv *env, jobject thiz, jlong context_ptr, jint index) { JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
UNUSED(thiz); UNUSED(thiz);
struct whisper_context *context = (struct whisper_context *) context_ptr; struct whisper_context *context = (struct whisper_context *) context_ptr;
@ -213,7 +213,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegment(
} }
JNIEXPORT jstring JNICALL JNIEXPORT jstring JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_getSystemInfo( Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getSystemInfo(
JNIEnv *env, jobject thiz JNIEnv *env, jobject thiz
) { ) {
UNUSED(thiz); UNUSED(thiz);
@ -223,7 +223,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getSystemInfo(
} }
JNIEXPORT jstring JNICALL JNIEXPORT jstring JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz, Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
jint n_threads) { jint n_threads) {
UNUSED(thiz); UNUSED(thiz);
const char *bench_ggml_memcpy = whisper_bench_memcpy_str(n_threads); const char *bench_ggml_memcpy = whisper_bench_memcpy_str(n_threads);
@ -231,7 +231,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
} }
JNIEXPORT jstring JNICALL JNIEXPORT jstring JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz, Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz,
jint n_threads) { jint n_threads) {
UNUSED(thiz); UNUSED(thiz);
const char *bench_ggml_mul_mat = whisper_bench_ggml_mul_mat_str(n_threads); const char *bench_ggml_mul_mat = whisper_bench_ggml_mul_mat_str(n_threads);
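Every rename in this file tracks the Kotlin package move (com.whispercppdemo.whisper to com.whispercpp.whisper): JNI symbol names encode the package, the class, and Kotlin's Companion inner class, with the '$' separator escaped as _00024 ('$' is U+0024). A minimal sketch of the mapping, with ping as a hypothetical method:

#include <jni.h>

// Kotlin side (for reference):
//   package com.whispercpp.whisper
//   class WhisperLib { companion object { external fun ping(): Int } }
//
// Matching native symbol: Java_<package>_<class>_00024Companion_<method>
JNIEXPORT jint JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_ping(JNIEnv * env, jobject thiz) {
    (void) env;
    (void) thiz;
    return 42; // placeholder result
}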

View File

@ -1 +0,0 @@
/build

View File

@ -1,51 +0,0 @@
plugins {
id 'com.android.library'
id 'org.jetbrains.kotlin.android'
}
android {
namespace 'com.whispercpp'
compileSdk 34
defaultConfig {
minSdk 26
targetSdk 34
versionCode 1
versionName "1.0"
ndk {
abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
}
}
buildTypes {
release {
minifyEnabled false
}
}
compileOptions {
sourceCompatibility JavaVersion.VERSION_1_8
targetCompatibility JavaVersion.VERSION_1_8
}
kotlinOptions {
jvmTarget = '1.8'
}
ndkVersion "25.2.9519653"
externalNativeBuild {
cmake {
path = file("src/main/jni/whisper/CMakeLists.txt")
}
}
packagingOptions {
resources {
excludes += '/META-INF/{AL2.0,LGPL2.1}'
}
}
}
dependencies {
implementation 'androidx.core:core-ktx:1.9.0'
implementation 'androidx.appcompat:appcompat:1.6.1'
implementation 'com.google.android.material:material:1.8.0'
}

View File

@ -1,4 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
</manifest>

View File

@ -14,4 +14,3 @@ dependencyResolutionManagement {
} }
rootProject.name = "WhisperCppDemo" rootProject.name = "WhisperCppDemo"
include ':app' include ':app'
include ':lib'

View File

@ -17,8 +17,8 @@
18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; }; 18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; };
18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; }; 18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; };
18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; }; 18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; };
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK -DGGML_USE_METAL"; }; }; 18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML"; }; };
18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL"; }; }; 18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE"; }; };
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; }; 18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.c */; }; 18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.c */; };
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; }; 18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };

View File

@ -61,13 +61,7 @@ void AudioInputCallback(void * inUserData,
NSLog(@"Loading model from %@", modelPath); NSLog(@"Loading model from %@", modelPath);
// create ggml context // create ggml context
stateInp.ctx = whisper_init_from_file([modelPath UTF8String]);
struct whisper_context_params cparams = whisper_context_default_params();
#if TARGET_OS_SIMULATOR
cparams.use_gpu = false;
NSLog(@"Running on simulator, using CPU");
#endif
stateInp.ctx = whisper_init_from_file_with_params([modelPath UTF8String], cparams);
// check if the model was loaded successfully // check if the model was loaded successfully
if (stateInp.ctx == NULL) { if (stateInp.ctx == NULL) {

View File

@ -1,5 +1,4 @@
import Foundation import Foundation
import whisper
enum WhisperError: Error { enum WhisperError: Error {
case couldNotInitializeContext case couldNotInitializeContext
@ -56,12 +55,7 @@ actor WhisperContext {
} }
static func createContext(path: String) throws -> WhisperContext { static func createContext(path: String) throws -> WhisperContext {
var params = whisper_context_default_params() let context = whisper_init_from_file(path)
#if targetEnvironment(simulator)
params.use_gpu = false
print("Running on the simulator, using CPU")
#endif
let context = whisper_init_from_file_with_params(path, params)
if let context { if let context {
return WhisperContext(context: context) return WhisperContext(context: context)
} else { } else {

View File

@ -0,0 +1,4 @@
//
// Use this file to import your target's public headers that you would like to expose to Swift.
//
#import "whisper.h"

View File

@ -15,9 +15,14 @@
0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9A29539CCF003032C3 /* WhisperCppDemoApp.swift */; }; 0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9A29539CCF003032C3 /* WhisperCppDemoApp.swift */; };
0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; }; 0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; };
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; }; 0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */; };
0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC729539EB0003032C3 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-Wno-shorten-64-to-32"; }; };
0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC929539EB0003032C3 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -Wno-shorten-64-to-32"; }; };
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; }; 0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; }; 0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */ = {isa = PBXBuildFile; productRef = E3F92DC42AFA8E3800A6A9D4 /* whisper */; }; 18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE14C2AF555FA0044A204 /* ggml-backend.c */; };
18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1512AF555FA0044A204 /* ggml-quants.c */; };
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 18AED47F2AB21F2B009D854F /* ggml-alloc.c */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXFileReference section */ /* Begin PBXFileReference section */
@ -31,9 +36,22 @@
0AAC5D9C29539CCF003032C3 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; }; 0AAC5D9C29539CCF003032C3 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
0AAC5D9E29539CD0003032C3 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; }; 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; }; 0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "WhisperCppDemo-Bridging-Header.h"; sourceTree = "<group>"; };
0AAC5DC729539EB0003032C3 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = whisper.cpp; sourceTree = "<group>"; };
0AAC5DC829539EB0003032C3 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whisper.h; sourceTree = "<group>"; };
0AAC5DC929539EB0003032C3 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ggml.c; sourceTree = "<group>"; };
0AAC5DCA29539EB0003032C3 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ggml.h; sourceTree = "<group>"; };
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; }; 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; }; 0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = whisper.cpp; path = ../..; sourceTree = "<group>"; }; 18ABE14C2AF555FA0044A204 /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-backend.c"; sourceTree = "<group>"; };
18ABE14D2AF555FA0044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-backend.h"; sourceTree = "<group>"; };
18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-backend-impl.h"; sourceTree = "<group>"; };
18ABE14F2AF555FA0044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-quants.h"; sourceTree = "<group>"; };
18ABE1502AF555FA0044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-impl.h"; sourceTree = "<group>"; };
18ABE1512AF555FA0044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-quants.c"; sourceTree = "<group>"; };
18AED47F2AB21F2B009D854F /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-alloc.c"; sourceTree = "<group>"; };
18AED4802AB21F2B009D854F /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-alloc.h"; sourceTree = "<group>"; };
/* End PBXFileReference section */ /* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */ /* Begin PBXFrameworksBuildPhase section */
@ -41,7 +59,6 @@
isa = PBXFrameworksBuildPhase; isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@ -77,12 +94,11 @@
0AAC5D8E29539CCF003032C3 = { 0AAC5D8E29539CCF003032C3 = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */,
0A8E48FF2954B3F100704C1B /* README.md */, 0A8E48FF2954B3F100704C1B /* README.md */,
0AAC5DC529539E89003032C3 /* whisper.cpp */,
0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */, 0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */,
0AAC5D9929539CCF003032C3 /* whisper.swiftui.demo */, 0AAC5D9929539CCF003032C3 /* whisper.swiftui.demo */,
0AAC5D9829539CCF003032C3 /* Products */, 0AAC5D9829539CCF003032C3 /* Products */,
E3F92DC32AFA8E3800A6A9D4 /* Frameworks */,
); );
sourceTree = "<group>"; sourceTree = "<group>";
}; };
@ -107,9 +123,39 @@
path = whisper.swiftui.demo; path = whisper.swiftui.demo;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
0AAC5DA129539CD0003032C3 /* Preview Content */ = {
isa = PBXGroup;
children = (
0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */,
);
name = "Preview Content";
path = "../Preview Content";
sourceTree = "<group>";
};
0AAC5DC529539E89003032C3 /* whisper.cpp */ = {
isa = PBXGroup;
children = (
18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */,
18ABE14C2AF555FA0044A204 /* ggml-backend.c */,
18ABE14D2AF555FA0044A204 /* ggml-backend.h */,
18ABE1502AF555FA0044A204 /* ggml-impl.h */,
18ABE1512AF555FA0044A204 /* ggml-quants.c */,
18ABE14F2AF555FA0044A204 /* ggml-quants.h */,
18AED47F2AB21F2B009D854F /* ggml-alloc.c */,
18AED4802AB21F2B009D854F /* ggml-alloc.h */,
0AAC5DC929539EB0003032C3 /* ggml.c */,
0AAC5DCA29539EB0003032C3 /* ggml.h */,
0AAC5DC729539EB0003032C3 /* whisper.cpp */,
0AAC5DC829539EB0003032C3 /* whisper.h */,
);
name = whisper.cpp;
path = ../..;
sourceTree = "<group>";
};
0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */ = { 0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */,
0AAC5DD02953A394003032C3 /* LibWhisper.swift */, 0AAC5DD02953A394003032C3 /* LibWhisper.swift */,
); );
path = whisper.cpp.swift; path = whisper.cpp.swift;
@ -128,17 +174,11 @@
children = ( children = (
0AAC5D9E29539CD0003032C3 /* Assets.xcassets */, 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */,
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */, 0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */,
0AAC5DA129539CD0003032C3 /* Preview Content */,
); );
path = "Supporting files"; path = "Supporting files";
sourceTree = "<group>"; sourceTree = "<group>";
}; };
E3F92DC32AFA8E3800A6A9D4 /* Frameworks */ = {
isa = PBXGroup;
children = (
);
name = Frameworks;
sourceTree = "<group>";
};
/* End PBXGroup section */ /* End PBXGroup section */
/* Begin PBXNativeTarget section */ /* Begin PBXNativeTarget section */
@ -155,9 +195,6 @@
dependencies = ( dependencies = (
); );
name = whisper.swiftui; name = whisper.swiftui;
packageProductDependencies = (
E3F92DC42AFA8E3800A6A9D4 /* whisper */,
);
productName = WhisperCppDemo; productName = WhisperCppDemo;
productReference = 0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */; productReference = 0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */;
productType = "com.apple.product-type.application"; productType = "com.apple.product-type.application";
@ -202,6 +239,7 @@
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
0AA751482953AC2E001EE061 /* samples in Resources */, 0AA751482953AC2E001EE061 /* samples in Resources */,
0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */,
0A8E49002954B3F100704C1B /* README.md in Resources */, 0A8E49002954B3F100704C1B /* README.md in Resources */,
0AA751492953AC2E001EE061 /* models in Resources */, 0AA751492953AC2E001EE061 /* models in Resources */,
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */, 0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */,
@ -217,10 +255,15 @@
files = ( files = (
0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */, 0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */,
0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */, 0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */,
0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */,
18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */,
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */, 0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */, 0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */, 0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */,
0AA7514E2953D958001EE061 /* Recorder.swift in Sources */, 0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */,
18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@ -348,7 +391,7 @@
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1; CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\""; DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
DEVELOPMENT_TEAM = ""; DEVELOPMENT_TEAM = P8JZH34X63;
ENABLE_HARDENED_RUNTIME = YES; ENABLE_HARDENED_RUNTIME = YES;
ENABLE_PREVIEWS = YES; ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES; GENERATE_INFOPLIST_FILE = YES;
@ -372,6 +415,7 @@
SDKROOT = auto; SDKROOT = auto;
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
SWIFT_EMIT_LOC_STRINGS = YES; SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
SWIFT_OPTIMIZATION_LEVEL = "-Onone"; SWIFT_OPTIMIZATION_LEVEL = "-Onone";
SWIFT_VERSION = 5.0; SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2"; TARGETED_DEVICE_FAMILY = "1,2";
@ -388,7 +432,7 @@
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1; CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\""; DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
DEVELOPMENT_TEAM = ""; DEVELOPMENT_TEAM = P8JZH34X63;
ENABLE_HARDENED_RUNTIME = YES; ENABLE_HARDENED_RUNTIME = YES;
ENABLE_PREVIEWS = YES; ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES; GENERATE_INFOPLIST_FILE = YES;
@ -417,6 +461,7 @@
SDKROOT = auto; SDKROOT = auto;
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
SWIFT_EMIT_LOC_STRINGS = YES; SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
SWIFT_VERSION = 5.0; SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2"; TARGETED_DEVICE_FAMILY = "1,2";
}; };
@ -444,13 +489,6 @@
defaultConfigurationName = Release; defaultConfigurationName = Release;
}; };
/* End XCConfigurationList section */ /* End XCConfigurationList section */
/* Begin XCSwiftPackageProductDependency section */
E3F92DC42AFA8E3800A6A9D4 /* whisper */ = {
isa = XCSwiftPackageProductDependency;
productName = whisper;
};
/* End XCSwiftPackageProductDependency section */
}; };
rootObject = 0AAC5D8F29539CCF003032C3 /* Project object */; rootObject = 0AAC5D8F29539CCF003032C3 /* Project object */;
} }

View File

@ -24,7 +24,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
for (size_t i = 0; i < g_contexts.size(); ++i) { for (size_t i = 0; i < g_contexts.size(); ++i) {
if (g_contexts[i] == nullptr) { if (g_contexts[i] == nullptr) {
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params()); g_contexts[i] = whisper_init_from_file(path_model.c_str());
if (g_contexts[i] != nullptr) { if (g_contexts[i] != nullptr) {
return i + 1; return i + 1;
} else { } else {

View File

@ -18,11 +18,11 @@ else
fi fi
models=( \ models=( \
"tiny" "tiny-q4_0" "tiny-q4_1" "tiny-q5_0" "tiny-q5_1" "tiny-q8_0" \ "tiny" "tiny-q5_0" "tiny-q5_1" "tiny-q8_0" \
"base" "base-q4_0" "base-q4_1" "base-q5_0" "base-q5_1" "base-q8_0" \ "base" "base-q5_0" "base-q5_1" "base-q8_0" \
"small" "small-q4_0" "small-q4_1" "small-q5_0" "small-q5_1" "small-q8_0" \ "small" "small-q5_0" "small-q5_1" "small-q8_0" \
"medium" "medium-q4_0" "medium-q4_1" "medium-q5_0" "medium-q5_1" "medium-q8_0" \ "medium" "medium-q5_0" "medium-q5_1" "medium-q8_0" \
"large" "large-q4_0" "large-q4_1" "large-q5_0" "large-q5_1" "large-q8_0" \ "large" "large-q5_0" "large-q5_1" "large-q8_0" \
) )
if [ "$encoder_only" -eq 0 ]; then if [ "$encoder_only" -eq 0 ]; then
@ -83,10 +83,6 @@ for model in "${models[@]}"; do
config="$config COREML" config="$config COREML"
fi fi
if [[ $system_info == *"CUDA = 1"* ]]; then
config="$config CUDA"
fi
if [[ $system_info == *"METAL = 1"* ]]; then if [[ $system_info == *"METAL = 1"* ]]; then
config="$config METAL" config="$config METAL"
fi fi

View File

@ -1,6 +1,6 @@
#!/bin/bash #!/bin/bash
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" ) models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
for model in "${models[@]}"; do for model in "${models[@]}"; do
python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/ python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/

View File

@ -4476,13 +4476,6 @@ static __device__ void cpy_1_f32_f16(const char * cxi, char * cdsti) {
*dsti = __float2half(*xi); *dsti = __float2half(*xi);
} }
static __device__ void cpy_1_f16_f16(const char * cxi, char * cdsti) {
const half * xi = (const half *) cxi;
half * dsti = (half *) cdsti;
*dsti = *xi;
}
template <cpy_kernel_t cpy_1> template <cpy_kernel_t cpy_1>
static __global__ void cpy_f32_f16(const char * cx, char * cdst, const int ne, static __global__ void cpy_f32_f16(const char * cx, char * cdst, const int ne,
const int ne00, const int ne01, const int nb00, const int nb01, const int nb02, const int ne00, const int ne01, const int nb00, const int nb01, const int nb02,
@ -4736,25 +4729,6 @@ static __global__ void clamp_f32(const float * x, float * dst, const float min,
dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]); dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
} }
static __global__ void im2col_f32_f16(
const float * x, half * dst,
int ofs0, int ofs1, int IW, int IH, int CHW,
int s0, int s1, int p0, int p1, int d0, int d1) {
const int iiw = blockIdx.z * s0 + threadIdx.z * d0 - p0;
const int iih = blockIdx.y * s1 + threadIdx.y * d1 - p1;
const int offset_dst =
(threadIdx.x * gridDim.y * gridDim.z + blockIdx.y * gridDim.z + blockIdx.z) * CHW +
(blockIdx.x * (blockDim.y * blockDim.z) + threadIdx.y * blockDim.z + threadIdx.z);
if (!(iih < 0 || iih >= IH || iiw < 0 || iiw >= IW)) {
const int offset_src = threadIdx.x * ofs0 + blockIdx.x * ofs1;
dst[offset_dst] = __float2half(x[offset_src + iih * IW + iiw]);
} else {
dst[offset_dst] = __float2half(0.0f);
}
}
template<int qk, int qr, dequantize_kernel_t dq> template<int qk, int qr, dequantize_kernel_t dq>
static void get_rows_cuda(const void * x, const int32_t * y, float * dst, const int nrows, const int ncols, cudaStream_t stream) { static void get_rows_cuda(const void * x, const int32_t * y, float * dst, const int nrows, const int ncols, cudaStream_t stream) {
const dim3 block_dims(CUDA_GET_ROWS_BLOCK_SIZE, 1, 1); const dim3 block_dims(CUDA_GET_ROWS_BLOCK_SIZE, 1, 1);
@ -5644,16 +5618,6 @@ static void ggml_cpy_f32_f16_cuda(
(cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12); (cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12);
} }
static void ggml_cpy_f16_f16_cuda(
const char * cx, char * cdst, const int ne,
const int ne00, const int ne01, const int nb00, const int nb01, const int nb02,
const int ne10, const int ne11, const int nb10, const int nb11, const int nb12, cudaStream_t stream) {
const int num_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE;
cpy_f32_f16<cpy_1_f16_f16><<<num_blocks, CUDA_CPY_BLOCK_SIZE, 0, stream>>>
(cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12);
}
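The (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE expression in the launcher above is integer ceiling division: it sizes the grid so that every one of the ne elements is covered by at least one thread. In isolation:

// Smallest number of fixed-size blocks that covers n items (ceiling division).
static inline int ceil_div(int n, int block) {
    return (n + block - 1) / block;
}
// e.g. ceil_div(1000, 256) == 4 -> 1024 threads; kernels typically bounds-check
// the per-thread index so the trailing 24 threads do no work.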
static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) { static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) {
const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE; const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k); scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
@ -5737,15 +5701,6 @@ static void soft_max_f32_cuda(const float * x, float * dst, const int ncols_x, c
soft_max_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols_x); soft_max_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols_x);
} }
static void im2col_f32_f16_cuda(const float * x, half * dst,
int OH, int IW, int IH, int OW, int IC,
int KH, int KW, int N, int ofs0, int ofs1,
int s0, int s1, int p0, int p1, int d0, int d1, cudaStream_t stream) {
dim3 block_nums(IC, OH, OW);
dim3 block_dims(N, KH, KW);
im2col_f32_f16<<<block_nums, block_dims, 0, stream>>>(x, dst, ofs0, ofs1, IW, IH, (IC * KH * KW), s0, s1, p0, p1, d0, d1);
}
// buffer pool for cuda // buffer pool for cuda
#define MAX_CUDA_BUFFERS 256 #define MAX_CUDA_BUFFERS 256
@ -6528,7 +6483,7 @@ inline void ggml_cuda_op_mul_mat_cublas(
src1_as_f16 = (half *) ggml_cuda_pool_malloc_async(ne * sizeof(half), &src1_as, id, stream); src1_as_f16 = (half *) ggml_cuda_pool_malloc_async(ne * sizeof(half), &src1_as, id, stream);
to_fp16_cuda(src1_ddf_i, src1_as_f16, ne, stream); to_fp16_cuda(src1_ddf_i, src1_as_f16, ne, stream);
} }
const half * src1_ptr = src1->type == GGML_TYPE_F16 ? (const half *) src1_ddf_i : src1_as_f16; const half * src1_ptr = src1->type == GGML_TYPE_F16 ? (const half *) src1_ddq_i : src1_as_f16;
size_t dst_f16_as = 0; size_t dst_f16_as = 0;
half * dst_f16 = (half *) ggml_cuda_pool_malloc_async(row_diff*src1_ncols * sizeof(half), &dst_f16_as, id, stream); half * dst_f16 = (half *) ggml_cuda_pool_malloc_async(row_diff*src1_ncols * sizeof(half), &dst_f16_as, id, stream);
@ -6704,45 +6659,6 @@ inline void ggml_cuda_op_alibi(
(void) src1_dd; (void) src1_dd;
} }
inline void ggml_cuda_op_im2col(
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) {
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F16);
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
const bool is_2D = ((const int32_t*)(dst->op_params))[6] == 1;
const int64_t N = src1->ne[is_2D ? 3 : 2];
const int64_t IC = src1->ne[is_2D ? 2 : 1];
const int64_t IH = is_2D ? src1->ne[1] : 1;
const int64_t IW = src1->ne[0];
const int64_t KH = is_2D ? src0->ne[1] : 1;
const int64_t KW = src0->ne[0];
const int64_t OH = is_2D ? dst->ne[2] : 1;
const int64_t OW = dst->ne[1];
const size_t ofs0 = src1->nb[is_2D ? 3 : 2] / 4; // nb is byte offset, src is type float32
const size_t ofs1 = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32
im2col_f32_f16_cuda(src1_dd, (half*) dst_dd,
OH, IW, IH, OW, IC, KH, KW, N,
ofs0, ofs1, s0, s1, p0, p1, d0, d1, main_stream);
(void) src0;
(void) src0_dd;
}
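The removed ggml_cuda_op_im2col takes OH/OW directly from dst, which the graph builder has already sized with the standard convolution arithmetic; a sketch of that relation (conv_out_size is not a function in this tree):

#include <cstdint>

// Output extent of a convolution over extent I with kernel extent K,
// stride s, padding p, dilation d.
static inline int64_t conv_out_size(int64_t I, int64_t K, int s, int p, int d) {
    return (I + 2*p - d*(K - 1) - 1) / s + 1;
}
// OH = conv_out_size(IH, KH, s1, p1, d1); OW = conv_out_size(IW, KW, s0, p0, d0)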
inline void ggml_cuda_op_diag_mask_inf( inline void ggml_cuda_op_diag_mask_inf(
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) { const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) {
@ -7633,9 +7549,6 @@ static void ggml_cuda_cpy(const ggml_tensor * src0, const ggml_tensor * src1, gg
} else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F16) { } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F16) {
ggml_cpy_f32_f16_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ggml_cpy_f32_f16_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02,
ne10, ne11, nb10, nb11, nb12, main_stream); ne10, ne11, nb10, nb11, nb12, main_stream);
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16) {
ggml_cpy_f16_f16_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02,
ne10, ne11, nb10, nb11, nb12, main_stream);
} else { } else {
fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__, fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
ggml_type_name(src0->type), ggml_type_name(src1->type)); ggml_type_name(src0->type), ggml_type_name(src1->type));
@ -7667,10 +7580,6 @@ static void ggml_cuda_alibi(const ggml_tensor * src0, const ggml_tensor * src1,
ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_alibi); ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_alibi);
} }
void ggml_cuda_im2col(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_im2col);
}
static void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { static void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
(void) src0; (void) src0;
(void) src1; (void) src1;
@ -8034,9 +7943,6 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
case GGML_OP_ALIBI: case GGML_OP_ALIBI:
func = ggml_cuda_alibi; func = ggml_cuda_alibi;
break; break;
case GGML_OP_IM2COL:
func = ggml_cuda_im2col;
break;
default: default:
return false; return false;
} }

View File

@ -86,7 +86,6 @@ struct ggml_metal_context {
GGML_METAL_DECL_KERNEL(rms_norm); GGML_METAL_DECL_KERNEL(rms_norm);
GGML_METAL_DECL_KERNEL(norm); GGML_METAL_DECL_KERNEL(norm);
GGML_METAL_DECL_KERNEL(mul_mv_f32_f32); GGML_METAL_DECL_KERNEL(mul_mv_f32_f32);
GGML_METAL_DECL_KERNEL(mul_mv_f16_f16);
GGML_METAL_DECL_KERNEL(mul_mv_f16_f32); GGML_METAL_DECL_KERNEL(mul_mv_f16_f32);
GGML_METAL_DECL_KERNEL(mul_mv_f16_f32_1row); GGML_METAL_DECL_KERNEL(mul_mv_f16_f32_1row);
GGML_METAL_DECL_KERNEL(mul_mv_f16_f32_l4); GGML_METAL_DECL_KERNEL(mul_mv_f16_f32_l4);
@ -115,7 +114,6 @@ struct ggml_metal_context {
GGML_METAL_DECL_KERNEL(rope_f32); GGML_METAL_DECL_KERNEL(rope_f32);
GGML_METAL_DECL_KERNEL(rope_f16); GGML_METAL_DECL_KERNEL(rope_f16);
GGML_METAL_DECL_KERNEL(alibi_f32); GGML_METAL_DECL_KERNEL(alibi_f32);
GGML_METAL_DECL_KERNEL(im2col_f16);
GGML_METAL_DECL_KERNEL(cpy_f32_f16); GGML_METAL_DECL_KERNEL(cpy_f32_f16);
GGML_METAL_DECL_KERNEL(cpy_f32_f32); GGML_METAL_DECL_KERNEL(cpy_f32_f32);
GGML_METAL_DECL_KERNEL(cpy_f16_f16); GGML_METAL_DECL_KERNEL(cpy_f16_f16);
@ -289,7 +287,6 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
GGML_METAL_ADD_KERNEL(rms_norm); GGML_METAL_ADD_KERNEL(rms_norm);
GGML_METAL_ADD_KERNEL(norm); GGML_METAL_ADD_KERNEL(norm);
GGML_METAL_ADD_KERNEL(mul_mv_f32_f32); GGML_METAL_ADD_KERNEL(mul_mv_f32_f32);
GGML_METAL_ADD_KERNEL(mul_mv_f16_f16);
GGML_METAL_ADD_KERNEL(mul_mv_f16_f32); GGML_METAL_ADD_KERNEL(mul_mv_f16_f32);
GGML_METAL_ADD_KERNEL(mul_mv_f16_f32_1row); GGML_METAL_ADD_KERNEL(mul_mv_f16_f32_1row);
GGML_METAL_ADD_KERNEL(mul_mv_f16_f32_l4); GGML_METAL_ADD_KERNEL(mul_mv_f16_f32_l4);
@ -320,7 +317,6 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
GGML_METAL_ADD_KERNEL(rope_f32); GGML_METAL_ADD_KERNEL(rope_f32);
GGML_METAL_ADD_KERNEL(rope_f16); GGML_METAL_ADD_KERNEL(rope_f16);
GGML_METAL_ADD_KERNEL(alibi_f32); GGML_METAL_ADD_KERNEL(alibi_f32);
GGML_METAL_ADD_KERNEL(im2col_f16);
GGML_METAL_ADD_KERNEL(cpy_f32_f16); GGML_METAL_ADD_KERNEL(cpy_f32_f16);
GGML_METAL_ADD_KERNEL(cpy_f32_f32); GGML_METAL_ADD_KERNEL(cpy_f32_f32);
GGML_METAL_ADD_KERNEL(cpy_f16_f16); GGML_METAL_ADD_KERNEL(cpy_f16_f16);
@ -390,7 +386,6 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
GGML_METAL_DEL_KERNEL(rms_norm); GGML_METAL_DEL_KERNEL(rms_norm);
GGML_METAL_DEL_KERNEL(norm); GGML_METAL_DEL_KERNEL(norm);
GGML_METAL_DEL_KERNEL(mul_mv_f32_f32); GGML_METAL_DEL_KERNEL(mul_mv_f32_f32);
GGML_METAL_DEL_KERNEL(mul_mv_f16_f16);
GGML_METAL_DEL_KERNEL(mul_mv_f16_f32); GGML_METAL_DEL_KERNEL(mul_mv_f16_f32);
GGML_METAL_DEL_KERNEL(mul_mv_f16_f32_1row); GGML_METAL_DEL_KERNEL(mul_mv_f16_f32_1row);
GGML_METAL_DEL_KERNEL(mul_mv_f16_f32_l4); GGML_METAL_DEL_KERNEL(mul_mv_f16_f32_l4);
@ -421,7 +416,6 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
GGML_METAL_DEL_KERNEL(rope_f32); GGML_METAL_DEL_KERNEL(rope_f32);
GGML_METAL_DEL_KERNEL(rope_f16); GGML_METAL_DEL_KERNEL(rope_f16);
GGML_METAL_DEL_KERNEL(alibi_f32); GGML_METAL_DEL_KERNEL(alibi_f32);
GGML_METAL_DEL_KERNEL(im2col_f16);
GGML_METAL_DEL_KERNEL(cpy_f32_f16); GGML_METAL_DEL_KERNEL(cpy_f32_f16);
GGML_METAL_DEL_KERNEL(cpy_f32_f32); GGML_METAL_DEL_KERNEL(cpy_f32_f32);
GGML_METAL_DEL_KERNEL(cpy_f16_f16); GGML_METAL_DEL_KERNEL(cpy_f16_f16);
@ -1036,7 +1030,7 @@ void ggml_metal_graph_compute(
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2]; [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2];
[encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3]; [encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3];
[encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4]; [encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4];
[encoder setThreadgroupMemoryLength:GGML_PAD(nth/32*sizeof(float), 16) atIndex:0]; [encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0];
[encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; [encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
} break; } break;
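The GGML_PAD(..., 16) wrapper on the left rounds the threadgroup scratch size up before handing it to the encoder, since Metal expects setThreadgroupMemoryLength to be a multiple of 16 bytes and the unpadded nth/32*sizeof(float) can be as small as 4. A sketch of the rounding, assuming the usual GGML_PAD definition from ggml.h:

#include <cstddef>

// Mirrors: #define GGML_PAD(x, n) (((x) + (n) - 1) / (n) * (n))
static inline size_t pad_to(size_t x, size_t n) {
    return (x + n - 1) / n * n;
}
// e.g. nth == 32: 32/32 * sizeof(float) == 4 bytes -> pad_to(4, 16) == 16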
@ -1145,7 +1139,6 @@ void ggml_metal_graph_compute(
switch (src0t) { switch (src0t) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
GGML_ASSERT(src1t == GGML_TYPE_F32);
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f32_f32]; [encoder setComputePipelineState:ctx->pipeline_mul_mv_f32_f32];
nrows = 4; nrows = 4;
} break; } break;
@ -1153,18 +1146,13 @@ void ggml_metal_graph_compute(
{ {
nth0 = 32; nth0 = 32;
nth1 = 1; nth1 = 1;
if (src1t == GGML_TYPE_F32) { if (ne11 * ne12 < 4) {
if (ne11 * ne12 < 4) { [encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_1row];
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_1row]; } else if (ne00 >= 128 && ne01 >= 8 && ne00%4 == 0) {
} else if (ne00 >= 128 && ne01 >= 8 && ne00%4 == 0) { [encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_l4];
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_l4]; nrows = ne11;
nrows = ne11;
} else {
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32];
nrows = 4;
}
} else { } else {
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f16]; [encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32];
nrows = 4; nrows = 4;
} }
} break; } break;
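
Because the two columns interleave above, the control flow is hard to read; below is a condensed, self-contained C restatement of the left column's pipeline selection (the enum names are stand-ins for the ctx->pipeline_* objects, not the real API):

// Sketch of the left column: gate the F16 kernels on src1's type.
enum pipeline { MV_F16_F32_1ROW, MV_F16_F32_L4, MV_F16_F32, MV_F16_F16 };

static enum pipeline pick_f16_pipeline(int src1_is_f32, long ne00, long ne01,
                                       long ne11, long ne12, int * nrows) {
    if (src1_is_f32) {
        if (ne11 * ne12 < 4) {
            *nrows = 1;              // nrows keeps its default in the real code
            return MV_F16_F32_1ROW;  // few src1 rows: 1-row kernel
        }
        if (ne00 >= 128 && ne01 >= 8 && ne00 % 4 == 0) {
            *nrows = (int) ne11;     // wide, 4-aligned rows: l4 kernel
            return MV_F16_F32_L4;
        }
        *nrows = 4;
        return MV_F16_F32;
    }
    *nrows = 4;
    return MV_F16_F16;               // the F16 x F16 path removed on the right
}
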
@ -1354,7 +1342,7 @@ void ggml_metal_graph_compute(
[encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2]; [encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
[encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3]; [encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3];
[encoder setBytes:&eps length:sizeof( float) atIndex:4]; [encoder setBytes:&eps length:sizeof( float) atIndex:4];
[encoder setThreadgroupMemoryLength:GGML_PAD(nth/32*sizeof(float), 16) atIndex:0]; [encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0];
const int64_t nrows = ggml_nrows(src0); const int64_t nrows = ggml_nrows(src0);
@ -1373,7 +1361,7 @@ void ggml_metal_graph_compute(
[encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2]; [encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
[encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3]; [encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3];
[encoder setBytes:&eps length:sizeof( float) atIndex:4]; [encoder setBytes:&eps length:sizeof( float) atIndex:4];
[encoder setThreadgroupMemoryLength:GGML_PAD(nth*sizeof(float), 16) atIndex:0]; [encoder setThreadgroupMemoryLength:nth*sizeof(float) atIndex:0];
const int64_t nrows = ggml_nrows(src0); const int64_t nrows = ggml_nrows(src0);
@ -1476,58 +1464,6 @@ void ggml_metal_graph_compute(
[encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
} break; } break;
case GGML_OP_IM2COL:
{
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F16);
const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
const int32_t N = src1->ne[is_2D ? 3 : 2];
const int32_t IC = src1->ne[is_2D ? 2 : 1];
const int32_t IH = is_2D ? src1->ne[1] : 1;
const int32_t IW = src1->ne[0];
const int32_t KH = is_2D ? src0->ne[1] : 1;
const int32_t KW = src0->ne[0];
const int32_t OH = is_2D ? dst->ne[2] : 1;
const int32_t OW = dst->ne[1];
const int32_t CHW = IC * KH * KW;
const int32_t ofs0 = src1->nb[is_2D ? 3 : 2] / 4;
const int32_t ofs1 = src1->nb[is_2D ? 2 : 1] / 4;
switch (src0->type) {
case GGML_TYPE_F32: GGML_ASSERT(false && "not implemented"); break;
case GGML_TYPE_F16: [encoder setComputePipelineState:ctx->pipeline_im2col_f16]; break;
default: GGML_ASSERT(false);
};
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:0];
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
[encoder setBytes:&ofs0 length:sizeof( int32_t) atIndex:2];
[encoder setBytes:&ofs1 length:sizeof( int32_t) atIndex:3];
[encoder setBytes:&IW length:sizeof( int32_t) atIndex:4];
[encoder setBytes:&IH length:sizeof( int32_t) atIndex:5];
[encoder setBytes:&CHW length:sizeof( int32_t) atIndex:6];
[encoder setBytes:&s0 length:sizeof( int32_t) atIndex:7];
[encoder setBytes:&s1 length:sizeof( int32_t) atIndex:8];
[encoder setBytes:&p0 length:sizeof( int32_t) atIndex:9];
[encoder setBytes:&p1 length:sizeof( int32_t) atIndex:10];
[encoder setBytes:&d0 length:sizeof( int32_t) atIndex:11];
[encoder setBytes:&d1 length:sizeof( int32_t) atIndex:12];
[encoder dispatchThreadgroups:MTLSizeMake(IC, OH, OW) threadsPerThreadgroup:MTLSizeMake(N, KH, KW)];
} break;
case GGML_OP_DUP: case GGML_OP_DUP:
case GGML_OP_CPY: case GGML_OP_CPY:
case GGML_OP_CONT: case GGML_OP_CONT:
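
Note on the removed GGML_OP_IM2COL case above: OH and OW are read from dst, whose extents ggml derives upstream with the standard convolution output formula. A minimal C sketch of that arithmetic (the helper name is hypothetical):

// Assumed form of the convolution output-size computation.
static int conv_out_size(int ins, int ks, int s, int p, int d) {
    return (ins + 2*p - d*(ks - 1) - 1) / s + 1;
}
// e.g. IW = 10, KW = 3, s0 = 1, p0 = 0, d0 = 1  ->  OW = (10 - 2 - 1)/1 + 1 = 8
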

View File

@ -792,7 +792,7 @@ kernel void kernel_mul_mv_f32_f32(
constant int64_t & ne0, constant int64_t & ne0,
constant int64_t & ne1, constant int64_t & ne1,
uint3 tgpig[[threadgroup_position_in_grid]], uint3 tgpig[[threadgroup_position_in_grid]],
uint tiisg[[thread_index_in_simdgroup]]) { uint tiisg[[thread_index_in_simdgroup]]) {
const int64_t r0 = tgpig.x; const int64_t r0 = tgpig.x;
const int64_t rb = tgpig.y*N_F32_F32; const int64_t rb = tgpig.y*N_F32_F32;
@ -844,79 +844,6 @@ kernel void kernel_mul_mv_f32_f32(
} }
} }
#define N_F16_F16 4
kernel void kernel_mul_mv_f16_f16(
device const char * src0,
device const char * src1,
device float * dst,
constant int64_t & ne00,
constant int64_t & ne01,
constant int64_t & ne02,
constant uint64_t & nb00,
constant uint64_t & nb01,
constant uint64_t & nb02,
constant int64_t & ne10,
constant int64_t & ne11,
constant int64_t & ne12,
constant uint64_t & nb10,
constant uint64_t & nb11,
constant uint64_t & nb12,
constant int64_t & ne0,
constant int64_t & ne1,
uint3 tgpig[[threadgroup_position_in_grid]],
uint tiisg[[thread_index_in_simdgroup]]) {
const int64_t r0 = tgpig.x;
const int64_t rb = tgpig.y*N_F16_F16;
const int64_t im = tgpig.z;
device const half * x = (device const half *) (src0 + r0*nb01 + im/(ne12/ne02)*nb02);
if (ne00 < 128) {
for (int row = 0; row < N_F16_F16; ++row) {
int r1 = rb + row;
if (r1 >= ne11) {
break;
}
device const half * y = (device const half *) (src1 + r1*nb11 + im*nb12);
float sumf = 0;
for (int i = tiisg; i < ne00; i += 32) {
sumf += (half) x[i] * (half) y[i];
}
float all_sum = simd_sum(sumf);
if (tiisg == 0) {
dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
}
}
} else {
device const half4 * x4 = (device const half4 *)x;
for (int row = 0; row < N_F16_F16; ++row) {
int r1 = rb + row;
if (r1 >= ne11) {
break;
}
device const half * y = (device const half *) (src1 + r1*nb11 + im*nb12);
device const half4 * y4 = (device const half4 *) y;
float sumf = 0;
for (int i = tiisg; i < ne00/4; i += 32) {
for (int k = 0; k < 4; ++k) sumf += (half) x4[i][k] * y4[i][k];
}
float all_sum = simd_sum(sumf);
if (tiisg == 0) {
for (int i = 4*(ne00/4); i < ne00; ++i) all_sum += (half) x[i] * y[i];
dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
}
}
}
}
kernel void kernel_mul_mv_f16_f32_1row( kernel void kernel_mul_mv_f16_f32_1row(
device const char * src0, device const char * src0,
device const char * src1, device const char * src1,
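
The removed kernel_mul_mv_f16_f16 above follows the same pattern as the surviving F32 variants: a plain scalar loop for short rows (ne00 < 128) and a 4-wide vectorized loop with a scalar tail otherwise. A C sketch of that access pattern, assuming a compiler with _Float16 support:

// Dot product with a 4-wide body and scalar remainder, mirroring the
// half4 loop plus tail in the removed kernel. Names are hypothetical.
static float dot_with_tail(const _Float16 * x, const _Float16 * y, int n) {
    float sum = 0.0f;
    const int n4 = n / 4;
    for (int i = 0; i < 4*n4; i += 4) {          // vectorizable body, 4 at a time
        sum += (float) x[i+0] * y[i+0] + (float) x[i+1] * y[i+1]
             + (float) x[i+2] * y[i+2] + (float) x[i+3] * y[i+3];
    }
    for (int i = 4*n4; i < n; ++i) {             // scalar tail for n % 4 leftovers
        sum += (float) x[i] * (float) y[i];
    }
    return sum;
}
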
@ -1302,39 +1229,6 @@ kernel void kernel_rope(
template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope<float>; template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope<float>;
template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope<half>; template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope<half>;
kernel void kernel_im2col_f16(
device const float * x,
device half * dst,
constant int32_t & ofs0,
constant int32_t & ofs1,
constant int32_t & IW,
constant int32_t & IH,
constant int32_t & CHW,
constant int32_t & s0,
constant int32_t & s1,
constant int32_t & p0,
constant int32_t & p1,
constant int32_t & d0,
constant int32_t & d1,
uint3 tgpig[[threadgroup_position_in_grid]],
uint3 tgpg[[threadgroups_per_grid]],
uint3 tpitg[[thread_position_in_threadgroup]],
uint3 ntg[[threads_per_threadgroup]]) {
const int32_t iiw = tgpig[2] * s0 + tpitg[2] * d0 - p0;
const int32_t iih = tgpig[1] * s1 + tpitg[1] * d1 - p1;
const int32_t offset_dst =
(tpitg[0] * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW +
(tgpig[0] * (ntg[1] * ntg[2]) + tpitg[1] * ntg[2] + tpitg[2]);
if (!(iih < 0 || iih >= IH || iiw < 0 || iiw >= IW)) {
const int32_t offset_src = tpitg[0] * ofs0 + tgpig[0] * ofs1;
dst[offset_dst] = x[offset_src + iih * IW + iiw];
} else {
dst[offset_dst] = 0.0f;
}
}
kernel void kernel_cpy_f16_f16( kernel void kernel_cpy_f16_f16(
device const half * src0, device const half * src0,
device half * dst, device half * dst,

1071
ggml.c

File diff suppressed because it is too large

19
ggml.h
View File

@ -403,8 +403,13 @@ extern "C" {
GGML_OP_ROPE_BACK, GGML_OP_ROPE_BACK,
GGML_OP_ALIBI, GGML_OP_ALIBI,
GGML_OP_CLAMP, GGML_OP_CLAMP,
GGML_OP_CONV_1D,
GGML_OP_CONV_1D_STAGE_0, // internal
GGML_OP_CONV_1D_STAGE_1, // internal
GGML_OP_CONV_TRANSPOSE_1D, GGML_OP_CONV_TRANSPOSE_1D,
GGML_OP_IM2COL, GGML_OP_CONV_2D,
GGML_OP_CONV_2D_STAGE_0, // internal
GGML_OP_CONV_2D_STAGE_1, // internal
GGML_OP_CONV_TRANSPOSE_2D, GGML_OP_CONV_TRANSPOSE_2D,
GGML_OP_POOL_1D, GGML_OP_POOL_1D,
GGML_OP_POOL_2D, GGML_OP_POOL_2D,
@ -1393,18 +1398,6 @@ extern "C" {
float min, float min,
float max); float max);
GGML_API struct ggml_tensor * ggml_im2col(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
int s0,
int s1,
int p0,
int p1,
int d0,
int d1,
bool is_2D);
GGML_API struct ggml_tensor * ggml_conv_1d( GGML_API struct ggml_tensor * ggml_conv_1d(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
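
Usage sketch for the removed ggml_im2col declaration above: im2col unrolls input patches into columns so a convolution can be carried out as a matrix multiplication. ctx, kernel, input, and the commented tensor layouts are assumptions for illustration, not taken from this diff:

#include "ggml.h"

// kernel: [KW, KH, IC, OC] in F16, input: [IW, IH, IC, N] in F32 (assumed)
struct ggml_tensor * lower_conv2d(struct ggml_context * ctx,
                                  struct ggml_tensor * kernel,
                                  struct ggml_tensor * input) {
    return ggml_im2col(ctx, kernel, input,
                       /*s0*/ 1, /*s1*/ 1,     // strides
                       /*p0*/ 1, /*p1*/ 1,     // padding
                       /*d0*/ 1, /*d1*/ 1,     // dilation
                       /*is_2D*/ true);
    // a mul_mat of the reshaped kernel against the result yields the conv output
}
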

View File

@ -50,8 +50,7 @@ https://huggingface.co/ggerganov/whisper.cpp/tree/main
| medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` | | medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
| medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` | | medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
| large-v1 | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` | | large-v1 | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
| large-v2 | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` | | large | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
| large | 2.9 GB | ~4.7 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
## Model files for testing purposes ## Model files for testing purposes

View File

@ -78,14 +78,14 @@ def convert_hf_whisper(hf_model_name_or_path: str, whisper_state_path: str):
# Ported from models/convert-whisper-to-coreml.py # Ported from models/convert-whisper-to-coreml.py
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True) parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True) parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True)
parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False) parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False) parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False) parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
args = parser.parse_args() args = parser.parse_args()
if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]: if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
raise ValueError("Invalid model name") raise ValueError("Invalid model name")
pt_target_path = f"models/hf-{args.model_name}.pt" pt_target_path = f"models/hf-{args.model_name}.pt"

View File

@ -228,7 +228,7 @@ with np.load(dir_whisper / "whisper" / "assets" / "mel_filters.npz") as f:
# for backwards compatibility, also check for older hf_transformers format tokenizer files # for backwards compatibility, also check for older hf_transformers format tokenizer files
# old format: dir_whisper/whisper/assets/[multilingual/gpt2]/vocab.json # old format: dir_whisper/whisper/assets/[multilingual/gpt2]/vocab.json
# new format: dir_whisper/whisper/assets/[multilingual/gpt2].tiktoken # new format: dir_whisper/whisper/assets/[multilingual/gpt2].tiktoken
multilingual = hparams["n_vocab"] >= 51865 multilingual = hparams["n_vocab"] == 51865
tokenizer = dir_whisper / "whisper" / "assets" / (multilingual and "multilingual.tiktoken" or "gpt2.tiktoken") tokenizer = dir_whisper / "whisper" / "assets" / (multilingual and "multilingual.tiktoken" or "gpt2.tiktoken")
tokenizer_type = "tiktoken" tokenizer_type = "tiktoken"
if not tokenizer.is_file(): if not tokenizer.is_file():

View File

@ -194,7 +194,7 @@ class TextDecoderANE(TextDecoder):
x = x.permute(0,2,3,1).squeeze(0) x = x.permute(0,2,3,1).squeeze(0)
# ANE can only load tensors with dim size of at most 16,384 - whisper uses 51,864 (en) or 51,865 (multi-lang) tokens so we need to compute in chunks # ANE can only load tensors with dim size of at most 16,384 - whisper uses 51,864 (en) or 51,865 (multi-lang) tokens so we need to compute in chunks
if self.token_embedding.weight.shape[0] >= 51865: if self.token_embedding.weight.shape[0] == 51865:
# split in 11 chunks - 4715 each # split in 11 chunks - 4715 each
splits = self.token_embedding.weight.split(self.token_embedding.weight.shape[0]//11, dim=0) splits = self.token_embedding.weight.split(self.token_embedding.weight.shape[0]//11, dim=0)
logits = torch.cat([torch.einsum('bid,jd->bij', x, split) for split in splits]).view(*x.shape[:2], -1) logits = torch.cat([torch.einsum('bid,jd->bij', x, split) for split in splits]).view(*x.shape[:2], -1)
@ -252,7 +252,7 @@ class WhisperANE(Whisper):
def convert_encoder(hparams, model, quantize=False): def convert_encoder(hparams, model, quantize=False):
model.eval() model.eval()
input_shape = (1, hparams.n_mels, 3000) input_shape = (1, 80, 3000)
input_data = torch.randn(input_shape) input_data = torch.randn(input_shape)
traced_model = torch.jit.trace(model, input_data) traced_model = torch.jit.trace(model, input_data)
@ -296,13 +296,13 @@ def convert_decoder(hparams, model, quantize=False):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True) parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False) parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False) parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False) parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
args = parser.parse_args() args = parser.parse_args()
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large", "large-v1", "large-v2"]: if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
raise ValueError("Invalid model name") raise ValueError("Invalid model name")
whisper = load_model(args.model).cpu() whisper = load_model(args.model).cpu()

View File

@ -9,7 +9,7 @@ import shutil
def convert_encoder(hparams, encoder, mname): def convert_encoder(hparams, encoder, mname):
encoder.eval() encoder.eval()
mel = torch.zeros((1, hparams.n_mels, 3000)) mel = torch.zeros((1, 80, 3000))
onnx_folder=os.path.join(os.path.dirname(__file__),"onnx_encoder") onnx_folder=os.path.join(os.path.dirname(__file__),"onnx_encoder")
@ -38,10 +38,10 @@ def convert_encoder(hparams, encoder, mname):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True) parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
args = parser.parse_args() args = parser.parse_args()
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]: if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
raise ValueError("Invalid model name") raise ValueError("Invalid model name")
whisper = load_model(args.model).cpu() whisper = load_model(args.model).cpu()

View File

@ -19,7 +19,7 @@ function get_script_path() {
models_path="$(get_script_path)" models_path="$(get_script_path)"
# Whisper models # Whisper models
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" ) models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
# list available models # list available models
function list_models { function list_models {

View File

@ -8,7 +8,7 @@ popd
set argc=0 set argc=0
for %%x in (%*) do set /A argc+=1 for %%x in (%*) do set /A argc+=1
set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large
if %argc% neq 1 ( if %argc% neq 1 (
echo. echo.

View File

@ -41,7 +41,6 @@ models=(
"medium-q5_0" "medium-q5_0"
"medium.en-q5_0" "medium.en-q5_0"
"large-v1" "large-v1"
"large-v2"
"large" "large"
"large-q5_0" "large-q5_0"
) )

View File

@ -1 +0,0 @@
../ggml.h

View File

@ -1 +0,0 @@
../whisper.h

View File

@ -19,7 +19,7 @@
cd `dirname $0` cd `dirname $0`
# Whisper models # Whisper models
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" ) models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
# list available models # list available models
function list_models { function list_models {

File diff suppressed because it is too large

View File

@ -1,20 +1,10 @@
#ifndef WHISPER_H #ifndef WHISPER_H
#define WHISPER_H #define WHISPER_H
#include "ggml.h"
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <stdbool.h> #include <stdbool.h>
#ifdef __GNUC__
# define WHISPER_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
#elif defined(_MSC_VER)
# define WHISPER_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
#else
# define WHISPER_DEPRECATED(func, hint) func
#endif
#ifdef WHISPER_SHARED #ifdef WHISPER_SHARED
# ifdef _WIN32 # ifdef _WIN32
# ifdef WHISPER_BUILD # ifdef WHISPER_BUILD
@ -31,6 +21,7 @@
#define WHISPER_SAMPLE_RATE 16000 #define WHISPER_SAMPLE_RATE 16000
#define WHISPER_N_FFT 400 #define WHISPER_N_FFT 400
#define WHISPER_N_MEL 80
#define WHISPER_HOP_LENGTH 160 #define WHISPER_HOP_LENGTH 160
#define WHISPER_CHUNK_SIZE 30 #define WHISPER_CHUNK_SIZE 30
@ -80,10 +71,6 @@ extern "C" {
typedef int whisper_token; typedef int whisper_token;
struct whisper_context_params {
bool use_gpu;
};
typedef struct whisper_token_data { typedef struct whisper_token_data {
whisper_token id; // token id whisper_token id; // token id
whisper_token tid; // forced timestamp token id whisper_token tid; // forced timestamp token id
@ -112,40 +99,15 @@ extern "C" {
// Various functions for loading a ggml whisper model. // Various functions for loading a ggml whisper model.
// Allocate (almost) all memory needed for the model. // Allocate (almost) all memory needed for the model.
// Return NULL on failure // Return NULL on failure
WHISPER_API struct whisper_context * whisper_init_from_file_with_params (const char * path_model, struct whisper_context_params params); WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model);
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct whisper_context_params params); WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size);
WHISPER_API struct whisper_context * whisper_init_with_params (struct whisper_model_loader * loader, struct whisper_context_params params); WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader);
// These are the same as the above, but the internal state of the context is not allocated automatically // These are the same as the above, but the internal state of the context is not allocated automatically
// It is the responsibility of the caller to allocate the state using whisper_init_state() (#523) // It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
WHISPER_API struct whisper_context * whisper_init_from_file_with_params_no_state (const char * path_model, struct whisper_context_params params); WHISPER_API struct whisper_context * whisper_init_from_file_no_state(const char * path_model);
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct whisper_context_params params); WHISPER_API struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size);
WHISPER_API struct whisper_context * whisper_init_with_params_no_state (struct whisper_model_loader * loader, struct whisper_context_params params); WHISPER_API struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader);
WHISPER_DEPRECATED(
WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model),
"use whisper_init_from_file_with_params instead"
);
WHISPER_DEPRECATED(
WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size),
"use whisper_init_from_buffer_with_params instead"
);
WHISPER_DEPRECATED(
WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader),
"use whisper_init_with_params instead"
);
WHISPER_DEPRECATED(
WHISPER_API struct whisper_context * whisper_init_from_file_no_state(const char * path_model),
"use whisper_init_from_file_with_params_no_state instead"
);
WHISPER_DEPRECATED(
WHISPER_API struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size),
"use whisper_init_from_buffer_with_params_no_state instead"
);
WHISPER_DEPRECATED(
WHISPER_API struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader),
"use whisper_init_with_params_no_state instead"
);
WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx); WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
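
Taken together, the left column replaces the one-argument initializers with *_with_params variants driven by whisper_context_params. A minimal migration sketch of the newer call sequence (the model path is a placeholder):

#include "whisper.h"

int load_ctx(void) {
    struct whisper_context_params cparams = whisper_context_default_params();
    cparams.use_gpu = true;  // the only field shown in the struct above

    struct whisper_context * wctx =
        whisper_init_from_file_with_params("models/ggml-base.en.bin", cparams);
    if (wctx == NULL) {
        return 1;            // "Return NULL on failure" per the comment above
    }
    /* ... run transcription ... */
    whisper_free(wctx);
    return 0;
}
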
@ -170,7 +132,6 @@ extern "C" {
WHISPER_API void whisper_free (struct whisper_context * ctx); WHISPER_API void whisper_free (struct whisper_context * ctx);
WHISPER_API void whisper_free_state(struct whisper_state * state); WHISPER_API void whisper_free_state(struct whisper_state * state);
WHISPER_API void whisper_free_params(struct whisper_full_params * params); WHISPER_API void whisper_free_params(struct whisper_full_params * params);
WHISPER_API void whisper_free_context_params(struct whisper_context_params * params);
// Convert RAW PCM audio to log mel spectrogram. // Convert RAW PCM audio to log mel spectrogram.
// The resulting spectrogram is stored inside the default state of the provided whisper context. // The resulting spectrogram is stored inside the default state of the provided whisper context.
@ -481,9 +442,7 @@ extern "C" {
void * logits_filter_callback_user_data; void * logits_filter_callback_user_data;
}; };
// NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params() // NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_params()
WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref();
WHISPER_API struct whisper_context_params whisper_context_default_params(void);
WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy); WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy); WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
@ -572,7 +531,8 @@ extern "C" {
// Control logging output; default behavior is to print to stderr // Control logging output; default behavior is to print to stderr
WHISPER_API void whisper_log_set(ggml_log_callback log_callback, void * user_data); typedef void (*whisper_log_callback)(const char * line);
WHISPER_API void whisper_set_log_callback(whisper_log_callback callback);
#ifdef __cplusplus #ifdef __cplusplus
} }
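
On the logging change in the final hunk: the right column's single-function callback style is sketched below for reference; the newer whisper_log_set instead takes a ggml_log_callback plus a user_data pointer (see the left column), with the callback signature defined in ggml.h rather than here.

#include <stdio.h>

// Matches the right column's typedef: void (*whisper_log_callback)(const char * line)
static void my_log(const char * line) {
    fputs(line, stderr);  // mirrors the documented default (print to stderr)
}

// Right-column API:  whisper_set_log_callback(my_log);
// Left-column API:   whisper_log_set(callback, user_data);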