mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-07-04 00:11:12 +02:00
Compare commits
4 Commits
ggml-backe
...
large-v3
Author | SHA1 | Date | |
---|---|---|---|
40be74271f | |||
a0c0d08d0f | |||
8fb0a1cd1c | |||
185d3fd6d9 |
6
.gitignore
vendored
6
.gitignore
vendored
@ -8,7 +8,6 @@
|
|||||||
.DS_Store
|
.DS_Store
|
||||||
|
|
||||||
build/
|
build/
|
||||||
build-coreml/
|
|
||||||
build-em/
|
build-em/
|
||||||
build-debug/
|
build-debug/
|
||||||
build-release/
|
build-release/
|
||||||
@ -19,11 +18,6 @@ build-no-accel/
|
|||||||
build-sanitize-addr/
|
build-sanitize-addr/
|
||||||
build-sanitize-thread/
|
build-sanitize-thread/
|
||||||
|
|
||||||
# SPM
|
|
||||||
.build/
|
|
||||||
.swiftpm
|
|
||||||
*.metallib
|
|
||||||
|
|
||||||
/main
|
/main
|
||||||
/stream
|
/stream
|
||||||
/command
|
/command
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
cmake_minimum_required (VERSION 3.5)
|
cmake_minimum_required (VERSION 3.5)
|
||||||
|
|
||||||
project(whisper.cpp VERSION 1.4.3)
|
project(whisper.cpp VERSION 1.4.2)
|
||||||
|
|
||||||
# Add path to modules
|
# Add path to modules
|
||||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||||
|
42
Makefile
42
Makefile
@ -307,7 +307,7 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
|||||||
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
|
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
WHISPER_OBJ += ggml.o ggml-alloc.o ggml-backend.o ggml-quants.o
|
WHISPER_OBJ += ggml-alloc.o ggml-backend.o ggml-quants.o
|
||||||
|
|
||||||
whisper.o: whisper.cpp whisper.h ggml.h ggml-cuda.h
|
whisper.o: whisper.cpp whisper.h ggml.h ggml-cuda.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
@ -331,11 +331,11 @@ ggml-metal.o: ggml-metal.m ggml-metal.h
|
|||||||
WHISPER_OBJ += ggml-metal.o
|
WHISPER_OBJ += ggml-metal.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
libwhisper.a: $(WHISPER_OBJ)
|
libwhisper.a: ggml.o $(WHISPER_OBJ)
|
||||||
$(AR) rcs libwhisper.a $(WHISPER_OBJ)
|
$(AR) rcs libwhisper.a ggml.o $(WHISPER_OBJ)
|
||||||
|
|
||||||
libwhisper.so: $(WHISPER_OBJ)
|
libwhisper.so: ggml.o $(WHISPER_OBJ)
|
||||||
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so $(WHISPER_OBJ) $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o $(WHISPER_OBJ) $(LDFLAGS)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o main stream command talk talk-llama bench quantize lsp libwhisper.a libwhisper.so
|
rm -f *.o main stream command talk talk-llama bench quantize lsp libwhisper.a libwhisper.so
|
||||||
@ -349,30 +349,30 @@ CC_SDL=`sdl2-config --cflags --libs`
|
|||||||
SRC_COMMON = examples/common.cpp examples/common-ggml.cpp
|
SRC_COMMON = examples/common.cpp examples/common-ggml.cpp
|
||||||
SRC_COMMON_SDL = examples/common-sdl.cpp
|
SRC_COMMON_SDL = examples/common-sdl.cpp
|
||||||
|
|
||||||
main: examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ)
|
main: examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ)
|
||||||
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o main $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o main $(LDFLAGS)
|
||||||
./main -h
|
./main -h
|
||||||
|
|
||||||
bench: examples/bench/bench.cpp $(WHISPER_OBJ)
|
bench: examples/bench/bench.cpp ggml.o $(WHISPER_OBJ)
|
||||||
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp $(WHISPER_OBJ) -o bench $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o $(WHISPER_OBJ) -o bench $(LDFLAGS)
|
||||||
|
|
||||||
quantize: examples/quantize/quantize.cpp $(WHISPER_OBJ) $(SRC_COMMON)
|
quantize: examples/quantize/quantize.cpp ggml.o $(WHISPER_OBJ) $(SRC_COMMON)
|
||||||
$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o quantize $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) ggml.o $(WHISPER_OBJ) -o quantize $(LDFLAGS)
|
||||||
|
|
||||||
stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
|
stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
|
||||||
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
|
||||||
|
|
||||||
command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
|
command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
|
||||||
$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
|
||||||
|
|
||||||
lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
|
lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
|
||||||
$(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)
|
||||||
|
|
||||||
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
|
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
|
||||||
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
|
||||||
|
|
||||||
talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
|
talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ)
|
||||||
$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
|
||||||
|
|
||||||
#
|
#
|
||||||
# Audio samples
|
# Audio samples
|
||||||
|
@ -1,77 +0,0 @@
|
|||||||
// swift-tools-version:5.5
|
|
||||||
|
|
||||||
import PackageDescription
|
|
||||||
|
|
||||||
#if arch(arm) || arch(arm64)
|
|
||||||
let platforms: [SupportedPlatform]? = [
|
|
||||||
.macOS(.v12),
|
|
||||||
.iOS(.v14),
|
|
||||||
.watchOS(.v4),
|
|
||||||
.tvOS(.v14)
|
|
||||||
]
|
|
||||||
let exclude: [String] = []
|
|
||||||
let resources: [Resource] = [
|
|
||||||
.process("ggml-metal.metal")
|
|
||||||
]
|
|
||||||
let additionalSources: [String] = ["ggml-metal.m"]
|
|
||||||
let additionalSettings: [CSetting] = [
|
|
||||||
.unsafeFlags(["-fno-objc-arc"]),
|
|
||||||
.define("GGML_USE_METAL")
|
|
||||||
]
|
|
||||||
#else
|
|
||||||
let platforms: [SupportedPlatform]? = nil
|
|
||||||
let exclude: [String] = ["ggml-metal.metal"]
|
|
||||||
let resources: [Resource] = []
|
|
||||||
let additionalSources: [String] = []
|
|
||||||
let additionalSettings: [CSetting] = []
|
|
||||||
#endif
|
|
||||||
|
|
||||||
let package = Package(
|
|
||||||
name: "whisper",
|
|
||||||
platforms: platforms,
|
|
||||||
products: [
|
|
||||||
.library(name: "whisper", targets: ["whisper"]),
|
|
||||||
],
|
|
||||||
targets: [
|
|
||||||
.target(
|
|
||||||
name: "whisper",
|
|
||||||
path: ".",
|
|
||||||
exclude: exclude + [
|
|
||||||
"bindings",
|
|
||||||
"cmake",
|
|
||||||
"coreml",
|
|
||||||
"examples",
|
|
||||||
"extra",
|
|
||||||
"models",
|
|
||||||
"samples",
|
|
||||||
"tests",
|
|
||||||
"CMakeLists.txt",
|
|
||||||
"ggml-cuda.cu",
|
|
||||||
"ggml-cuda.h",
|
|
||||||
"Makefile"
|
|
||||||
],
|
|
||||||
sources: [
|
|
||||||
"ggml.c",
|
|
||||||
"whisper.cpp",
|
|
||||||
"ggml-alloc.c",
|
|
||||||
"ggml-backend.c",
|
|
||||||
"ggml-quants.c"
|
|
||||||
] + additionalSources,
|
|
||||||
resources: resources,
|
|
||||||
publicHeadersPath: "spm-headers",
|
|
||||||
cSettings: [
|
|
||||||
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
|
|
||||||
.define("GGML_USE_ACCELERATE")
|
|
||||||
// NOTE: NEW_LAPACK will required iOS version 16.4+
|
|
||||||
// We should consider add this in the future when we drop support for iOS 14
|
|
||||||
// (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
|
|
||||||
// .define("ACCELERATE_NEW_LAPACK"),
|
|
||||||
// .define("ACCELERATE_LAPACK_ILP64")
|
|
||||||
] + additionalSettings,
|
|
||||||
linkerSettings: [
|
|
||||||
.linkedFramework("Accelerate")
|
|
||||||
]
|
|
||||||
)
|
|
||||||
],
|
|
||||||
cxxLanguageStandard: .cxx11
|
|
||||||
)
|
|
@ -6,7 +6,7 @@
|
|||||||
[](https://opensource.org/licenses/MIT)
|
[](https://opensource.org/licenses/MIT)
|
||||||
[](https://www.npmjs.com/package/whisper.cpp/)
|
[](https://www.npmjs.com/package/whisper.cpp/)
|
||||||
|
|
||||||
Beta: [v1.4.3](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.3) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
Beta: [v1.4.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.2) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
||||||
|
|
||||||
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
||||||
|
|
||||||
|
Submodule bindings/ios updated: 9752de4100...44b39fd4ec
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "whisper.cpp",
|
"name": "whisper.cpp",
|
||||||
"version": "1.4.3",
|
"version": "1.4.2",
|
||||||
"description": "Whisper speech recognition",
|
"description": "Whisper speech recognition",
|
||||||
"main": "whisper.js",
|
"main": "whisper.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
@ -123,7 +123,7 @@ API_AVAILABLE(macos(12.0), ios(15.0), watchos(8.0), tvos(15.0)) __attribute__((v
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
Make a prediction using the convenience interface
|
Make a prediction using the convenience interface
|
||||||
@param logmel_data as 1 × n_mel × 3000 3-dimensional array of floats:
|
@param logmel_data as 1 × 80 × 3000 3-dimensional array of floats:
|
||||||
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
|
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
|
||||||
@return the prediction as whisper_encoder_implOutput
|
@return the prediction as whisper_encoder_implOutput
|
||||||
*/
|
*/
|
||||||
|
@ -3,8 +3,6 @@
|
|||||||
// Code is derived from the work of Github user @wangchou
|
// Code is derived from the work of Github user @wangchou
|
||||||
// ref: https://github.com/wangchou/callCoreMLFromCpp
|
// ref: https://github.com/wangchou/callCoreMLFromCpp
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#if __cplusplus
|
#if __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
@ -16,8 +14,6 @@ void whisper_coreml_free(struct whisper_coreml_context * ctx);
|
|||||||
|
|
||||||
void whisper_coreml_encode(
|
void whisper_coreml_encode(
|
||||||
const whisper_coreml_context * ctx,
|
const whisper_coreml_context * ctx,
|
||||||
int64_t n_ctx,
|
|
||||||
int64_t n_mel,
|
|
||||||
float * mel,
|
float * mel,
|
||||||
float * out);
|
float * out);
|
||||||
|
|
||||||
|
@ -48,15 +48,13 @@ void whisper_coreml_free(struct whisper_coreml_context * ctx) {
|
|||||||
|
|
||||||
void whisper_coreml_encode(
|
void whisper_coreml_encode(
|
||||||
const whisper_coreml_context * ctx,
|
const whisper_coreml_context * ctx,
|
||||||
int64_t n_ctx,
|
|
||||||
int64_t n_mel,
|
|
||||||
float * mel,
|
float * mel,
|
||||||
float * out) {
|
float * out) {
|
||||||
MLMultiArray * inMultiArray = [
|
MLMultiArray * inMultiArray = [
|
||||||
[MLMultiArray alloc] initWithDataPointer: mel
|
[MLMultiArray alloc] initWithDataPointer: mel
|
||||||
shape: @[@1, @(n_mel), @(n_ctx)]
|
shape: @[@1, @80, @3000]
|
||||||
dataType: MLMultiArrayDataTypeFloat32
|
dataType: MLMultiArrayDataTypeFloat32
|
||||||
strides: @[@(n_ctx*n_mel), @(n_ctx), @1]
|
strides: @[@(240000), @(3000), @1]
|
||||||
deallocator: nil
|
deallocator: nil
|
||||||
error: nil
|
error: nil
|
||||||
];
|
];
|
||||||
|
@ -181,7 +181,7 @@ private:
|
|||||||
// It is assumed that PCM data is normalized to a range from -1 to 1
|
// It is assumed that PCM data is normalized to a range from -1 to 1
|
||||||
bool write_audio(const float * data, size_t length) {
|
bool write_audio(const float * data, size_t length) {
|
||||||
for (size_t i = 0; i < length; ++i) {
|
for (size_t i = 0; i < length; ++i) {
|
||||||
const int16_t intSample = data[i] * 32767;
|
const auto intSample = static_cast<const int16_t>(data[i] * 32767);
|
||||||
file.write(reinterpret_cast<const char *>(&intSample), sizeof(int16_t));
|
file.write(reinterpret_cast<const char *>(&intSample), sizeof(int16_t));
|
||||||
dataSize += sizeof(int16_t);
|
dataSize += sizeof(int16_t);
|
||||||
}
|
}
|
||||||
|
@ -248,7 +248,7 @@ int main(int argc, char ** argv) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
|
if (whisper_lang_id(params.language.c_str()) == -1) {
|
||||||
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
|
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
|
||||||
whisper_print_usage(argc, argv, params);
|
whisper_print_usage(argc, argv, params);
|
||||||
exit(0);
|
exit(0);
|
||||||
|
@ -121,13 +121,13 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
char word[129];
|
std::string word;
|
||||||
|
|
||||||
for (int i = 0; i < n_vocab; i++) {
|
for (int i = 0; i < n_vocab; i++) {
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
fin.read((char *) &len, sizeof(len));
|
fin.read((char *) &len, sizeof(len));
|
||||||
word[len] = '\0';
|
|
||||||
fin.read((char *) word, len);
|
word.resize(len);
|
||||||
|
fin.read((char *) word.data(), len);
|
||||||
|
|
||||||
vocab.token_to_id[word] = i;
|
vocab.token_to_id[word] = i;
|
||||||
vocab.id_to_token[i] = word;
|
vocab.id_to_token[i] = word;
|
||||||
|
@ -18,7 +18,9 @@ android {
|
|||||||
vectorDrawables {
|
vectorDrawables {
|
||||||
useSupportLibrary true
|
useSupportLibrary true
|
||||||
}
|
}
|
||||||
|
ndk {
|
||||||
|
abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
buildTypes {
|
buildTypes {
|
||||||
@ -41,10 +43,20 @@ android {
|
|||||||
composeOptions {
|
composeOptions {
|
||||||
kotlinCompilerExtensionVersion '1.5.0'
|
kotlinCompilerExtensionVersion '1.5.0'
|
||||||
}
|
}
|
||||||
|
ndkVersion "25.2.9519653"
|
||||||
|
externalNativeBuild {
|
||||||
|
cmake {
|
||||||
|
path = file("src/main/jni/whisper/CMakeLists.txt")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
packagingOptions {
|
||||||
|
resources {
|
||||||
|
excludes += '/META-INF/{AL2.0,LGPL2.1}'
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation project(':lib')
|
|
||||||
implementation 'androidx.activity:activity-compose:1.7.2'
|
implementation 'androidx.activity:activity-compose:1.7.2'
|
||||||
implementation 'androidx.compose.material:material-icons-core:1.5.0'
|
implementation 'androidx.compose.material:material-icons-core:1.5.0'
|
||||||
implementation 'androidx.compose.material3:material3:1.1.1'
|
implementation 'androidx.compose.material3:material3:1.1.1'
|
||||||
|
@ -15,7 +15,7 @@ import androidx.lifecycle.viewmodel.initializer
|
|||||||
import androidx.lifecycle.viewmodel.viewModelFactory
|
import androidx.lifecycle.viewmodel.viewModelFactory
|
||||||
import com.whispercppdemo.media.decodeWaveFile
|
import com.whispercppdemo.media.decodeWaveFile
|
||||||
import com.whispercppdemo.recorder.Recorder
|
import com.whispercppdemo.recorder.Recorder
|
||||||
import com.whispercpp.whisper.WhisperContext
|
import com.whispercppdemo.whisper.WhisperContext
|
||||||
import kotlinx.coroutines.Dispatchers
|
import kotlinx.coroutines.Dispatchers
|
||||||
import kotlinx.coroutines.launch
|
import kotlinx.coroutines.launch
|
||||||
import kotlinx.coroutines.runBlocking
|
import kotlinx.coroutines.runBlocking
|
||||||
@ -35,7 +35,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
|||||||
private val modelsPath = File(application.filesDir, "models")
|
private val modelsPath = File(application.filesDir, "models")
|
||||||
private val samplesPath = File(application.filesDir, "samples")
|
private val samplesPath = File(application.filesDir, "samples")
|
||||||
private var recorder: Recorder = Recorder()
|
private var recorder: Recorder = Recorder()
|
||||||
private var whisperContext: com.whispercpp.whisper.WhisperContext? = null
|
private var whisperContext: WhisperContext? = null
|
||||||
private var mediaPlayer: MediaPlayer? = null
|
private var mediaPlayer: MediaPlayer? = null
|
||||||
private var recordedFile: File? = null
|
private var recordedFile: File? = null
|
||||||
|
|
||||||
@ -47,7 +47,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private suspend fun printSystemInfo() {
|
private suspend fun printSystemInfo() {
|
||||||
printMessage(String.format("System Info: %s\n", com.whispercpp.whisper.WhisperContext.getSystemInfo()))
|
printMessage(String.format("System Info: %s\n", WhisperContext.getSystemInfo()))
|
||||||
}
|
}
|
||||||
|
|
||||||
private suspend fun loadData() {
|
private suspend fun loadData() {
|
||||||
@ -78,7 +78,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
|||||||
printMessage("Loading model...\n")
|
printMessage("Loading model...\n")
|
||||||
val models = application.assets.list("models/")
|
val models = application.assets.list("models/")
|
||||||
if (models != null) {
|
if (models != null) {
|
||||||
whisperContext = com.whispercpp.whisper.WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
|
whisperContext = WhisperContext.createContextFromAsset(application.assets, "models/" + models[0])
|
||||||
printMessage("Loaded model ${models[0]}.\n")
|
printMessage("Loaded model ${models[0]}.\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package com.whispercpp.whisper
|
package com.whispercppdemo.whisper
|
||||||
|
|
||||||
import android.content.res.AssetManager
|
import android.content.res.AssetManager
|
||||||
import android.os.Build
|
import android.os.Build
|
@ -1,4 +1,4 @@
|
|||||||
package com.whispercpp.whisper
|
package com.whispercppdemo.whisper
|
||||||
|
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
import java.io.BufferedReader
|
import java.io.BufferedReader
|
@ -131,7 +131,7 @@ static struct whisper_context *whisper_init_from_asset(
|
|||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT jlong JNICALL
|
JNIEXPORT jlong JNICALL
|
||||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContextFromAsset(
|
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromAsset(
|
||||||
JNIEnv *env, jobject thiz, jobject assetManager, jstring asset_path_str) {
|
JNIEnv *env, jobject thiz, jobject assetManager, jstring asset_path_str) {
|
||||||
UNUSED(thiz);
|
UNUSED(thiz);
|
||||||
struct whisper_context *context = NULL;
|
struct whisper_context *context = NULL;
|
||||||
@ -142,7 +142,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContextFromAsset(
|
|||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT jlong JNICALL
|
JNIEXPORT jlong JNICALL
|
||||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContext(
|
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
|
||||||
JNIEnv *env, jobject thiz, jstring model_path_str) {
|
JNIEnv *env, jobject thiz, jstring model_path_str) {
|
||||||
UNUSED(thiz);
|
UNUSED(thiz);
|
||||||
struct whisper_context *context = NULL;
|
struct whisper_context *context = NULL;
|
||||||
@ -153,7 +153,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_initContext(
|
|||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT void JNICALL
|
JNIEXPORT void JNICALL
|
||||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_freeContext(
|
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_freeContext(
|
||||||
JNIEnv *env, jobject thiz, jlong context_ptr) {
|
JNIEnv *env, jobject thiz, jlong context_ptr) {
|
||||||
UNUSED(env);
|
UNUSED(env);
|
||||||
UNUSED(thiz);
|
UNUSED(thiz);
|
||||||
@ -162,7 +162,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_freeContext(
|
|||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT void JNICALL
|
JNIEXPORT void JNICALL
|
||||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe(
|
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_fullTranscribe(
|
||||||
JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) {
|
JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) {
|
||||||
UNUSED(thiz);
|
UNUSED(thiz);
|
||||||
struct whisper_context *context = (struct whisper_context *) context_ptr;
|
struct whisper_context *context = (struct whisper_context *) context_ptr;
|
||||||
@ -194,7 +194,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe(
|
|||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT jint JNICALL
|
JNIEXPORT jint JNICALL
|
||||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegmentCount(
|
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegmentCount(
|
||||||
JNIEnv *env, jobject thiz, jlong context_ptr) {
|
JNIEnv *env, jobject thiz, jlong context_ptr) {
|
||||||
UNUSED(env);
|
UNUSED(env);
|
||||||
UNUSED(thiz);
|
UNUSED(thiz);
|
||||||
@ -203,7 +203,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegmentCount(
|
|||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT jstring JNICALL
|
JNIEXPORT jstring JNICALL
|
||||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegment(
|
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getTextSegment(
|
||||||
JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
|
JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
|
||||||
UNUSED(thiz);
|
UNUSED(thiz);
|
||||||
struct whisper_context *context = (struct whisper_context *) context_ptr;
|
struct whisper_context *context = (struct whisper_context *) context_ptr;
|
||||||
@ -213,7 +213,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getTextSegment(
|
|||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT jstring JNICALL
|
JNIEXPORT jstring JNICALL
|
||||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_getSystemInfo(
|
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_getSystemInfo(
|
||||||
JNIEnv *env, jobject thiz
|
JNIEnv *env, jobject thiz
|
||||||
) {
|
) {
|
||||||
UNUSED(thiz);
|
UNUSED(thiz);
|
||||||
@ -223,7 +223,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_getSystemInfo(
|
|||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT jstring JNICALL
|
JNIEXPORT jstring JNICALL
|
||||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
|
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, jobject thiz,
|
||||||
jint n_threads) {
|
jint n_threads) {
|
||||||
UNUSED(thiz);
|
UNUSED(thiz);
|
||||||
const char *bench_ggml_memcpy = whisper_bench_memcpy_str(n_threads);
|
const char *bench_ggml_memcpy = whisper_bench_memcpy_str(n_threads);
|
||||||
@ -231,7 +231,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchMemcpy(JNIEnv *env, j
|
|||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT jstring JNICALL
|
JNIEXPORT jstring JNICALL
|
||||||
Java_com_whispercpp_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz,
|
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_benchGgmlMulMat(JNIEnv *env, jobject thiz,
|
||||||
jint n_threads) {
|
jint n_threads) {
|
||||||
UNUSED(thiz);
|
UNUSED(thiz);
|
||||||
const char *bench_ggml_mul_mat = whisper_bench_ggml_mul_mat_str(n_threads);
|
const char *bench_ggml_mul_mat = whisper_bench_ggml_mul_mat_str(n_threads);
|
1
examples/whisper.android/lib/.gitignore
vendored
1
examples/whisper.android/lib/.gitignore
vendored
@ -1 +0,0 @@
|
|||||||
/build
|
|
@ -1,51 +0,0 @@
|
|||||||
plugins {
|
|
||||||
id 'com.android.library'
|
|
||||||
id 'org.jetbrains.kotlin.android'
|
|
||||||
}
|
|
||||||
|
|
||||||
android {
|
|
||||||
namespace 'com.whispercpp'
|
|
||||||
compileSdk 34
|
|
||||||
|
|
||||||
defaultConfig {
|
|
||||||
minSdk 26
|
|
||||||
targetSdk 34
|
|
||||||
versionCode 1
|
|
||||||
versionName "1.0"
|
|
||||||
|
|
||||||
ndk {
|
|
||||||
abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
buildTypes {
|
|
||||||
release {
|
|
||||||
minifyEnabled false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
compileOptions {
|
|
||||||
sourceCompatibility JavaVersion.VERSION_1_8
|
|
||||||
targetCompatibility JavaVersion.VERSION_1_8
|
|
||||||
}
|
|
||||||
kotlinOptions {
|
|
||||||
jvmTarget = '1.8'
|
|
||||||
}
|
|
||||||
|
|
||||||
ndkVersion "25.2.9519653"
|
|
||||||
externalNativeBuild {
|
|
||||||
cmake {
|
|
||||||
path = file("src/main/jni/whisper/CMakeLists.txt")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
packagingOptions {
|
|
||||||
resources {
|
|
||||||
excludes += '/META-INF/{AL2.0,LGPL2.1}'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
dependencies {
|
|
||||||
implementation 'androidx.core:core-ktx:1.9.0'
|
|
||||||
implementation 'androidx.appcompat:appcompat:1.6.1'
|
|
||||||
implementation 'com.google.android.material:material:1.8.0'
|
|
||||||
}
|
|
@ -1,4 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
|
|
||||||
|
|
||||||
</manifest>
|
|
@ -14,4 +14,3 @@ dependencyResolutionManagement {
|
|||||||
}
|
}
|
||||||
rootProject.name = "WhisperCppDemo"
|
rootProject.name = "WhisperCppDemo"
|
||||||
include ':app'
|
include ':app'
|
||||||
include ':lib'
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import Foundation
|
import Foundation
|
||||||
import whisper
|
|
||||||
|
|
||||||
enum WhisperError: Error {
|
enum WhisperError: Error {
|
||||||
case couldNotInitializeContext
|
case couldNotInitializeContext
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
//
|
||||||
|
// Use this file to import your target's public headers that you would like to expose to Swift.
|
||||||
|
//
|
||||||
|
#import "whisper.h"
|
@ -15,9 +15,16 @@
|
|||||||
0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9A29539CCF003032C3 /* WhisperCppDemoApp.swift */; };
|
0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9A29539CCF003032C3 /* WhisperCppDemoApp.swift */; };
|
||||||
0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; };
|
0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; };
|
||||||
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
|
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
|
||||||
|
0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */; };
|
||||||
|
0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC729539EB0003032C3 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_METAL -Wno-shorten-64-to-32"; }; };
|
||||||
|
0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC929539EB0003032C3 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -Wno-shorten-64-to-32"; }; };
|
||||||
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
|
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
|
||||||
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
|
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
|
||||||
E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */ = {isa = PBXBuildFile; productRef = E3F92DC42AFA8E3800A6A9D4 /* whisper */; };
|
18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE14C2AF555FA0044A204 /* ggml-backend.c */; };
|
||||||
|
18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1512AF555FA0044A204 /* ggml-quants.c */; };
|
||||||
|
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 18AED47F2AB21F2B009D854F /* ggml-alloc.c */; };
|
||||||
|
7FCB08262ACFA3A400AF3530 /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FCB08252ACFA3A400AF3530 /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
|
||||||
|
7FCB08282ACFA48500AF3530 /* ggml-metal.metal in Sources */ = {isa = PBXBuildFile; fileRef = 7FCB08272ACFA48500AF3530 /* ggml-metal.metal */; };
|
||||||
/* End PBXBuildFile section */
|
/* End PBXBuildFile section */
|
||||||
|
|
||||||
/* Begin PBXFileReference section */
|
/* Begin PBXFileReference section */
|
||||||
@ -31,9 +38,25 @@
|
|||||||
0AAC5D9C29539CCF003032C3 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
0AAC5D9C29539CCF003032C3 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
||||||
0AAC5D9E29539CD0003032C3 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
|
0AAC5D9E29539CD0003032C3 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
|
||||||
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
|
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
|
||||||
|
0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
|
||||||
|
0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "WhisperCppDemo-Bridging-Header.h"; sourceTree = "<group>"; };
|
||||||
|
0AAC5DC729539EB0003032C3 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = whisper.cpp; sourceTree = "<group>"; };
|
||||||
|
0AAC5DC829539EB0003032C3 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whisper.h; sourceTree = "<group>"; };
|
||||||
|
0AAC5DC929539EB0003032C3 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ggml.c; sourceTree = "<group>"; };
|
||||||
|
0AAC5DCA29539EB0003032C3 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ggml.h; sourceTree = "<group>"; };
|
||||||
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
|
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
|
||||||
0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
|
0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
|
||||||
E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = whisper.cpp; path = ../..; sourceTree = "<group>"; };
|
18ABE14C2AF555FA0044A204 /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-backend.c"; sourceTree = "<group>"; };
|
||||||
|
18ABE14D2AF555FA0044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-backend.h"; sourceTree = "<group>"; };
|
||||||
|
18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-backend-impl.h"; sourceTree = "<group>"; };
|
||||||
|
18ABE14F2AF555FA0044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-quants.h"; sourceTree = "<group>"; };
|
||||||
|
18ABE1502AF555FA0044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-impl.h"; sourceTree = "<group>"; };
|
||||||
|
18ABE1512AF555FA0044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-quants.c"; sourceTree = "<group>"; };
|
||||||
|
18AED47F2AB21F2B009D854F /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-alloc.c"; sourceTree = "<group>"; };
|
||||||
|
18AED4802AB21F2B009D854F /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-alloc.h"; sourceTree = "<group>"; };
|
||||||
|
7FCB081E2ACFA04400AF3530 /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-metal.h"; sourceTree = "<group>"; };
|
||||||
|
7FCB08252ACFA3A400AF3530 /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "ggml-metal.m"; sourceTree = "<group>"; };
|
||||||
|
7FCB08272ACFA48500AF3530 /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = "ggml-metal.metal"; sourceTree = "<group>"; };
|
||||||
/* End PBXFileReference section */
|
/* End PBXFileReference section */
|
||||||
|
|
||||||
/* Begin PBXFrameworksBuildPhase section */
|
/* Begin PBXFrameworksBuildPhase section */
|
||||||
@ -41,7 +64,6 @@
|
|||||||
isa = PBXFrameworksBuildPhase;
|
isa = PBXFrameworksBuildPhase;
|
||||||
buildActionMask = 2147483647;
|
buildActionMask = 2147483647;
|
||||||
files = (
|
files = (
|
||||||
E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */,
|
|
||||||
);
|
);
|
||||||
runOnlyForDeploymentPostprocessing = 0;
|
runOnlyForDeploymentPostprocessing = 0;
|
||||||
};
|
};
|
||||||
@ -77,12 +99,11 @@
|
|||||||
0AAC5D8E29539CCF003032C3 = {
|
0AAC5D8E29539CCF003032C3 = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */,
|
|
||||||
0A8E48FF2954B3F100704C1B /* README.md */,
|
0A8E48FF2954B3F100704C1B /* README.md */,
|
||||||
|
0AAC5DC529539E89003032C3 /* whisper.cpp */,
|
||||||
0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */,
|
0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */,
|
||||||
0AAC5D9929539CCF003032C3 /* whisper.swiftui.demo */,
|
0AAC5D9929539CCF003032C3 /* whisper.swiftui.demo */,
|
||||||
0AAC5D9829539CCF003032C3 /* Products */,
|
0AAC5D9829539CCF003032C3 /* Products */,
|
||||||
E3F92DC32AFA8E3800A6A9D4 /* Frameworks */,
|
|
||||||
);
|
);
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
};
|
};
|
||||||
@ -107,9 +128,42 @@
|
|||||||
path = whisper.swiftui.demo;
|
path = whisper.swiftui.demo;
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
};
|
};
|
||||||
|
0AAC5DA129539CD0003032C3 /* Preview Content */ = {
|
||||||
|
isa = PBXGroup;
|
||||||
|
children = (
|
||||||
|
0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */,
|
||||||
|
);
|
||||||
|
name = "Preview Content";
|
||||||
|
path = "../Preview Content";
|
||||||
|
sourceTree = "<group>";
|
||||||
|
};
|
||||||
|
0AAC5DC529539E89003032C3 /* whisper.cpp */ = {
|
||||||
|
isa = PBXGroup;
|
||||||
|
children = (
|
||||||
|
7FCB08272ACFA48500AF3530 /* ggml-metal.metal */,
|
||||||
|
7FCB081E2ACFA04400AF3530 /* ggml-metal.h */,
|
||||||
|
7FCB08252ACFA3A400AF3530 /* ggml-metal.m */,
|
||||||
|
18ABE14E2AF555FA0044A204 /* ggml-backend-impl.h */,
|
||||||
|
18ABE14C2AF555FA0044A204 /* ggml-backend.c */,
|
||||||
|
18ABE14D2AF555FA0044A204 /* ggml-backend.h */,
|
||||||
|
18ABE1502AF555FA0044A204 /* ggml-impl.h */,
|
||||||
|
18ABE1512AF555FA0044A204 /* ggml-quants.c */,
|
||||||
|
18ABE14F2AF555FA0044A204 /* ggml-quants.h */,
|
||||||
|
18AED47F2AB21F2B009D854F /* ggml-alloc.c */,
|
||||||
|
18AED4802AB21F2B009D854F /* ggml-alloc.h */,
|
||||||
|
0AAC5DC929539EB0003032C3 /* ggml.c */,
|
||||||
|
0AAC5DCA29539EB0003032C3 /* ggml.h */,
|
||||||
|
0AAC5DC729539EB0003032C3 /* whisper.cpp */,
|
||||||
|
0AAC5DC829539EB0003032C3 /* whisper.h */,
|
||||||
|
);
|
||||||
|
name = whisper.cpp;
|
||||||
|
path = ../..;
|
||||||
|
sourceTree = "<group>";
|
||||||
|
};
|
||||||
0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */ = {
|
0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */ = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
|
0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */,
|
||||||
0AAC5DD02953A394003032C3 /* LibWhisper.swift */,
|
0AAC5DD02953A394003032C3 /* LibWhisper.swift */,
|
||||||
);
|
);
|
||||||
path = whisper.cpp.swift;
|
path = whisper.cpp.swift;
|
||||||
@ -128,17 +182,11 @@
|
|||||||
children = (
|
children = (
|
||||||
0AAC5D9E29539CD0003032C3 /* Assets.xcassets */,
|
0AAC5D9E29539CD0003032C3 /* Assets.xcassets */,
|
||||||
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */,
|
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */,
|
||||||
|
0AAC5DA129539CD0003032C3 /* Preview Content */,
|
||||||
);
|
);
|
||||||
path = "Supporting files";
|
path = "Supporting files";
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
};
|
};
|
||||||
E3F92DC32AFA8E3800A6A9D4 /* Frameworks */ = {
|
|
||||||
isa = PBXGroup;
|
|
||||||
children = (
|
|
||||||
);
|
|
||||||
name = Frameworks;
|
|
||||||
sourceTree = "<group>";
|
|
||||||
};
|
|
||||||
/* End PBXGroup section */
|
/* End PBXGroup section */
|
||||||
|
|
||||||
/* Begin PBXNativeTarget section */
|
/* Begin PBXNativeTarget section */
|
||||||
@ -155,9 +203,6 @@
|
|||||||
dependencies = (
|
dependencies = (
|
||||||
);
|
);
|
||||||
name = whisper.swiftui;
|
name = whisper.swiftui;
|
||||||
packageProductDependencies = (
|
|
||||||
E3F92DC42AFA8E3800A6A9D4 /* whisper */,
|
|
||||||
);
|
|
||||||
productName = WhisperCppDemo;
|
productName = WhisperCppDemo;
|
||||||
productReference = 0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */;
|
productReference = 0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */;
|
||||||
productType = "com.apple.product-type.application";
|
productType = "com.apple.product-type.application";
|
||||||
@ -202,6 +247,7 @@
|
|||||||
buildActionMask = 2147483647;
|
buildActionMask = 2147483647;
|
||||||
files = (
|
files = (
|
||||||
0AA751482953AC2E001EE061 /* samples in Resources */,
|
0AA751482953AC2E001EE061 /* samples in Resources */,
|
||||||
|
0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */,
|
||||||
0A8E49002954B3F100704C1B /* README.md in Resources */,
|
0A8E49002954B3F100704C1B /* README.md in Resources */,
|
||||||
0AA751492953AC2E001EE061 /* models in Resources */,
|
0AA751492953AC2E001EE061 /* models in Resources */,
|
||||||
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */,
|
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */,
|
||||||
@ -217,10 +263,17 @@
|
|||||||
files = (
|
files = (
|
||||||
0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */,
|
0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */,
|
||||||
0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */,
|
0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */,
|
||||||
|
0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */,
|
||||||
|
18ABE1532AF555FA0044A204 /* ggml-quants.c in Sources */,
|
||||||
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
|
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
|
||||||
|
7FCB08282ACFA48500AF3530 /* ggml-metal.metal in Sources */,
|
||||||
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
|
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
|
||||||
0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
|
0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
|
||||||
|
0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */,
|
||||||
0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
|
0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
|
||||||
|
7FCB08262ACFA3A400AF3530 /* ggml-metal.m in Sources */,
|
||||||
|
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */,
|
||||||
|
18ABE1522AF555FA0044A204 /* ggml-backend.c in Sources */,
|
||||||
);
|
);
|
||||||
runOnlyForDeploymentPostprocessing = 0;
|
runOnlyForDeploymentPostprocessing = 0;
|
||||||
};
|
};
|
||||||
@ -348,7 +401,7 @@
|
|||||||
CODE_SIGN_STYLE = Automatic;
|
CODE_SIGN_STYLE = Automatic;
|
||||||
CURRENT_PROJECT_VERSION = 1;
|
CURRENT_PROJECT_VERSION = 1;
|
||||||
DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
|
DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
|
||||||
DEVELOPMENT_TEAM = "";
|
DEVELOPMENT_TEAM = P8JZH34X63;
|
||||||
ENABLE_HARDENED_RUNTIME = YES;
|
ENABLE_HARDENED_RUNTIME = YES;
|
||||||
ENABLE_PREVIEWS = YES;
|
ENABLE_PREVIEWS = YES;
|
||||||
GENERATE_INFOPLIST_FILE = YES;
|
GENERATE_INFOPLIST_FILE = YES;
|
||||||
@ -372,6 +425,7 @@
|
|||||||
SDKROOT = auto;
|
SDKROOT = auto;
|
||||||
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
|
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
|
||||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||||
|
SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
|
||||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||||
SWIFT_VERSION = 5.0;
|
SWIFT_VERSION = 5.0;
|
||||||
TARGETED_DEVICE_FAMILY = "1,2";
|
TARGETED_DEVICE_FAMILY = "1,2";
|
||||||
@ -388,7 +442,7 @@
|
|||||||
CODE_SIGN_STYLE = Automatic;
|
CODE_SIGN_STYLE = Automatic;
|
||||||
CURRENT_PROJECT_VERSION = 1;
|
CURRENT_PROJECT_VERSION = 1;
|
||||||
DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
|
DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
|
||||||
DEVELOPMENT_TEAM = "";
|
DEVELOPMENT_TEAM = P8JZH34X63;
|
||||||
ENABLE_HARDENED_RUNTIME = YES;
|
ENABLE_HARDENED_RUNTIME = YES;
|
||||||
ENABLE_PREVIEWS = YES;
|
ENABLE_PREVIEWS = YES;
|
||||||
GENERATE_INFOPLIST_FILE = YES;
|
GENERATE_INFOPLIST_FILE = YES;
|
||||||
@ -417,6 +471,7 @@
|
|||||||
SDKROOT = auto;
|
SDKROOT = auto;
|
||||||
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
|
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
|
||||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||||
|
SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
|
||||||
SWIFT_VERSION = 5.0;
|
SWIFT_VERSION = 5.0;
|
||||||
TARGETED_DEVICE_FAMILY = "1,2";
|
TARGETED_DEVICE_FAMILY = "1,2";
|
||||||
};
|
};
|
||||||
@ -444,13 +499,6 @@
|
|||||||
defaultConfigurationName = Release;
|
defaultConfigurationName = Release;
|
||||||
};
|
};
|
||||||
/* End XCConfigurationList section */
|
/* End XCConfigurationList section */
|
||||||
|
|
||||||
/* Begin XCSwiftPackageProductDependency section */
|
|
||||||
E3F92DC42AFA8E3800A6A9D4 /* whisper */ = {
|
|
||||||
isa = XCSwiftPackageProductDependency;
|
|
||||||
productName = whisper;
|
|
||||||
};
|
|
||||||
/* End XCSwiftPackageProductDependency section */
|
|
||||||
};
|
};
|
||||||
rootObject = 0AAC5D8F29539CCF003032C3 /* Project object */;
|
rootObject = 0AAC5D8F29539CCF003032C3 /* Project object */;
|
||||||
}
|
}
|
||||||
|
@ -18,11 +18,11 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
models=( \
|
models=( \
|
||||||
"tiny" "tiny-q4_0" "tiny-q4_1" "tiny-q5_0" "tiny-q5_1" "tiny-q8_0" \
|
"tiny" "tiny-q5_0" "tiny-q5_1" "tiny-q8_0" \
|
||||||
"base" "base-q4_0" "base-q4_1" "base-q5_0" "base-q5_1" "base-q8_0" \
|
"base" "base-q5_0" "base-q5_1" "base-q8_0" \
|
||||||
"small" "small-q4_0" "small-q4_1" "small-q5_0" "small-q5_1" "small-q8_0" \
|
"small" "small-q5_0" "small-q5_1" "small-q8_0" \
|
||||||
"medium" "medium-q4_0" "medium-q4_1" "medium-q5_0" "medium-q5_1" "medium-q8_0" \
|
"medium" "medium-q5_0" "medium-q5_1" "medium-q8_0" \
|
||||||
"large" "large-q4_0" "large-q4_1" "large-q5_0" "large-q5_1" "large-q8_0" \
|
"large" "large-q5_0" "large-q5_1" "large-q8_0" \
|
||||||
)
|
)
|
||||||
|
|
||||||
if [ "$encoder_only" -eq 0 ]; then
|
if [ "$encoder_only" -eq 0 ]; then
|
||||||
@ -83,10 +83,6 @@ for model in "${models[@]}"; do
|
|||||||
config="$config COREML"
|
config="$config COREML"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ $system_info == *"CUDA = 1"* ]]; then
|
|
||||||
config="$config CUDA"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ $system_info == *"METAL = 1"* ]]; then
|
if [[ $system_info == *"METAL = 1"* ]]; then
|
||||||
config="$config METAL"
|
config="$config METAL"
|
||||||
fi
|
fi
|
||||||
|
@ -15,13 +15,33 @@ declare -a filedex
|
|||||||
cd `dirname $0`
|
cd `dirname $0`
|
||||||
cd ../
|
cd ../
|
||||||
|
|
||||||
for i in `ls ./models | grep ^ggml-.*.bin | grep -v "\-q"`; do
|
# Let's loop across all the objects in the 'models' dir:
|
||||||
m="models/$i"
|
for i in ./models/*; do
|
||||||
if [ -f "$m" ]; then
|
# Check to see if it's a file or directory
|
||||||
if [ "${m##*.}" == "bin" ]; then
|
if [ -d "$i" ]; then
|
||||||
./quantize "${m}" "${m::${#m}-4}-${qtype1}.bin" ${qtype1};
|
# It's a directory! We should make sure it's not empty first:
|
||||||
./quantize "${m}" "${m::${#m}-4}-${qtype0}.bin" ${qtype0};
|
if [ "$(ls -A $i)" ]; then
|
||||||
filedex+=( "${m::${#m}-4}-${qtype1}.bin" "${m::${#m}-4}-${qtype0}.bin" )
|
# Passed! Let's go searching for bin files (shouldn't need to go more than a layer deep here)
|
||||||
|
for f in "$i"/*.bin; do
|
||||||
|
# [Neuron Activation]
|
||||||
|
newfile=`echo "${f##*/}" | cut -d _ -f 1`;
|
||||||
|
if [ "$newfile" != "q5" ]; then
|
||||||
|
./quantize "${f}" "${i:-4}/${i:9:${#i}-4}-${qtype1}.bin" ${qtype1};
|
||||||
|
./quantize "${f}" "${i:-4}/${i:9:${#i}-4}-${qtype0}.bin" ${qtype0};
|
||||||
|
filedex+=( "${i:-4}/${i:9:${#i}-4}-${qtype1}.bin" "${i:-4}/${i:9:${#i}-4}-${qtype0}.bin" )
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# It's a file! Let's make sure it's the right type:
|
||||||
|
if [ "${i##*.}" == "bin" ]; then
|
||||||
|
# And we probably want to skip the testing files
|
||||||
|
if [ "${i:9:8}" != "for-test" ]; then
|
||||||
|
# [Neuron Activation]
|
||||||
|
./quantize "${i}" "${i:-4}-${qtype1}.bin" ${qtype1};
|
||||||
|
./quantize "${i}" "${i:-4}-${qtype0}.bin" ${qtype0};
|
||||||
|
filedex+=( "${i:-4}-${qtype1}.bin" "${i:-4}-${qtype0}.bin" )
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
96
ggml-cuda.cu
96
ggml-cuda.cu
@ -4476,13 +4476,6 @@ static __device__ void cpy_1_f32_f16(const char * cxi, char * cdsti) {
|
|||||||
*dsti = __float2half(*xi);
|
*dsti = __float2half(*xi);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __device__ void cpy_1_f16_f16(const char * cxi, char * cdsti) {
|
|
||||||
const half * xi = (const half *) cxi;
|
|
||||||
half * dsti = (half *) cdsti;
|
|
||||||
|
|
||||||
*dsti = *xi;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <cpy_kernel_t cpy_1>
|
template <cpy_kernel_t cpy_1>
|
||||||
static __global__ void cpy_f32_f16(const char * cx, char * cdst, const int ne,
|
static __global__ void cpy_f32_f16(const char * cx, char * cdst, const int ne,
|
||||||
const int ne00, const int ne01, const int nb00, const int nb01, const int nb02,
|
const int ne00, const int ne01, const int nb00, const int nb01, const int nb02,
|
||||||
@ -4736,25 +4729,6 @@ static __global__ void clamp_f32(const float * x, float * dst, const float min,
|
|||||||
dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
|
dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __global__ void im2col_f32_f16(
|
|
||||||
const float * x, half * dst,
|
|
||||||
int ofs0, int ofs1, int IW, int IH, int CHW,
|
|
||||||
int s0, int s1, int p0, int p1, int d0, int d1) {
|
|
||||||
const int iiw = blockIdx.z * s0 + threadIdx.z * d0 - p0;
|
|
||||||
const int iih = blockIdx.y * s1 + threadIdx.y * d1 - p1;
|
|
||||||
|
|
||||||
const int offset_dst =
|
|
||||||
(threadIdx.x * gridDim.y * gridDim.z + blockIdx.y * gridDim.z + blockIdx.z) * CHW +
|
|
||||||
(blockIdx.x * (blockDim.y * blockDim.z) + threadIdx.y * blockDim.z + threadIdx.z);
|
|
||||||
|
|
||||||
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
|
||||||
dst[offset_dst] = __float2half(0.0f);
|
|
||||||
} else {
|
|
||||||
const int offset_src = threadIdx.x * ofs0 + blockIdx.x * ofs1;
|
|
||||||
dst[offset_dst] = __float2half(x[offset_src + iih * IW + iiw]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int qk, int qr, dequantize_kernel_t dq>
|
template<int qk, int qr, dequantize_kernel_t dq>
|
||||||
static void get_rows_cuda(const void * x, const int32_t * y, float * dst, const int nrows, const int ncols, cudaStream_t stream) {
|
static void get_rows_cuda(const void * x, const int32_t * y, float * dst, const int nrows, const int ncols, cudaStream_t stream) {
|
||||||
const dim3 block_dims(CUDA_GET_ROWS_BLOCK_SIZE, 1, 1);
|
const dim3 block_dims(CUDA_GET_ROWS_BLOCK_SIZE, 1, 1);
|
||||||
@ -5644,16 +5618,6 @@ static void ggml_cpy_f32_f16_cuda(
|
|||||||
(cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12);
|
(cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_cpy_f16_f16_cuda(
|
|
||||||
const char * cx, char * cdst, const int ne,
|
|
||||||
const int ne00, const int ne01, const int nb00, const int nb01, const int nb02,
|
|
||||||
const int ne10, const int ne11, const int nb10, const int nb11, const int nb12, cudaStream_t stream) {
|
|
||||||
|
|
||||||
const int num_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE;
|
|
||||||
cpy_f32_f16<cpy_1_f16_f16><<<num_blocks, CUDA_CPY_BLOCK_SIZE, 0, stream>>>
|
|
||||||
(cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) {
|
static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) {
|
||||||
const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
|
const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
|
||||||
scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
|
scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
|
||||||
@ -5737,15 +5701,6 @@ static void soft_max_f32_cuda(const float * x, float * dst, const int ncols_x, c
|
|||||||
soft_max_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols_x);
|
soft_max_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols_x);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void im2col_f32_f16_cuda(const float * x, half * dst,
|
|
||||||
int OH, int IW, int IH, int OW, int IC,
|
|
||||||
int KH, int KW, int N, int ofs0, int ofs1,
|
|
||||||
int s0, int s1, int p0, int p1, int d0, int d1, cudaStream_t stream) {
|
|
||||||
dim3 block_nums(IC, OH, OW);
|
|
||||||
dim3 block_dims(N, KH, KW);
|
|
||||||
im2col_f32_f16<<<block_nums, block_dims, 0, stream>>>(x, dst, ofs0, ofs1, IW, IH, (IC * KH * KW), s0, s1, p0, p1, d0, d1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// buffer pool for cuda
|
// buffer pool for cuda
|
||||||
#define MAX_CUDA_BUFFERS 256
|
#define MAX_CUDA_BUFFERS 256
|
||||||
|
|
||||||
@ -6528,7 +6483,7 @@ inline void ggml_cuda_op_mul_mat_cublas(
|
|||||||
src1_as_f16 = (half *) ggml_cuda_pool_malloc_async(ne * sizeof(half), &src1_as, id, stream);
|
src1_as_f16 = (half *) ggml_cuda_pool_malloc_async(ne * sizeof(half), &src1_as, id, stream);
|
||||||
to_fp16_cuda(src1_ddf_i, src1_as_f16, ne, stream);
|
to_fp16_cuda(src1_ddf_i, src1_as_f16, ne, stream);
|
||||||
}
|
}
|
||||||
const half * src1_ptr = src1->type == GGML_TYPE_F16 ? (const half *) src1_ddf_i : src1_as_f16;
|
const half * src1_ptr = src1->type == GGML_TYPE_F16 ? (const half *) src1_ddq_i : src1_as_f16;
|
||||||
size_t dst_f16_as = 0;
|
size_t dst_f16_as = 0;
|
||||||
half * dst_f16 = (half *) ggml_cuda_pool_malloc_async(row_diff*src1_ncols * sizeof(half), &dst_f16_as, id, stream);
|
half * dst_f16 = (half *) ggml_cuda_pool_malloc_async(row_diff*src1_ncols * sizeof(half), &dst_f16_as, id, stream);
|
||||||
|
|
||||||
@ -6704,45 +6659,6 @@ inline void ggml_cuda_op_alibi(
|
|||||||
(void) src1_dd;
|
(void) src1_dd;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void ggml_cuda_op_im2col(
|
|
||||||
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
|
|
||||||
const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) {
|
|
||||||
|
|
||||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
||||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
||||||
GGML_ASSERT( dst->type == GGML_TYPE_F16);
|
|
||||||
|
|
||||||
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
|
||||||
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
|
||||||
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
|
||||||
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
|
||||||
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
|
||||||
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
|
||||||
|
|
||||||
const bool is_2D = ((const int32_t*)(dst->op_params))[6] == 1;
|
|
||||||
|
|
||||||
const int64_t N = src1->ne[is_2D ? 3 : 2];
|
|
||||||
const int64_t IC = src1->ne[is_2D ? 2 : 1];
|
|
||||||
const int64_t IH = is_2D ? src1->ne[1] : 1;
|
|
||||||
const int64_t IW = src1->ne[0];
|
|
||||||
|
|
||||||
const int64_t KH = is_2D ? src0->ne[1] : 1;
|
|
||||||
const int64_t KW = src0->ne[0];
|
|
||||||
|
|
||||||
const int64_t OH = is_2D ? dst->ne[2] : 1;
|
|
||||||
const int64_t OW = dst->ne[1];
|
|
||||||
|
|
||||||
const size_t ofs0 = src1->nb[is_2D ? 3 : 2] / 4; // nb is byte offset, src is type float32
|
|
||||||
const size_t ofs1 = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32
|
|
||||||
|
|
||||||
im2col_f32_f16_cuda(src1_dd, (half*) dst_dd,
|
|
||||||
OH, IW, IH, OW, IC, KH, KW, N,
|
|
||||||
ofs0, ofs1, s0, s1, p0, p1, d0, d1, main_stream);
|
|
||||||
|
|
||||||
(void) src0;
|
|
||||||
(void) src0_dd;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void ggml_cuda_op_diag_mask_inf(
|
inline void ggml_cuda_op_diag_mask_inf(
|
||||||
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
|
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
|
||||||
const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) {
|
const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) {
|
||||||
@ -7633,9 +7549,6 @@ static void ggml_cuda_cpy(const ggml_tensor * src0, const ggml_tensor * src1, gg
|
|||||||
} else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F16) {
|
} else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F16) {
|
||||||
ggml_cpy_f32_f16_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02,
|
ggml_cpy_f32_f16_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02,
|
||||||
ne10, ne11, nb10, nb11, nb12, main_stream);
|
ne10, ne11, nb10, nb11, nb12, main_stream);
|
||||||
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16) {
|
|
||||||
ggml_cpy_f16_f16_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02,
|
|
||||||
ne10, ne11, nb10, nb11, nb12, main_stream);
|
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
|
fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
|
||||||
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
||||||
@ -7667,10 +7580,6 @@ static void ggml_cuda_alibi(const ggml_tensor * src0, const ggml_tensor * src1,
|
|||||||
ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_alibi);
|
ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_alibi);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_cuda_im2col(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
|
||||||
ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_im2col);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
static void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
||||||
(void) src0;
|
(void) src0;
|
||||||
(void) src1;
|
(void) src1;
|
||||||
@ -8034,9 +7943,6 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
|
|||||||
case GGML_OP_ALIBI:
|
case GGML_OP_ALIBI:
|
||||||
func = ggml_cuda_alibi;
|
func = ggml_cuda_alibi;
|
||||||
break;
|
break;
|
||||||
case GGML_OP_IM2COL:
|
|
||||||
func = ggml_cuda_im2col;
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
// max memory buffers that can be mapped to the device
|
// max memory buffers that can be mapped to the device
|
||||||
#define GGML_METAL_MAX_BUFFERS 64
|
#define GGML_METAL_MAX_BUFFERS 16
|
||||||
#define GGML_METAL_MAX_COMMAND_BUFFERS 32
|
#define GGML_METAL_MAX_COMMAND_BUFFERS 32
|
||||||
|
|
||||||
struct ggml_tensor;
|
struct ggml_tensor;
|
||||||
|
80
ggml-metal.m
80
ggml-metal.m
@ -86,7 +86,6 @@ struct ggml_metal_context {
|
|||||||
GGML_METAL_DECL_KERNEL(rms_norm);
|
GGML_METAL_DECL_KERNEL(rms_norm);
|
||||||
GGML_METAL_DECL_KERNEL(norm);
|
GGML_METAL_DECL_KERNEL(norm);
|
||||||
GGML_METAL_DECL_KERNEL(mul_mv_f32_f32);
|
GGML_METAL_DECL_KERNEL(mul_mv_f32_f32);
|
||||||
GGML_METAL_DECL_KERNEL(mul_mv_f16_f16);
|
|
||||||
GGML_METAL_DECL_KERNEL(mul_mv_f16_f32);
|
GGML_METAL_DECL_KERNEL(mul_mv_f16_f32);
|
||||||
GGML_METAL_DECL_KERNEL(mul_mv_f16_f32_1row);
|
GGML_METAL_DECL_KERNEL(mul_mv_f16_f32_1row);
|
||||||
GGML_METAL_DECL_KERNEL(mul_mv_f16_f32_l4);
|
GGML_METAL_DECL_KERNEL(mul_mv_f16_f32_l4);
|
||||||
@ -115,7 +114,6 @@ struct ggml_metal_context {
|
|||||||
GGML_METAL_DECL_KERNEL(rope_f32);
|
GGML_METAL_DECL_KERNEL(rope_f32);
|
||||||
GGML_METAL_DECL_KERNEL(rope_f16);
|
GGML_METAL_DECL_KERNEL(rope_f16);
|
||||||
GGML_METAL_DECL_KERNEL(alibi_f32);
|
GGML_METAL_DECL_KERNEL(alibi_f32);
|
||||||
GGML_METAL_DECL_KERNEL(im2col_f16);
|
|
||||||
GGML_METAL_DECL_KERNEL(cpy_f32_f16);
|
GGML_METAL_DECL_KERNEL(cpy_f32_f16);
|
||||||
GGML_METAL_DECL_KERNEL(cpy_f32_f32);
|
GGML_METAL_DECL_KERNEL(cpy_f32_f32);
|
||||||
GGML_METAL_DECL_KERNEL(cpy_f16_f16);
|
GGML_METAL_DECL_KERNEL(cpy_f16_f16);
|
||||||
@ -289,7 +287,6 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
GGML_METAL_ADD_KERNEL(rms_norm);
|
GGML_METAL_ADD_KERNEL(rms_norm);
|
||||||
GGML_METAL_ADD_KERNEL(norm);
|
GGML_METAL_ADD_KERNEL(norm);
|
||||||
GGML_METAL_ADD_KERNEL(mul_mv_f32_f32);
|
GGML_METAL_ADD_KERNEL(mul_mv_f32_f32);
|
||||||
GGML_METAL_ADD_KERNEL(mul_mv_f16_f16);
|
|
||||||
GGML_METAL_ADD_KERNEL(mul_mv_f16_f32);
|
GGML_METAL_ADD_KERNEL(mul_mv_f16_f32);
|
||||||
GGML_METAL_ADD_KERNEL(mul_mv_f16_f32_1row);
|
GGML_METAL_ADD_KERNEL(mul_mv_f16_f32_1row);
|
||||||
GGML_METAL_ADD_KERNEL(mul_mv_f16_f32_l4);
|
GGML_METAL_ADD_KERNEL(mul_mv_f16_f32_l4);
|
||||||
@ -320,7 +317,6 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
GGML_METAL_ADD_KERNEL(rope_f32);
|
GGML_METAL_ADD_KERNEL(rope_f32);
|
||||||
GGML_METAL_ADD_KERNEL(rope_f16);
|
GGML_METAL_ADD_KERNEL(rope_f16);
|
||||||
GGML_METAL_ADD_KERNEL(alibi_f32);
|
GGML_METAL_ADD_KERNEL(alibi_f32);
|
||||||
GGML_METAL_ADD_KERNEL(im2col_f16);
|
|
||||||
GGML_METAL_ADD_KERNEL(cpy_f32_f16);
|
GGML_METAL_ADD_KERNEL(cpy_f32_f16);
|
||||||
GGML_METAL_ADD_KERNEL(cpy_f32_f32);
|
GGML_METAL_ADD_KERNEL(cpy_f32_f32);
|
||||||
GGML_METAL_ADD_KERNEL(cpy_f16_f16);
|
GGML_METAL_ADD_KERNEL(cpy_f16_f16);
|
||||||
@ -390,7 +386,6 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
|
|||||||
GGML_METAL_DEL_KERNEL(rms_norm);
|
GGML_METAL_DEL_KERNEL(rms_norm);
|
||||||
GGML_METAL_DEL_KERNEL(norm);
|
GGML_METAL_DEL_KERNEL(norm);
|
||||||
GGML_METAL_DEL_KERNEL(mul_mv_f32_f32);
|
GGML_METAL_DEL_KERNEL(mul_mv_f32_f32);
|
||||||
GGML_METAL_DEL_KERNEL(mul_mv_f16_f16);
|
|
||||||
GGML_METAL_DEL_KERNEL(mul_mv_f16_f32);
|
GGML_METAL_DEL_KERNEL(mul_mv_f16_f32);
|
||||||
GGML_METAL_DEL_KERNEL(mul_mv_f16_f32_1row);
|
GGML_METAL_DEL_KERNEL(mul_mv_f16_f32_1row);
|
||||||
GGML_METAL_DEL_KERNEL(mul_mv_f16_f32_l4);
|
GGML_METAL_DEL_KERNEL(mul_mv_f16_f32_l4);
|
||||||
@ -421,7 +416,6 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
|
|||||||
GGML_METAL_DEL_KERNEL(rope_f32);
|
GGML_METAL_DEL_KERNEL(rope_f32);
|
||||||
GGML_METAL_DEL_KERNEL(rope_f16);
|
GGML_METAL_DEL_KERNEL(rope_f16);
|
||||||
GGML_METAL_DEL_KERNEL(alibi_f32);
|
GGML_METAL_DEL_KERNEL(alibi_f32);
|
||||||
GGML_METAL_DEL_KERNEL(im2col_f16);
|
|
||||||
GGML_METAL_DEL_KERNEL(cpy_f32_f16);
|
GGML_METAL_DEL_KERNEL(cpy_f32_f16);
|
||||||
GGML_METAL_DEL_KERNEL(cpy_f32_f32);
|
GGML_METAL_DEL_KERNEL(cpy_f32_f32);
|
||||||
GGML_METAL_DEL_KERNEL(cpy_f16_f16);
|
GGML_METAL_DEL_KERNEL(cpy_f16_f16);
|
||||||
@ -479,10 +473,6 @@ static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, stru
|
|||||||
|
|
||||||
const int64_t tsize = ggml_nbytes(t);
|
const int64_t tsize = ggml_nbytes(t);
|
||||||
|
|
||||||
if (t->buffer && t->buffer->backend && t->buffer->backend->context) {
|
|
||||||
ctx = t->buffer->backend->context;
|
|
||||||
}
|
|
||||||
|
|
||||||
// find the view that contains the tensor fully
|
// find the view that contains the tensor fully
|
||||||
for (int i = 0; i < ctx->n_buffers; ++i) {
|
for (int i = 0; i < ctx->n_buffers; ++i) {
|
||||||
const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
|
const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
|
||||||
@ -1149,7 +1139,6 @@ void ggml_metal_graph_compute(
|
|||||||
switch (src0t) {
|
switch (src0t) {
|
||||||
case GGML_TYPE_F32:
|
case GGML_TYPE_F32:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(src1t == GGML_TYPE_F32);
|
|
||||||
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f32_f32];
|
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f32_f32];
|
||||||
nrows = 4;
|
nrows = 4;
|
||||||
} break;
|
} break;
|
||||||
@ -1157,18 +1146,13 @@ void ggml_metal_graph_compute(
|
|||||||
{
|
{
|
||||||
nth0 = 32;
|
nth0 = 32;
|
||||||
nth1 = 1;
|
nth1 = 1;
|
||||||
if (src1t == GGML_TYPE_F32) {
|
if (ne11 * ne12 < 4) {
|
||||||
if (ne11 * ne12 < 4) {
|
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_1row];
|
||||||
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_1row];
|
} else if (ne00 >= 128 && ne01 >= 8 && ne00%4 == 0) {
|
||||||
} else if (ne00 >= 128 && ne01 >= 8 && ne00%4 == 0) {
|
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_l4];
|
||||||
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32_l4];
|
nrows = ne11;
|
||||||
nrows = ne11;
|
|
||||||
} else {
|
|
||||||
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32];
|
|
||||||
nrows = 4;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f16];
|
[encoder setComputePipelineState:ctx->pipeline_mul_mv_f16_f32];
|
||||||
nrows = 4;
|
nrows = 4;
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
@ -1480,58 +1464,6 @@ void ggml_metal_graph_compute(
|
|||||||
|
|
||||||
[encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
|
[encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_IM2COL:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
||||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
||||||
GGML_ASSERT( dst->type == GGML_TYPE_F16);
|
|
||||||
|
|
||||||
const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
|
|
||||||
const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
|
|
||||||
const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
|
|
||||||
const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
|
|
||||||
const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
|
|
||||||
const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
|
|
||||||
const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
|
|
||||||
|
|
||||||
const int32_t N = src1->ne[is_2D ? 3 : 2];
|
|
||||||
const int32_t IC = src1->ne[is_2D ? 2 : 1];
|
|
||||||
const int32_t IH = is_2D ? src1->ne[1] : 1;
|
|
||||||
const int32_t IW = src1->ne[0];
|
|
||||||
|
|
||||||
const int32_t KH = is_2D ? src0->ne[1] : 1;
|
|
||||||
const int32_t KW = src0->ne[0];
|
|
||||||
|
|
||||||
const int32_t OH = is_2D ? dst->ne[2] : 1;
|
|
||||||
const int32_t OW = dst->ne[1];
|
|
||||||
|
|
||||||
const int32_t CHW = IC * KH * KW;
|
|
||||||
|
|
||||||
const int32_t ofs0 = src1->nb[is_2D ? 3 : 2] / 4;
|
|
||||||
const int32_t ofs1 = src1->nb[is_2D ? 2 : 1] / 4;
|
|
||||||
|
|
||||||
switch (src0->type) {
|
|
||||||
case GGML_TYPE_F32: GGML_ASSERT(false && "not implemented"); break;
|
|
||||||
case GGML_TYPE_F16: [encoder setComputePipelineState:ctx->pipeline_im2col_f16]; break;
|
|
||||||
default: GGML_ASSERT(false);
|
|
||||||
};
|
|
||||||
|
|
||||||
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:0];
|
|
||||||
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
|
|
||||||
[encoder setBytes:&ofs0 length:sizeof( int32_t) atIndex:2];
|
|
||||||
[encoder setBytes:&ofs1 length:sizeof( int32_t) atIndex:3];
|
|
||||||
[encoder setBytes:&IW length:sizeof( int32_t) atIndex:4];
|
|
||||||
[encoder setBytes:&IH length:sizeof( int32_t) atIndex:5];
|
|
||||||
[encoder setBytes:&CHW length:sizeof( int32_t) atIndex:6];
|
|
||||||
[encoder setBytes:&s0 length:sizeof( int32_t) atIndex:7];
|
|
||||||
[encoder setBytes:&s1 length:sizeof( int32_t) atIndex:8];
|
|
||||||
[encoder setBytes:&p0 length:sizeof( int32_t) atIndex:9];
|
|
||||||
[encoder setBytes:&p1 length:sizeof( int32_t) atIndex:10];
|
|
||||||
[encoder setBytes:&d0 length:sizeof( int32_t) atIndex:11];
|
|
||||||
[encoder setBytes:&d1 length:sizeof( int32_t) atIndex:12];
|
|
||||||
|
|
||||||
[encoder dispatchThreadgroups:MTLSizeMake(IC, OH, OW) threadsPerThreadgroup:MTLSizeMake(N, KH, KW)];
|
|
||||||
} break;
|
|
||||||
case GGML_OP_DUP:
|
case GGML_OP_DUP:
|
||||||
case GGML_OP_CPY:
|
case GGML_OP_CPY:
|
||||||
case GGML_OP_CONT:
|
case GGML_OP_CONT:
|
||||||
|
108
ggml-metal.metal
108
ggml-metal.metal
@ -792,7 +792,7 @@ kernel void kernel_mul_mv_f32_f32(
|
|||||||
constant int64_t & ne0,
|
constant int64_t & ne0,
|
||||||
constant int64_t & ne1,
|
constant int64_t & ne1,
|
||||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||||
uint tiisg[[thread_index_in_simdgroup]]) {
|
uint tiisg[[thread_index_in_simdgroup]]) {
|
||||||
|
|
||||||
const int64_t r0 = tgpig.x;
|
const int64_t r0 = tgpig.x;
|
||||||
const int64_t rb = tgpig.y*N_F32_F32;
|
const int64_t rb = tgpig.y*N_F32_F32;
|
||||||
@ -844,79 +844,6 @@ kernel void kernel_mul_mv_f32_f32(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define N_F16_F16 4
|
|
||||||
|
|
||||||
kernel void kernel_mul_mv_f16_f16(
|
|
||||||
device const char * src0,
|
|
||||||
device const char * src1,
|
|
||||||
device float * dst,
|
|
||||||
constant int64_t & ne00,
|
|
||||||
constant int64_t & ne01,
|
|
||||||
constant int64_t & ne02,
|
|
||||||
constant uint64_t & nb00,
|
|
||||||
constant uint64_t & nb01,
|
|
||||||
constant uint64_t & nb02,
|
|
||||||
constant int64_t & ne10,
|
|
||||||
constant int64_t & ne11,
|
|
||||||
constant int64_t & ne12,
|
|
||||||
constant uint64_t & nb10,
|
|
||||||
constant uint64_t & nb11,
|
|
||||||
constant uint64_t & nb12,
|
|
||||||
constant int64_t & ne0,
|
|
||||||
constant int64_t & ne1,
|
|
||||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
|
||||||
uint tiisg[[thread_index_in_simdgroup]]) {
|
|
||||||
|
|
||||||
const int64_t r0 = tgpig.x;
|
|
||||||
const int64_t rb = tgpig.y*N_F16_F16;
|
|
||||||
const int64_t im = tgpig.z;
|
|
||||||
|
|
||||||
device const half * x = (device const half *) (src0 + r0*nb01 + im/(ne12/ne02)*nb02);
|
|
||||||
|
|
||||||
if (ne00 < 128) {
|
|
||||||
for (int row = 0; row < N_F16_F16; ++row) {
|
|
||||||
int r1 = rb + row;
|
|
||||||
if (r1 >= ne11) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
device const half * y = (device const half *) (src1 + r1*nb11 + im*nb12);
|
|
||||||
|
|
||||||
float sumf = 0;
|
|
||||||
for (int i = tiisg; i < ne00; i += 32) {
|
|
||||||
sumf += (half) x[i] * (half) y[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
float all_sum = simd_sum(sumf);
|
|
||||||
if (tiisg == 0) {
|
|
||||||
dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
device const half4 * x4 = (device const half4 *)x;
|
|
||||||
for (int row = 0; row < N_F16_F16; ++row) {
|
|
||||||
int r1 = rb + row;
|
|
||||||
if (r1 >= ne11) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
device const half * y = (device const half *) (src1 + r1*nb11 + im*nb12);
|
|
||||||
device const half4 * y4 = (device const half4 *) y;
|
|
||||||
|
|
||||||
float sumf = 0;
|
|
||||||
for (int i = tiisg; i < ne00/4; i += 32) {
|
|
||||||
for (int k = 0; k < 4; ++k) sumf += (half) x4[i][k] * y4[i][k];
|
|
||||||
}
|
|
||||||
|
|
||||||
float all_sum = simd_sum(sumf);
|
|
||||||
if (tiisg == 0) {
|
|
||||||
for (int i = 4*(ne00/4); i < ne00; ++i) all_sum += (half) x[i] * y[i];
|
|
||||||
dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
kernel void kernel_mul_mv_f16_f32_1row(
|
kernel void kernel_mul_mv_f16_f32_1row(
|
||||||
device const char * src0,
|
device const char * src0,
|
||||||
device const char * src1,
|
device const char * src1,
|
||||||
@ -1302,39 +1229,6 @@ kernel void kernel_rope(
|
|||||||
template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope<float>;
|
template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope<float>;
|
||||||
template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope<half>;
|
template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope<half>;
|
||||||
|
|
||||||
kernel void kernel_im2col_f16(
|
|
||||||
device const float * x,
|
|
||||||
device half * dst,
|
|
||||||
constant int32_t & ofs0,
|
|
||||||
constant int32_t & ofs1,
|
|
||||||
constant int32_t & IW,
|
|
||||||
constant int32_t & IH,
|
|
||||||
constant int32_t & CHW,
|
|
||||||
constant int32_t & s0,
|
|
||||||
constant int32_t & s1,
|
|
||||||
constant int32_t & p0,
|
|
||||||
constant int32_t & p1,
|
|
||||||
constant int32_t & d0,
|
|
||||||
constant int32_t & d1,
|
|
||||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
|
||||||
uint3 tgpg[[threadgroups_per_grid]],
|
|
||||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
|
||||||
uint3 ntg[[threads_per_threadgroup]]) {
|
|
||||||
const int32_t iiw = tgpig[2] * s0 + tpitg[2] * d0 - p0;
|
|
||||||
const int32_t iih = tgpig[1] * s1 + tpitg[1] * d1 - p1;
|
|
||||||
|
|
||||||
const int32_t offset_dst =
|
|
||||||
(tpitg[0] * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW +
|
|
||||||
(tgpig[0] * (ntg[1] * ntg[2]) + tpitg[1] * ntg[2] + tpitg[2]);
|
|
||||||
|
|
||||||
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
|
||||||
dst[offset_dst] = 0.0f;
|
|
||||||
} else {
|
|
||||||
const int32_t offset_src = tpitg[0] * ofs0 + tgpig[0] * ofs1;
|
|
||||||
dst[offset_dst] = x[offset_src + iih * IW + iiw];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
kernel void kernel_cpy_f16_f16(
|
kernel void kernel_cpy_f16_f16(
|
||||||
device const half * src0,
|
device const half * src0,
|
||||||
device half * dst,
|
device half * dst,
|
||||||
|
19
ggml.h
19
ggml.h
@ -403,8 +403,13 @@ extern "C" {
|
|||||||
GGML_OP_ROPE_BACK,
|
GGML_OP_ROPE_BACK,
|
||||||
GGML_OP_ALIBI,
|
GGML_OP_ALIBI,
|
||||||
GGML_OP_CLAMP,
|
GGML_OP_CLAMP,
|
||||||
|
GGML_OP_CONV_1D,
|
||||||
|
GGML_OP_CONV_1D_STAGE_0, // internal
|
||||||
|
GGML_OP_CONV_1D_STAGE_1, // internal
|
||||||
GGML_OP_CONV_TRANSPOSE_1D,
|
GGML_OP_CONV_TRANSPOSE_1D,
|
||||||
GGML_OP_IM2COL,
|
GGML_OP_CONV_2D,
|
||||||
|
GGML_OP_CONV_2D_STAGE_0, // internal
|
||||||
|
GGML_OP_CONV_2D_STAGE_1, // internal
|
||||||
GGML_OP_CONV_TRANSPOSE_2D,
|
GGML_OP_CONV_TRANSPOSE_2D,
|
||||||
GGML_OP_POOL_1D,
|
GGML_OP_POOL_1D,
|
||||||
GGML_OP_POOL_2D,
|
GGML_OP_POOL_2D,
|
||||||
@ -1393,18 +1398,6 @@ extern "C" {
|
|||||||
float min,
|
float min,
|
||||||
float max);
|
float max);
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_im2col(
|
|
||||||
struct ggml_context * ctx,
|
|
||||||
struct ggml_tensor * a,
|
|
||||||
struct ggml_tensor * b,
|
|
||||||
int s0,
|
|
||||||
int s1,
|
|
||||||
int p0,
|
|
||||||
int p1,
|
|
||||||
int d0,
|
|
||||||
int d1,
|
|
||||||
bool is_2D);
|
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_conv_1d(
|
GGML_API struct ggml_tensor * ggml_conv_1d(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
|
@ -252,7 +252,7 @@ class WhisperANE(Whisper):
|
|||||||
def convert_encoder(hparams, model, quantize=False):
|
def convert_encoder(hparams, model, quantize=False):
|
||||||
model.eval()
|
model.eval()
|
||||||
|
|
||||||
input_shape = (1, hparams.n_mels, 3000)
|
input_shape = (1, 80, 3000)
|
||||||
input_data = torch.randn(input_shape)
|
input_data = torch.randn(input_shape)
|
||||||
traced_model = torch.jit.trace(model, input_data)
|
traced_model = torch.jit.trace(model, input_data)
|
||||||
|
|
||||||
@ -302,7 +302,7 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
|
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large", "large-v1", "large-v2"]:
|
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
|
||||||
raise ValueError("Invalid model name")
|
raise ValueError("Invalid model name")
|
||||||
|
|
||||||
whisper = load_model(args.model).cpu()
|
whisper = load_model(args.model).cpu()
|
||||||
|
@ -9,7 +9,7 @@ import shutil
|
|||||||
def convert_encoder(hparams, encoder, mname):
|
def convert_encoder(hparams, encoder, mname):
|
||||||
encoder.eval()
|
encoder.eval()
|
||||||
|
|
||||||
mel = torch.zeros((1, hparams.n_mels, 3000))
|
mel = torch.zeros((1, 80, 3000))
|
||||||
|
|
||||||
onnx_folder=os.path.join(os.path.dirname(__file__),"onnx_encoder")
|
onnx_folder=os.path.join(os.path.dirname(__file__),"onnx_encoder")
|
||||||
|
|
||||||
|
@ -1 +0,0 @@
|
|||||||
../ggml.h
|
|
@ -1 +0,0 @@
|
|||||||
../whisper.h
|
|
1039
whisper.cpp
1039
whisper.cpp
File diff suppressed because it is too large
Load Diff
17
whisper.h
17
whisper.h
@ -1,8 +1,6 @@
|
|||||||
#ifndef WHISPER_H
|
#ifndef WHISPER_H
|
||||||
#define WHISPER_H
|
#define WHISPER_H
|
||||||
|
|
||||||
#include "ggml.h"
|
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
@ -112,15 +110,15 @@ extern "C" {
|
|||||||
// Various functions for loading a ggml whisper model.
|
// Various functions for loading a ggml whisper model.
|
||||||
// Allocate (almost) all memory needed for the model.
|
// Allocate (almost) all memory needed for the model.
|
||||||
// Return NULL on failure
|
// Return NULL on failure
|
||||||
WHISPER_API struct whisper_context * whisper_init_from_file_with_params (const char * path_model, struct whisper_context_params params);
|
WHISPER_API struct whisper_context * whisper_init_from_file_with_params(const char * path_model, struct whisper_context_params params);
|
||||||
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct whisper_context_params params);
|
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct whisper_context_params params);
|
||||||
WHISPER_API struct whisper_context * whisper_init_with_params (struct whisper_model_loader * loader, struct whisper_context_params params);
|
WHISPER_API struct whisper_context * whisper_init_with_params(struct whisper_model_loader * loader, struct whisper_context_params params);
|
||||||
|
|
||||||
// These are the same as the above, but the internal state of the context is not allocated automatically
|
// These are the same as the above, but the internal state of the context is not allocated automatically
|
||||||
// It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
|
// It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
|
||||||
WHISPER_API struct whisper_context * whisper_init_from_file_with_params_no_state (const char * path_model, struct whisper_context_params params);
|
WHISPER_API struct whisper_context * whisper_init_from_file_with_params_no_state(const char * path_model, struct whisper_context_params params);
|
||||||
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct whisper_context_params params);
|
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct whisper_context_params params);
|
||||||
WHISPER_API struct whisper_context * whisper_init_with_params_no_state (struct whisper_model_loader * loader, struct whisper_context_params params);
|
WHISPER_API struct whisper_context * whisper_init_with_params_no_state(struct whisper_model_loader * loader, struct whisper_context_params params);
|
||||||
|
|
||||||
WHISPER_DEPRECATED(
|
WHISPER_DEPRECATED(
|
||||||
WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model),
|
WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model),
|
||||||
@ -572,7 +570,8 @@ extern "C" {
|
|||||||
|
|
||||||
// Control logging output; default behavior is to print to stderr
|
// Control logging output; default behavior is to print to stderr
|
||||||
|
|
||||||
WHISPER_API void whisper_log_set(ggml_log_callback log_callback, void * user_data);
|
typedef void (*whisper_log_callback)(const char * line);
|
||||||
|
WHISPER_API void whisper_set_log_callback(whisper_log_callback callback);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user