Makefile: host-detected compile flags; sources: comparison and unused-local cleanups

Makefile
  * reads `uname -s`, `uname -p` and `uname -m` and assembles CFLAGS/CXXFLAGS from them
  * compiles through $(CC)/$(CXX) instead of hard-coded gcc/g++
  * builds `main` straight from main.cpp; the separate main.o rule is dropped
main.cpp, stream.cpp, whisper.cpp
  * loop-index type and cast adjustments in integer comparisons
  * removal of the locals n_vocab (twice) and last_id
    (presumably so the code builds cleanly under the newly added -Wall -Wextra)

Review amendments relative to the patch as submitted:
  * -pthread is appended to CXXFLAGS as well as CFLAGS, because whisper.o, main
    and stream are compiled and linked through $(CXX) $(CXXFLAGS) and every
    command they replace carried -pthread
  * the aarch64 branch no longer adds -mfpu=neon, which GCC does not accept on
    AArch64
  * whisper_full: the segment-skipping loop tests the index bound before it
    indexes result_cur

diff --git a/Makefile b/Makefile
index 4e1315f..04e7974 100644
--- a/Makefile
+++ b/Makefile
@@ -1,25 +1,73 @@
-CC_SDL=`sdl2-config --cflags --libs`
+UNAME_S := $(shell uname -s)
+UNAME_P := $(shell uname -p)
+UNAME_M := $(shell uname -m)
 
-main: ggml.o whisper.o main.o
-	g++ -pthread -o main ggml.o whisper.o main.o
+#
+# Compile flags
+#
+
+CFLAGS = -O3 -std=c11
+CXXFLAGS = -O3 -std=c++11
+
+CFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
+CXXFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
+
+# OS specific
+# TODO: support Windows
+ifeq ($(UNAME_S),Linux)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+ifeq ($(UNAME_S),Darwin)
+	CFLAGS += -pthread
+	CXXFLAGS += -pthread
+endif
+
+# Architecture specific
+ifeq ($(UNAME_P),x86_64)
+	CFLAGS += -mavx -mavx2 -mfma -mf16c
+endif
+ifneq ($(filter arm%,$(UNAME_P)),)
+	CFLAGS += -mfpu=neon
+endif
+ifneq ($(filter aarch64%,$(UNAME_M)),)
+	# no -mfpu here: GCC has no such option on AArch64, where Advanced SIMD is on by default
+endif
+ifneq ($(filter armv%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mcpu=cortex-a72 -mfloat-abi=hard -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+endif
+
+#
+# Build library + main
+#
+
+main: main.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) main.cpp whisper.o ggml.o -o main
 	./main -h
 
 ggml.o: ggml.c ggml.h
-	gcc -pthread -O3 -mavx -mavx2 -mfma -mf16c -c ggml.c
+	$(CC) $(CFLAGS) -c ggml.c
 
 whisper.o: whisper.cpp whisper.h
-	gcc -pthread -O3 -std=c++11 -c whisper.cpp
+	$(CXX) $(CXXFLAGS) -c whisper.cpp
 
-main.o: main.cpp ggml.h
-	g++ -pthread -O3 -std=c++11 -c main.cpp
-
-stream: stream.cpp
-	g++ -pthread -O3 -std=c++11 -o stream stream.cpp ggml.o whisper.o $(CC_SDL)
-
-# clean up the directory
 clean:
 	rm -f *.o main
 
+#
+# Examples
+#
+
+CC_SDL=`sdl2-config --cflags --libs`
+
+stream: stream.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) stream.cpp ggml.o whisper.o -o stream $(CC_SDL)
+
+#
+# Audio samples
+#
+
 # download a few audio samples into folder "./samples":
 .PHONY: samples
 samples:
@@ -36,6 +84,9 @@ samples:
 	@ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
 	@rm samples/mm1.wav
 
+#
+# Models
+#
 
 # if not already downloaded, the following targets download the specified model and
 # runs it on all samples in the folder "./samples":
diff --git a/main.cpp b/main.cpp
index 1885eb6..6d1c55d 100644
--- a/main.cpp
+++ b/main.cpp
@@ -149,11 +149,11 @@ int main(int argc, char ** argv) {
         // convert to mono, float
         pcmf32.resize(n);
         if (wav.channels == 1) {
-            for (size_t i = 0; i < n; i++) {
+            for (int i = 0; i < n; i++) {
                 pcmf32[i] = float(pcm16[i])/32768.0f;
             }
         } else {
-            for (size_t i = 0; i < n; i++) {
+            for (int i = 0; i < n; i++) {
                 pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
             }
         }
diff --git a/stream.cpp b/stream.cpp
index d0e40c2..e9d0364 100644
--- a/stream.cpp
+++ b/stream.cpp
@@ -238,7 +238,7 @@ int main(int argc, char ** argv) {
         }
 
         // process 3 seconds of new audio
-        while ((int) SDL_GetQueuedAudioSize(g_dev_id_in) < 3*WHISPER_SAMPLE_RATE*sizeof(float)) {
+        while (SDL_GetQueuedAudioSize(g_dev_id_in) < 3*WHISPER_SAMPLE_RATE*sizeof(float)) {
             SDL_Delay(1);
         }
         const int n_samples_new = SDL_GetQueuedAudioSize(g_dev_id_in)/sizeof(float);
diff --git a/whisper.cpp b/whisper.cpp
index 4f105ee..6fc0130 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -1031,8 +1031,6 @@ bool whisper_encode(
     const auto & mel_inp = wctx.mel;
     const auto & hparams = model.hparams;
 
-    const int n_vocab = hparams.n_vocab;
-
     const int n_ctx = hparams.n_audio_ctx;
     const int n_state = hparams.n_audio_state;
     const int n_head = hparams.n_audio_head;
@@ -2365,7 +2363,6 @@ int whisper_full(
 
         bool done = false;
         int seek_delta = 100*WHISPER_CHUNK_SIZE;
-        whisper_token last_id = 0;
 
         // print the prompt
         //printf("\n\n");
@@ -2395,8 +2392,6 @@ int whisper_full(
             // feel free to experiment!
             //
             {
-                const int n_vocab = whisper_n_vocab(ctx);
-
                 whisper_token id = 0;
                 whisper_token tid = whisper_token_beg(ctx);
 
@@ -2410,7 +2405,6 @@ int whisper_full(
                     seek_delta = 2*(id - whisper_token_beg(ctx));
                     result_len = i + 1;
                 }
-                last_id = id;
 
                 // add it to the context
                 prompt.push_back(id);
@@ -2444,7 +2438,7 @@ int whisper_full(
 
             std::string text = "";
 
-            for (int i = 0; i < result_cur.size(); i++) {
+            for (int i = 0; i < (int) result_cur.size(); i++) {
                 if (params.print_special_tokens == false && result_cur[i].id >= whisper_token_eot(ctx)) {
                 } else {
                     text += whisper_token_to_str(ctx, result_cur[i].id);
@@ -2464,7 +2458,7 @@ int whisper_full(
                         result_all.push_back({ t0, t1, text });
                     }
                     text = "";
-                    while (result_cur[i].id > whisper_token_beg(ctx) && i < result_cur.size()) {
+                    while (i < (int) result_cur.size() && result_cur[i].id > whisper_token_beg(ctx)) { // bound check first so result_cur is never indexed past its end
                         i++;
                     }
                     i--;