files : rename ./extra to ./scripts

2025-08-14 08:48:59 +02:00 · 2024-04-09 20:12:17 +03:00
parent 5275074d37
commit 52ccd4a3a8
12 changed files with 7 additions and 7 deletions
--- a/scripts/bench-all.sh
+++ b/scripts/bench-all.sh
@ -0,0 +1,100 @@
+#!/bin/bash
+
+# Helper script to run the bench tool on all models and print the results in share-able format
+
+printf "Usage: ./bench.sh [n_threads] [encoder-only]\n"
+
+if [ -z "$1" ]; then
+    n_threads=4
+else
+    n_threads=$1
+fi
+
+encoder_only=0
+if [ -z "$2" ]; then
+    encoder_only=0
+else
+    encoder_only=$2
+fi
+
+models=(                                                                                                    \
+      "tiny"     "tiny-q4_0"     "tiny-q4_1"     "tiny-q5_0"     "tiny-q5_1"     "tiny-q8_0"                \
+      "base"     "base-q4_0"     "base-q4_1"     "base-q5_0"     "base-q5_1"     "base-q8_0"                \
+     "small"    "small-q4_0"    "small-q4_1"    "small-q5_0"    "small-q5_1"    "small-q8_0"                \
+    "medium"   "medium-q4_0"   "medium-q4_1"   "medium-q5_0"   "medium-q5_1"   "medium-q8_0"   "medium-dis" \
+  "large-v2" "large-v2-q4_0" "large-v2-q4_1" "large-v2-q5_0" "large-v2-q5_1" "large-v2-q8_0" "large-v2-dis" \
+)
+
+if [ "$encoder_only" -eq 0 ]; then
+    printf "\n"
+    printf "Running memcpy benchmark\n"
+    printf "\n"
+
+    ./bench -w 1 -t $n_threads 2>&1
+
+    printf "\n"
+    printf "Running ggml_mul_mat benchmark with $n_threads threads\n"
+    printf "\n"
+
+    ./bench -w 2 -t $n_threads 2>&1
+
+    printf "\n"
+    printf "Running benchmark for all models\n"
+    printf "This can take a while!\n"
+    printf "\n"
+fi
+
+printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "CPU" "OS" "Config" "Model" "Th" "Enc." "Dec." "Bch5" "PP" "Commit"
+printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "---" "---" "---" "---" "---" "---" "---" "---" "---" "---"
+
+for model in "${models[@]}"; do
+    # actual run
+    # store stderr output in a variable in order to parse it later
+    output=$(./bench -m ./models/ggml-$model.bin -t $n_threads 2>&1)
+    ret=$?
+
+    # parse the output:
+    encode_time=$(echo "$output" | grep "encode time" | awk '{print $11}')
+    decode_time=$(echo "$output" | grep "decode time" | awk '{print $11}')
+    batchd_time=$(echo "$output" | grep "batchd time" | awk '{print $11}')
+    prompt_time=$(echo "$output" | grep "prompt time" | awk '{print $11}')
+    system_info=$(echo "$output" | grep "system_info")
+    n_threads=$(echo "$output" | grep "system_info" | awk '{print $4}')
+
+    # floor to milliseconds
+    #encode_time=${encode_time%.*}
+    #decode_time=${decode_time%.*}
+    #prompt_time=${prompt_time%.*}
+
+    config=""
+
+    if [[ $system_info == *"AVX2 = 1"* ]]; then
+        config="$config AVX2"
+    fi
+
+    if [[ $system_info == *"NEON = 1"* ]]; then
+        config="$config NEON"
+    fi
+
+    if [[ $system_info == *"BLAS = 1"* ]]; then
+        config="$config BLAS"
+    fi
+
+    if [[ $system_info == *"COREML = 1"* ]]; then
+        config="$config COREML"
+    fi
+
+    if [[ $system_info == *"CUDA = 1"* ]]; then
+        config="$config CUDA"
+    fi
+
+    if [[ $system_info == *"METAL = 1"* ]]; then
+        config="$config METAL"
+    fi
+
+    commit=$(git rev-parse --short HEAD)
+
+    if [ $ret -eq 0 ]; then
+        printf "| <todo> | <todo> | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "$config" "$model" "$n_threads" "$encode_time" "$decode_time" "$batchd_time" "$prompt_time" "$commit"
+    fi
+done
--- a/scripts/bench-wts.sh
+++ b/scripts/bench-wts.sh
@ -0,0 +1,70 @@
+# Benchmark word-level timestamps for different models
+#
+# This script takes two arguments
+# - an audio file
+# - [optional] path to a font file
+
+# I'm using "/usr/share/fonts/truetype/freefont/FreeMono.ttf" on Ubuntu
+
+if [ -z "$1" ]; then
+    echo "Usage: $0 <audio file> [font file]"
+    exit 1
+fi
+
+#TODO: Make this a command line parameter
+#models="base small large"
+#models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large"
+models="tiny.en base.en small.en medium.en large"
+
+DURATION=$(ffprobe -i $1 -show_entries format=duration -v quiet -of csv="p=0")
+DURATION=$(printf "%.2f" $DURATION)
+echo "Input file duration: ${DURATION}s"
+
+for model in $models; do
+    echo "Running $model"
+    COMMAND="./main -m models/ggml-$model.bin -owts -f $1 -of $1.$model"
+
+    if [ ! -z "$2" ]; then
+        COMMAND="$COMMAND -fp $2"
+    fi
+    #TODO: Surface errors better
+    # TIMEFMT is for zsh, TIMEFORMAT is for bash
+    EXECTIME=$({ TIMEFMT="%E";TIMEFORMAT=%E; time $COMMAND >/dev/null 2>&1; } 2>&1)
+
+    # Slightly different formats between zsh and bash
+    if [ "${EXECTIME: -1}" == "s" ]; then
+        EXECTIME=${EXECTIME::-1}
+    fi
+
+    RATIO=$(echo "$DURATION / $EXECTIME" | bc -l)
+    RATIO=$(printf "%.2f" $RATIO)
+
+    echo "Execution time: ${EXECTIME}s (${RATIO}x realtime)"
+
+    # If the file already exists, delete it
+    if [ -f $1.mp4 ]; then
+        rm $1.mp4
+    fi
+
+    bash $1.$model.wts >/dev/null 2>&1
+    mv $1.mp4 $1.$model.mp4
+
+    ffmpeg -y -f lavfi -i color=c=black:s=1200x50:d=$DURATION -vf "drawtext=fontfile=$2:fontsize=36:x=10:y=(h-text_h)/2:text='ggml-$model - ${EXECTIME}s (${RATIO}x realtime)':fontcolor=lightgrey" $1.$model.info.mp4 >/dev/null 2>&1
+done
+
+COMMAND="ffmpeg -y"
+for model in $models; do
+    COMMAND="$COMMAND -i $1.$model.info.mp4 -i $1.$model.mp4"
+done
+COMMAND="$COMMAND -filter_complex \""
+COUNT=0
+for model in $models; do
+    COMMAND="$COMMAND[${COUNT}:v][$(($COUNT+1)):v]"
+    COUNT=$((COUNT+2))
+done
+COMMAND="$COMMAND vstack=inputs=${COUNT}[v]\" -map \"[v]\" -map 1:a $1.all.mp4 >/dev/null 2>&1"
+
+echo $COMMAND
+
+# Run the command
+eval $COMMAND
--- a/scripts/bench.py
+++ b/scripts/bench.py
@ -0,0 +1,224 @@
+import os
+import subprocess
+import re
+import csv
+import wave
+import contextlib
+import argparse
+
+
+# Custom action to handle comma-separated list
+class ListAction(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        setattr(namespace, self.dest, [int(val) for val in values.split(",")])
+
+
+parser = argparse.ArgumentParser(description="Benchmark the speech recognition model")
+
+# Define the argument to accept a list
+parser.add_argument(
+    "-t",
+    "--threads",
+    dest="threads",
+    action=ListAction,
+    default=[4],
+    help="List of thread counts to benchmark (comma-separated, default: 4)",
+)
+
+parser.add_argument(
+    "-p",
+    "--processors",
+    dest="processors",
+    action=ListAction,
+    default=[1],
+    help="List of processor counts to benchmark (comma-separated, default: 1)",
+)
+
+
+parser.add_argument(
+    "-f",
+    "--filename",
+    type=str,
+    default="./samples/jfk.wav",
+    help="Relative path of the file to transcribe (default: ./samples/jfk.wav)",
+)
+
+# Parse the command line arguments
+args = parser.parse_args()
+
+sample_file = args.filename
+
+threads = args.threads
+processors = args.processors
+
+# Define the models, threads, and processor counts to benchmark
+models = [
+    "ggml-tiny.en.bin",
+    "ggml-tiny.bin",
+    "ggml-base.en.bin",
+    "ggml-base.bin",
+    "ggml-small.en.bin",
+    "ggml-small.bin",
+    "ggml-medium.en.bin",
+    "ggml-medium.bin",
+    "ggml-large-v1.bin",
+    "ggml-large-v2.bin",
+    "ggml-large-v3.bin",
+]
+
+
+metal_device = ""
+
+# Initialize a dictionary to hold the results
+results = {}
+
+gitHashHeader = "Commit"
+modelHeader = "Model"
+hardwareHeader = "Hardware"
+recordingLengthHeader = "Recording Length (seconds)"
+threadHeader = "Thread"
+processorCountHeader = "Processor Count"
+loadTimeHeader = "Load Time (ms)"
+sampleTimeHeader = "Sample Time (ms)"
+encodeTimeHeader = "Encode Time (ms)"
+decodeTimeHeader = "Decode Time (ms)"
+sampleTimePerRunHeader = "Sample Time per Run (ms)"
+encodeTimePerRunHeader = "Encode Time per Run (ms)"
+decodeTimePerRunHeader = "Decode Time per Run (ms)"
+totalTimeHeader = "Total Time (ms)"
+
+
+def check_file_exists(file: str) -> bool:
+    return os.path.isfile(file)
+
+
+def get_git_short_hash() -> str:
+    try:
+        return (
+            subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
+            .decode()
+            .strip()
+        )
+    except subprocess.CalledProcessError as e:
+        return ""
+
+
+def wav_file_length(file: str = sample_file) -> float:
+    with contextlib.closing(wave.open(file, "r")) as f:
+        frames = f.getnframes()
+        rate = f.getframerate()
+        duration = frames / float(rate)
+        return duration
+
+
+def extract_metrics(output: str, label: str) -> tuple[float, float]:
+    match = re.search(rf"{label} \s*=\s*(\d+\.\d+)\s*ms\s*/\s*(\d+)\s*runs", output)
+    time = float(match.group(1)) if match else None
+    runs = float(match.group(2)) if match else None
+    return time, runs
+
+
+def extract_device(output: str) -> str:
+    match = re.search(r"picking default device: (.*)", output)
+    device = match.group(1) if match else "Not found"
+    return device
+
+
+# Check if the sample file exists
+if not check_file_exists(sample_file):
+    raise FileNotFoundError(f"Sample file {sample_file} not found")
+
+recording_length = wav_file_length()
+
+
+# Check that all models exist
+# Filter out models from list that are not downloaded
+filtered_models = []
+for model in models:
+    if check_file_exists(f"models/{model}"):
+        filtered_models.append(model)
+    else:
+        print(f"Model {model} not found, removing from list")
+
+models = filtered_models
+
+# Loop over each combination of parameters
+for model in filtered_models:
+    for thread in threads:
+        for processor_count in processors:
+            # Construct the command to run
+            cmd = f"./main -m models/{model} -t {thread} -p {processor_count} -f {sample_file}"
+            # Run the command and get the output
+            process = subprocess.Popen(
+                cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+            )
+
+            output = ""
+            while process.poll() is None:
+                output += process.stdout.read().decode()
+
+            # Parse the output
+            load_time_match = re.search(r"load time\s*=\s*(\d+\.\d+)\s*ms", output)
+            load_time = float(load_time_match.group(1)) if load_time_match else None
+
+            metal_device = extract_device(output)
+            sample_time, sample_runs = extract_metrics(output, "sample time")
+            encode_time, encode_runs = extract_metrics(output, "encode time")
+            decode_time, decode_runs = extract_metrics(output, "decode time")
+
+            total_time_match = re.search(r"total time\s*=\s*(\d+\.\d+)\s*ms", output)
+            total_time = float(total_time_match.group(1)) if total_time_match else None
+
+            model_name = model.replace("ggml-", "").replace(".bin", "")
+
+            print(
+                f"Ran model={model_name} threads={thread} processor_count={processor_count}, took {total_time}ms"
+            )
+            # Store the times in the results dictionary
+            results[(model_name, thread, processor_count)] = {
+                loadTimeHeader: load_time,
+                sampleTimeHeader: sample_time,
+                encodeTimeHeader: encode_time,
+                decodeTimeHeader: decode_time,
+                sampleTimePerRunHeader: round(sample_time / sample_runs, 2),
+                encodeTimePerRunHeader: round(encode_time / encode_runs, 2),
+                decodeTimePerRunHeader: round(decode_time / decode_runs, 2),
+                totalTimeHeader: total_time,
+            }
+
+# Write the results to a CSV file
+with open("benchmark_results.csv", "w", newline="") as csvfile:
+    fieldnames = [
+        gitHashHeader,
+        modelHeader,
+        hardwareHeader,
+        recordingLengthHeader,
+        threadHeader,
+        processorCountHeader,
+        loadTimeHeader,
+        sampleTimeHeader,
+        encodeTimeHeader,
+        decodeTimeHeader,
+        sampleTimePerRunHeader,
+        encodeTimePerRunHeader,
+        decodeTimePerRunHeader,
+        totalTimeHeader,
+    ]
+    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+
+    writer.writeheader()
+
+    shortHash = get_git_short_hash()
+    # Sort the results by total time in ascending order
+    sorted_results = sorted(results.items(), key=lambda x: x[1].get(totalTimeHeader, 0))
+    for params, times in sorted_results:
+        row = {
+            gitHashHeader: shortHash,
+            modelHeader: params[0],
+            hardwareHeader: metal_device,
+            recordingLengthHeader: recording_length,
+            threadHeader: params[1],
+            processorCountHeader: params[2],
+        }
+        row.update(times)
+        writer.writerow(row)
--- a/scripts/convert-all.sh
+++ b/scripts/convert-all.sh
@ -0,0 +1,8 @@
+#!/bin/bash
+
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
+
+for model in "${models[@]}"; do
+    python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
+    mv -v models/ggml-model.bin models/ggml-$model.bin
+done
--- a/scripts/deploy-wasm.sh
+++ b/scripts/deploy-wasm.sh
@ -0,0 +1,31 @@
+#!/bin/bash
+#
+# This is a helper script to deploy all WebAssembly examples to my node
+# Run from the build directory:
+#
+# cd build-em
+# ../scripts/deploy-wasm.sh
+#
+
+# check if emcmake is available
+if ! command -v emcmake &> /dev/null
+then
+    echo "Error: emscripten environment is not set up"
+    exit
+fi
+
+emcmake cmake .. && make -j
+if [ $? -ne 0 ]; then
+    echo "Error: build failed"
+    exit
+fi
+
+# copy all wasm files to the node
+scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/         && scp bin/libmain.worker.js    root@linode0:/var/www/html/whisper/
+scp bin/stream.wasm/*  root@linode0:/var/www/html/whisper/stream/  && scp bin/libstream.worker.js  root@linode0:/var/www/html/whisper/stream/
+scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
+scp bin/talk.wasm/*    root@linode0:/var/www/html/whisper/talk/    && scp bin/libtalk.worker.js    root@linode0:/var/www/html/whisper/talk/
+scp bin/bench.wasm/*   root@linode0:/var/www/html/whisper/bench/   && scp bin/libbench.worker.js   root@linode0:/var/www/html/whisper/bench/
+
+echo "Done"
+exit
--- a/scripts/quantize-all.sh
+++ b/scripts/quantize-all.sh
@ -0,0 +1,37 @@
+#!/bin/bash
+
+printf "Usage: $0 <upload>"
+
+if [ $# -ne 1 ]; then
+    printf "\nError: Invalid number of arguments\n"
+    exit 1
+fi
+
+qtype0="q5_0"
+qtype1="q5_1"
+upload="$1"
+declare -a filedex
+
+cd `dirname $0`
+cd ../
+
+for i in `ls ./models | grep ^ggml-.*.bin | grep -v "\-q"`; do
+    m="models/$i"
+    if [ -f "$m" ]; then
+        if [ "${m##*.}" == "bin" ]; then
+            ./quantize "${m}" "${m::${#m}-4}-${qtype1}.bin" ${qtype1};
+            ./quantize "${m}" "${m::${#m}-4}-${qtype0}.bin" ${qtype0};
+            filedex+=( "${m::${#m}-4}-${qtype1}.bin" "${m::${#m}-4}-${qtype0}.bin" )
+        fi
+    fi
+done
+
+
+
+if [ "$upload" == "1" ]; then
+    for i in ${!filedex[@]}; do
+        if [ "${filedex[$i]:9:8}" != "for-test" ]; then
+            scp ${filedex[$i]} root@linode0:/mnt/Data/ggml/ggml-model-${filedex[$i]:9}
+        fi
+    done
+fi
--- a/scripts/sha-all.sh
+++ b/scripts/sha-all.sh
@ -0,0 +1,7 @@
+#!/bin/bash
+
+# Compute the SHA1 of all model files in ./models/ggml-*.bin
+
+for f in ./models/ggml-*.bin; do
+    shasum "$f" -a 1
+done
--- a/scripts/sync-ggml-am.sh
+++ b/scripts/sync-ggml-am.sh
@ -0,0 +1,189 @@
+#!/bin/bash
+#
+# Synchronize ggml changes to whisper.cpp
+#
+# Usage:
+#
+#   $ cd /path/to/whisper.cpp
+#   $ ./scripts/sync-ggml-am.sh -skip hash0,hash1,hash2...
+#
+
+set -e
+
+sd=$(dirname $0)
+cd $sd/../
+
+SRC_WHISPER=$(pwd)
+SRC_GGML=$(cd ../ggml; pwd)
+
+if [ ! -d $SRC_GGML ]; then
+    echo "ggml not found at $SRC_GGML"
+    exit 1
+fi
+
+lc=$(cat $SRC_WHISPER/scripts/sync-ggml.last)
+echo "Syncing ggml changes since commit $lc"
+
+to_skip=""
+if [ "$1" == "-skip" ]; then
+    to_skip=$2
+fi
+
+cd $SRC_GGML
+
+git log --oneline $lc..HEAD
+git log --oneline $lc..HEAD --reverse | grep -v "(whisper/[0-9]*)" | cut -d' ' -f1 > $SRC_WHISPER/ggml-commits
+
+if [ ! -s $SRC_WHISPER/ggml-commits ]; then
+    rm -v $SRC_WHISPER/ggml-commits
+    echo "No new commits"
+    exit 0
+fi
+
+if [ -f $SRC_WHISPER/ggml-src.patch ]; then
+    rm -v $SRC_WHISPER/ggml-src.patch
+fi
+
+while read c; do
+    if [ -n "$to_skip" ]; then
+        if [[ $to_skip == *"$c"* ]]; then
+            echo "Skipping $c"
+            continue
+        fi
+    fi
+
+    git format-patch -k $c~1..$c --stdout -- \
+        include/ggml/ggml*.h \
+        src/ggml*.h \
+        src/ggml*.c \
+        src/ggml*.cpp \
+        src/ggml*.m \
+        src/ggml*.metal \
+        src/ggml*.cu \
+        src/ggml-cuda/* \
+        examples/common.h \
+        examples/common.cpp \
+        examples/common-ggml.h \
+        examples/common-ggml.cpp \
+        examples/whisper/grammar-parser.h \
+        examples/whisper/grammar-parser.cpp \
+        examples/whisper/whisper.h \
+        examples/whisper/whisper.cpp \
+        examples/whisper/main.cpp \
+        examples/whisper/quantize.cpp \
+        >> $SRC_WHISPER/ggml-src.patch
+done < $SRC_WHISPER/ggml-commits
+
+rm -v $SRC_WHISPER/ggml-commits
+
+# delete files if empty
+if [ ! -s $SRC_WHISPER/ggml-src.patch ]; then
+    rm -v $SRC_WHISPER/ggml-src.patch
+fi
+
+cd $SRC_WHISPER
+
+if [ -f $SRC_WHISPER/ggml-src.patch ]; then
+    # replace PR numbers
+    #
+    # Subject: some text (#1234)
+    # Subject: some text (ggml/1234)
+    cat ggml-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (ggml\/\2)/' > ggml-src.patch.tmp
+    mv ggml-src.patch.tmp ggml-src.patch
+
+    cat ggml-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (ggml\/\2)/' > ggml-src.patch.tmp
+    mv ggml-src.patch.tmp ggml-src.patch
+
+    # replace filenames:
+    #
+    # src/ggml.c                  -> ggml.c
+    # src/ggml-alloc.c            -> ggml-alloc.c
+    # src/ggml-backend-impl.h     -> ggml-backend-impl.h
+    # src/ggml-backend.c          -> ggml-backend.c
+    # src/ggml-common.h           -> ggml-common.h
+    # src/ggml-cuda/*             -> ggml-cuda/
+    # src/ggml-cuda.cu            -> ggml-cuda.cu
+    # src/ggml-cuda.h             -> ggml-cuda.h
+    # src/ggml-impl.h             -> ggml-impl.h
+    # src/ggml-kompute.cpp        -> ggml-kompute.cpp
+    # src/ggml-kompute.h          -> ggml-kompute.h
+    # src/ggml-metal.h            -> ggml-metal.h
+    # src/ggml-metal.m            -> ggml-metal.m
+    # src/ggml-mpi.h              -> ggml-mpi.h
+    # src/ggml-mpi.c              -> ggml-mpi.c
+    # src/ggml-opencl.cpp         -> ggml-opencl.cpp
+    # src/ggml-opencl.h           -> ggml-opencl.h
+    # src/ggml-quants.c           -> ggml-quants.c
+    # src/ggml-quants.h           -> ggml-quants.h
+    # src/ggml-sycl.cpp           -> ggml-sycl.cpp
+    # src/ggml-sycl.h             -> ggml-sycl.h
+    # src/ggml-vulkan.cpp         -> ggml-vulkan.cpp
+    # src/ggml-vulkan.h           -> ggml-vulkan.h
+    # include/ggml/ggml.h         -> ggml.h
+    # include/ggml/ggml-alloc.h   -> ggml-alloc.h
+    # include/ggml/ggml-backend.h -> ggml-backend.h
+    #
+    # examples/common.h                   -> examples/common.h
+    # examples/common.cpp                 -> examples/common.cpp
+    # examples/common-ggml.h              -> examples/common-ggml.h
+    # examples/common-ggml.cpp            -> examples/common-ggml.cpp
+    # examples/whisper/grammar-parser.h   -> examples/grammar-parser.h
+    # examples/whisper/grammar-parser.cpp -> examples/grammar-parser.cpp
+    #
+    # examples/whisper/whisper.h    -> whisper.h
+    # examples/whisper/whisper.cpp  -> whisper.cpp
+    # examples/whisper/main.cpp     -> examples/main/main.cpp
+    # examples/whisper/quantize.cpp -> examples/quantize/quantize.cpp
+
+    cat ggml-src.patch | sed \
+        -e 's/src\/ggml\.c/ggml.c/g' \
+        -e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
+        -e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
+        -e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
+        -e 's/src\/ggml-common\.h/ggml-common.h/g' \
+        -e 's/src\/ggml-cuda\//ggml-cuda\//g' \
+        -e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
+        -e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
+        -e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
+        -e 's/src\/ggml-kompute\.cpp/ggml-kompute.cpp/g' \
+        -e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
+        -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
+        -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
+        -e 's/src\/ggml-mpi\.h/ggml-mpi.h/g' \
+        -e 's/src\/ggml-mpi\.c/ggml-mpi.c/g' \
+        -e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
+        -e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
+        -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
+        -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
+        -e 's/src\/ggml-sycl\.cpp/ggml-sycl.cpp/g' \
+        -e 's/src\/ggml-sycl\.h/ggml-sycl.h/g' \
+        -e 's/src\/ggml-vulkan\.cpp/ggml-vulkan.cpp/g' \
+        -e 's/src\/ggml-vulkan\.h/ggml-vulkan.h/g' \
+        -e 's/include\/ggml\/ggml\.h/ggml.h/g' \
+        -e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \
+        -e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \
+        -e 's/examples\/common\.h/examples\/common.h/g' \
+        -e 's/examples\/common\.cpp/examples\/common.cpp/g' \
+        -e 's/examples\/common-ggml\.h/examples\/common-ggml.h/g' \
+        -e 's/examples\/common-ggml\.cpp/examples\/common-ggml.cpp/g' \
+        -e 's/examples\/whisper\/grammar-parser\.h/examples\/grammar-parser.h/g' \
+        -e 's/examples\/whisper\/grammar-parser\.cpp/examples\/grammar-parser.cpp/g' \
+        -e 's/examples\/whisper\/whisper\.h/whisper.h/g' \
+        -e 's/examples\/whisper\/whisper\.cpp/whisper.cpp/g' \
+        -e 's/examples\/whisper\/main\.cpp/examples\/main\/main.cpp/g' \
+        -e 's/examples\/whisper\/quantize\.cpp/examples\/quantize\/quantize.cpp/g' \
+        > ggml-src.patch.tmp
+    mv ggml-src.patch.tmp ggml-src.patch
+
+    git am ggml-src.patch
+
+    rm -v $SRC_WHISPER/ggml-src.patch
+fi
+
+# update last commit
+cd $SRC_GGML
+git log -1 --format=%H > $SRC_WHISPER/scripts/sync-ggml.last
+
+echo "Done"
+
+exit 0
--- a/scripts/sync-ggml.last
+++ b/scripts/sync-ggml.last
@ -0,0 +1 @@
+bb8d8cff851b2de6fde4904be492d39458837e1a
--- a/scripts/sync-ggml.sh
+++ b/scripts/sync-ggml.sh
@ -0,0 +1,43 @@
+#!/bin/bash
+
+cp -rpv ../ggml/src/ggml.c              ./ggml.c
+cp -rpv ../ggml/src/ggml-impl.h         ./ggml-impl.h
+cp -rpv ../ggml/src/ggml-alloc.c        ./ggml-alloc.c
+cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
+cp -rpv ../ggml/src/ggml-backend.c      ./ggml-backend.c
+cp -rpv ../ggml/src/ggml-common.h       ./ggml-common.h
+cp -rpv ../ggml/src/ggml-cuda/*         ./ggml-cuda/
+cp -rpv ../ggml/src/ggml-cuda.cu        ./ggml-cuda.cu
+cp -rpv ../ggml/src/ggml-cuda.h         ./ggml-cuda.h
+cp -rpv ../ggml/src/ggml-kompute.cpp    ./ggml-kompute.cpp
+cp -rpv ../ggml/src/ggml-kompute.h      ./ggml-kompute.h
+cp -rpv ../ggml/src/ggml-metal.h        ./ggml-metal.h
+cp -rpv ../ggml/src/ggml-metal.m        ./ggml-metal.m
+cp -rpv ../ggml/src/ggml-metal.metal    ./ggml-metal.metal
+#cp -rpv ../ggml/src/ggml-mpi.h          ./ggml-mpi.h
+#cp -rpv ../ggml/src/ggml-mpi.c          ./ggml-mpi.c
+cp -rpv ../ggml/src/ggml-opencl.cpp     ./ggml-opencl.cpp
+cp -rpv ../ggml/src/ggml-opencl.h       ./ggml-opencl.h
+cp -rpv ../ggml/src/ggml-quants.c       ./ggml-quants.c
+cp -rpv ../ggml/src/ggml-quants.h       ./ggml-quants.h
+cp -rpv ../ggml/src/ggml-sycl.cpp       ./ggml-sycl.cpp
+cp -rpv ../ggml/src/ggml-sycl.h         ./ggml-sycl.h
+cp -rpv ../ggml/src/ggml-vulkan.cpp     ./ggml-vulkan.cpp
+cp -rpv ../ggml/src/ggml-vulkan.h       ./ggml-vulkan.h
+
+cp -rpv ../ggml/include/ggml/ggml.h         ./ggml.h
+cp -rpv ../ggml/include/ggml/ggml-alloc.h   ./ggml-alloc.h
+cp -rpv ../ggml/include/ggml/ggml-backend.h ./ggml-backend.h
+
+cp -rpv ../ggml/examples/common.h                   ./examples/common.h
+cp -rpv ../ggml/examples/common.cpp                 ./examples/common.cpp
+cp -rpv ../ggml/examples/common-ggml.h              ./examples/common-ggml.h
+cp -rpv ../ggml/examples/common-ggml.cpp            ./examples/common-ggml.cpp
+cp -rpv ../ggml/examples/whisper/grammar-parser.h   ./examples/grammar-parser.h
+cp -rpv ../ggml/examples/whisper/grammar-parser.cpp ./examples/grammar-parser.cpp
+
+cp -rpv ../ggml/examples/whisper/whisper.h    ./whisper.h
+cp -rpv ../ggml/examples/whisper/whisper.cpp  ./whisper.cpp
+cp -rpv ../ggml/examples/whisper/main.cpp     ./examples/main/main.cpp
+cp -rpv ../ggml/examples/whisper/quantize.cpp ./examples/quantize/quantize.cpp
+
--- a/scripts/sync-llama.sh
+++ b/scripts/sync-llama.sh
@ -0,0 +1,8 @@
+#!/bin/bash
+
+cp -rpv ../llama.cpp/llama.h          ./examples/talk-llama/llama.h
+cp -rpv ../llama.cpp/llama.cpp        ./examples/talk-llama/llama.cpp
+cp -rpv ../llama.cpp/unicode.h        ./examples/talk-llama/unicode.h
+cp -rpv ../llama.cpp/unicode.cpp      ./examples/talk-llama/unicode.cpp
+cp -rpv ../llama.cpp/unicode-data.h   ./examples/talk-llama/unicode-data.h
+cp -rpv ../llama.cpp/unicode-data.cpp ./examples/talk-llama/unicode-data.cpp