mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-08-18 07:30:08 +02:00
whisper : add full CUDA and Metal offloading (#1472)
* whisper : migrate to ggml-backend
* whisper : fix logit reading
* whisper : fix tensor allocation during load
* whisper : fix beam-search with CUDA
* whisper : free backends + fix compile warning
* whisper : print when CUDA is enabled
* whisper : fix CoreML
* make : clean-up
* talk : fix compile warning
* whisper : support ggml_conv with CUDA and Metal (#1473)
* ggml : add CUDA support for ggml_conv
* whisper : remove ggml_repeat for conv bias + single backend
* cuda : fix im2col kernel
* metal : add im2col support + mul mat-vec f16 x f16
* bench-all : add q4 models
* whisper : clean-up
* quantize-all : fix
* ggml : im2col opts
* whisper : avoid whisper_model_data wrapper
* whisper : add note that ggml_mul_mat_pad does not work with CUDA
* whisper : factor out graph compute in common function
* whisper : fixes
* whisper : fix UB with measure buffers
* whisper : try to fix the parallel whisper_state functionality (#1479)
* whisper : try to fix the parallel whisper_state functionality
* whisper : fix multi-state Metal
* whisper : free backend instances in whisper_state
This commit is contained in:
@@ -18,11 +18,11 @@ else
|
||||
fi
|
||||
|
||||
# Model names to iterate over: each size in its original precision followed
# by its quantized variants (q4_0, q4_1, q5_0, q5_1, q8_0).
# Every name is listed exactly once so that no model is processed twice.
models=(
  "tiny"   "tiny-q4_0"   "tiny-q4_1"   "tiny-q5_0"   "tiny-q5_1"   "tiny-q8_0"
  "base"   "base-q4_0"   "base-q4_1"   "base-q5_0"   "base-q5_1"   "base-q8_0"
  "small"  "small-q4_0"  "small-q4_1"  "small-q5_0"  "small-q5_1"  "small-q8_0"
  "medium" "medium-q4_0" "medium-q4_1" "medium-q5_0" "medium-q5_1" "medium-q8_0"
  "large"  "large-q4_0"  "large-q4_1"  "large-q5_0"  "large-q5_1"  "large-q8_0"
)
|
||||
|
||||
if [ "$encoder_only" -eq 0 ]; then
|
||||
@@ -83,6 +83,10 @@ for model in "${models[@]}"; do
|
||||
config="$config COREML"
|
||||
fi
|
||||
|
||||
if [[ $system_info == *"CUDA = 1"* ]]; then
|
||||
config="$config CUDA"
|
||||
fi
|
||||
|
||||
if [[ $system_info == *"METAL = 1"* ]]; then
|
||||
config="$config METAL"
|
||||
fi
|
||||
|
@@ -15,33 +15,13 @@ declare -a filedex
|
||||
cd `dirname $0`
|
||||
cd ../
|
||||
|
||||
# Let's loop across all the objects in the 'models' dir:
|
||||
for i in ./models/*; do
|
||||
# Check to see if it's a file or directory
|
||||
if [ -d "$i" ]; then
|
||||
# It's a directory! We should make sure it's not empty first:
|
||||
if [ "$(ls -A $i)" ]; then
|
||||
# Passed! Let's go searching for bin files (shouldn't need to go more than a layer deep here)
|
||||
for f in "$i"/*.bin; do
|
||||
# [Neuron Activation]
|
||||
newfile=`echo "${f##*/}" | cut -d _ -f 1`;
|
||||
if [ "$newfile" != "q5" ]; then
|
||||
./quantize "${f}" "${i:-4}/${i:9:${#i}-4}-${qtype1}.bin" ${qtype1};
|
||||
./quantize "${f}" "${i:-4}/${i:9:${#i}-4}-${qtype0}.bin" ${qtype0};
|
||||
filedex+=( "${i:-4}/${i:9:${#i}-4}-${qtype1}.bin" "${i:-4}/${i:9:${#i}-4}-${qtype0}.bin" )
|
||||
fi
|
||||
done
|
||||
fi
|
||||
else
|
||||
# It's a file! Let's make sure it's the right type:
|
||||
if [ "${i##*.}" == "bin" ]; then
|
||||
# And we probably want to skip the testing files
|
||||
if [ "${i:9:8}" != "for-test" ]; then
|
||||
# [Neuron Activation]
|
||||
./quantize "${i}" "${i:-4}-${qtype1}.bin" ${qtype1};
|
||||
./quantize "${i}" "${i:-4}-${qtype0}.bin" ${qtype0};
|
||||
filedex+=( "${i:-4}-${qtype1}.bin" "${i:-4}-${qtype0}.bin" )
|
||||
fi
|
||||
# Quantize every non-quantized ggml model found directly under ./models.
#
# Reads:   qtype0, qtype1 (quantization type names, set earlier in the script)
# Appends: the two output paths for each model to the filedex array
#
# The files are selected with a glob instead of parsing `ls | grep`, so names
# containing whitespace or glob characters are handled correctly and the
# match is an exact "ggml-*.bin" rather than a loosely written regex.
for m in models/ggml-*.bin; do
    # Skips directories, and also the literal pattern when nothing matched.
    if [[ ! -f "$m" ]]; then
        continue
    fi

    # Names containing "-q" are already-quantized outputs; leave them alone.
    model_name="${m##*/}"
    if [[ "$model_name" == *-q* ]]; then
        continue
    fi

    # Output path is the input path with "-<qtype>" inserted before ".bin".
    stem="${m%.bin}"
    ./quantize "$m" "${stem}-${qtype1}.bin" "${qtype1}"
    ./quantize "$m" "${stem}-${qtype0}.bin" "${qtype0}"
    filedex+=( "${stem}-${qtype1}.bin" "${stem}-${qtype0}.bin" )
done
|
||||
|
Reference in New Issue
Block a user