build : Add Moore Threads GPU support and update GitHub workflow for MUSA build (#3069)
* Update PATH for main/main-cuda container
* Add Dockerfile for musa, .dockerignore and update CI
* Add Moore Threads GPU Support in README.md and replace ./main with whisper-cli
* Forward GGML_CUDA/GGML_MUSA to cmake in Makefile
* Minor updates for PATH ENV in Dockerfiles
* Address comments

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
Parent: f9b2dfdd8c
Commit: 50218b935d
.devops/main-cuda.Dockerfile

@@ -13,8 +13,6 @@ WORKDIR /app
 ARG CUDA_DOCKER_ARCH=all
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable cuBLAS
-ENV GGML_CUDA=1
 
 RUN apt-get update && \
     apt-get install -y build-essential libsdl2-dev wget cmake git \
@@ -25,7 +23,8 @@ ENV CUDA_MAIN_VERSION=12.3
 ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
 
 COPY .. .
-RUN make base.en
+# Enable cuBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1"
 
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 ENV CUDA_MAIN_VERSION=12.3
@@ -37,4 +36,5 @@ RUN apt-get update && \
     && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
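Note: `GGML_CUDA=1` moves from an image-wide `ENV` to the build invocation, because the Makefile (see its diff below) now forwards `CMAKE_ARGS` to cmake. A sketch of what the updated build stage effectively runs:

```
# Equivalent of `make base.en CMAKE_ARGS="-DGGML_CUDA=1"` (sketch; the make
# target also downloads the base.en model via models/download-ggml-model.sh)
cmake -B build -DGGML_CUDA=1
cmake --build build --config Release
```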
.devops/main-musa.Dockerfile (new file, +29)
@@ -0,0 +1,29 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.1
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y build-essential libsdl2-dev wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY .. .
+# Enable muBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1"
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y curl ffmpeg wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
+ENTRYPOINT [ "bash", "-c" ]
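A minimal sketch of building and running this image locally, assuming a host with the MUSA container runtime set up (CI publishes the same image as `ghcr.io/ggml-org/whisper.cpp:main-musa`, per the workflow change below); the `whisper.cpp:main-musa` tag here is just a local name:

```
# Build from the repository root
docker build -f .devops/main-musa.Dockerfile -t whisper.cpp:main-musa .

# Run transcription; whisper-cli resolves via ENV PATH=/app/build/bin:$PATH.
# GPU passthrough flags depend on the host's MUSA runtime configuration.
docker run -it --rm -v path/to/models:/models \
  whisper.cpp:main-musa "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
```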
.devops/main.Dockerfile

@@ -16,4 +16,5 @@ RUN apt-get update && \
     && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
.dockerignore (new file, +3)
@@ -0,0 +1,3 @@
+build*/
+.github/
+.devops/
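These exclusions presumably keep stale local build trees and the CI/devops files out of the `COPY .. .` build context used by the Dockerfiles above, so a leftover host-side `build/` directory cannot leak into the image.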
.github/workflows/docker.yml (+1)
@@ -18,6 +18,7 @@ jobs:
       matrix:
         config:
           - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
+          - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
           #TODO: the cuda image keeps failing - disable for now
           # https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
           #- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
|
4
Makefile
4
Makefile
@@ -4,7 +4,7 @@
 
 .PHONY: build
 build:
-	cmake -B build
+	cmake -B build $(CMAKE_ARGS)
 	cmake --build build --config Release
 
 # download a few audio samples into folder "./samples":
@@ -41,7 +41,7 @@ samples:
 
 tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
 	bash ./models/download-ggml-model.sh $@
-	cmake -B build
+	cmake -B build $(CMAKE_ARGS)
 	cmake --build build --config Release
 	@echo ""
 	@echo "==============================================="
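With `$(CMAKE_ARGS)` forwarded to cmake, callers can inject backend flags without editing the Makefile; this is exactly how the Dockerfiles above enable their backends. For example:

```
make base.en CMAKE_ARGS="-DGGML_CUDA=1"   # NVIDIA, via cuBLAS
make base.en CMAKE_ARGS="-DGGML_MUSA=1"   # Moore Threads, via muBLAS
make base.en                              # CPU-only build (CMAKE_ARGS empty)
```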
|
25
README.md
25
README.md
@@ -23,6 +23,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
 - [OpenVINO Support](#openvino-support)
 - [Ascend NPU Support](#ascend-npu-support)
+- [Moore Threads GPU Support](#moore-threads-gpu-support)
 - [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h)
 
 Supported platforms:
@@ -381,6 +382,25 @@ Run the inference examples as usual, for example:
 - If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag.
 - If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
 
+## Moore Threads GPU support
+
+With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
+First, make sure you have installed `MUSA SDK rc3.1.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=rc3.1.1
+
+Now build `whisper.cpp` with MUSA support:
+
+```
+cmake -B build -DGGML_MUSA=1
+cmake --build build -j --config Release
+```
+
+or specify the architecture for your Moore Threads GPU. For example, if you have an MTT S80 GPU, you can specify the architecture as follows:
+
+```
+cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21"
+cmake --build build -j --config Release
+```
+
 ## FFmpeg support (Linux only)
 
 If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration.
@@ -425,6 +445,7 @@ We have two Docker images available for this project:
 
 1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
 2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
+3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`)
 
 ### Usage
 
@@ -437,11 +458,11 @@ docker run -it --rm \
 docker run -it --rm \
   -v path/to/models:/models \
   -v path/to/audios:/audios \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
 # transcribe an audio file in samples folder
 docker run -it --rm \
   -v path/to/models:/models \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
 ```
 
 ## Installing with Conan
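After a native MUSA (or CUDA) build, the binary lands in `build/bin`, the same location the Dockerfiles now put on `PATH`. A quick smoke test, assuming the base model was downloaded to `models/`:

```
./build/bin/whisper-cli -m models/ggml-base.bin -f samples/jfk.wav
```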