build : Add Moore Threads GPU support and update GitHub workflow for MUSA build (#3069)
* Update PATH for main/main-cuda container
* Add Dockerfile for musa, .dockerignore and update CI
* Add Moore Threads GPU Support in README.md and replace ./main with whisper-cli
* Forward GGML_CUDA/GGML_MUSA to cmake in Makefile
* Minor updates for PATH ENV in Dockerfiles
* Address comments

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
Parent: f9b2dfdd8c
Commit: 50218b935d
.devops/main-cuda.Dockerfile

@@ -13,8 +13,6 @@ WORKDIR /app
 ARG CUDA_DOCKER_ARCH=all
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable cuBLAS
-ENV GGML_CUDA=1
 
 RUN apt-get update && \
     apt-get install -y build-essential libsdl2-dev wget cmake git \
@@ -25,7 +23,8 @@ ENV CUDA_MAIN_VERSION=12.3
 ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
 
 COPY .. .
-RUN make base.en
+# Enable cuBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1"
 
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 ENV CUDA_MAIN_VERSION=12.3
@@ -37,4 +36,5 @@ RUN apt-get update && \
     && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
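Note: `GGML_CUDA=1` moves from an image-wide `ENV` to the build invocation, because the Makefile (see its diff below) now forwards `CMAKE_ARGS` to cmake. A sketch of what the updated build stage effectively runs:

```
# Equivalent of `make base.en CMAKE_ARGS="-DGGML_CUDA=1"` (sketch; the make
# target also downloads the base.en model via models/download-ggml-model.sh)
cmake -B build -DGGML_CUDA=1
cmake --build build --config Release
```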
.devops/main-musa.Dockerfile (new file, +29)
@@ -0,0 +1,29 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.1
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y build-essential libsdl2-dev wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY .. .
+# Enable muBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1"
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y curl ffmpeg wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
+ENTRYPOINT [ "bash", "-c" ]
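A minimal sketch of building and running this image locally, assuming a host with the MUSA container runtime set up (CI publishes the same image as `ghcr.io/ggml-org/whisper.cpp:main-musa`, per the workflow change below); the `whisper.cpp:main-musa` tag here is just a local name:

```
# Build from the repository root
docker build -f .devops/main-musa.Dockerfile -t whisper.cpp:main-musa .

# Run transcription; whisper-cli resolves via ENV PATH=/app/build/bin:$PATH.
# GPU passthrough flags depend on the host's MUSA runtime configuration.
docker run -it --rm -v path/to/models:/models \
  whisper.cpp:main-musa "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
```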
.devops/main.Dockerfile

@@ -16,4 +16,5 @@ RUN apt-get update && \
     && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
.dockerignore (new file, +3)
@@ -0,0 +1,3 @@
+build*/
+.github/
+.devops/
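These exclusions presumably keep stale local build trees and the CI/devops files out of the `COPY .. .` build context used by the Dockerfiles above, so a leftover host-side `build/` directory cannot leak into the image.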
.github/workflows/docker.yml (+1)
@@ -18,6 +18,7 @@ jobs:
       matrix:
         config:
           - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
+          - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
           #TODO: the cuda image keeps failing - disable for now
           # https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
           #- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
|
4
Makefile
4
Makefile
@@ -4,7 +4,7 @@
 
 .PHONY: build
 build:
-	cmake -B build
+	cmake -B build $(CMAKE_ARGS)
 	cmake --build build --config Release
 
 # download a few audio samples into folder "./samples":
@@ -41,7 +41,7 @@ samples:
 
 tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
 	bash ./models/download-ggml-model.sh $@
-	cmake -B build
+	cmake -B build $(CMAKE_ARGS)
 	cmake --build build --config Release
 	@echo ""
 	@echo "==============================================="
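With `$(CMAKE_ARGS)` forwarded to cmake, callers can inject backend flags without editing the Makefile; this is exactly how the Dockerfiles above enable their backends. For example:

```
make base.en CMAKE_ARGS="-DGGML_CUDA=1"   # NVIDIA, via cuBLAS
make base.en CMAKE_ARGS="-DGGML_MUSA=1"   # Moore Threads, via muBLAS
make base.en                              # CPU-only build (CMAKE_ARGS empty)
```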
|
25
README.md
25
README.md
@@ -23,6 +23,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
 - [OpenVINO Support](#openvino-support)
 - [Ascend NPU Support](#ascend-npu-support)
+- [Moore Threads GPU Support](#moore-threads-gpu-support)
 - [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h)
 
 Supported platforms:
@@ -381,6 +382,25 @@ Run the inference examples as usual, for example:
 - If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag.
 - If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
 
+## Moore Threads GPU support
+
+With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
+First, make sure you have installed `MUSA SDK rc3.1.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=rc3.1.1
+
+Now build `whisper.cpp` with MUSA support:
+
+```
+cmake -B build -DGGML_MUSA=1
+cmake --build build -j --config Release
+```
+
+or specify the architecture for your Moore Threads GPU. For example, if you have an MTT S80 GPU, you can specify the architecture as follows:
+
+```
+cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21"
+cmake --build build -j --config Release
+```
+
 ## FFmpeg support (Linux only)
 
 If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration.
@@ -425,6 +445,7 @@ We have two Docker images available for this project:
 
 1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
 2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
+3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`)
 
 ### Usage
 
@@ -437,11 +458,11 @@ docker run -it --rm \
 docker run -it --rm \
   -v path/to/models:/models \
   -v path/to/audios:/audios \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
 # transcribe an audio file in samples folder
 docker run -it --rm \
   -v path/to/models:/models \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
 ```
 
 ## Installing with Conan
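After a native MUSA (or CUDA) build, the binary lands in `build/bin`, the same location the Dockerfiles now put on `PATH`. A quick smoke test, assuming the base model was downloaded to `models/`:

```
./build/bin/whisper-cli -m models/ggml-base.bin -f samples/jfk.wav
```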