From 50218b935d69513f0fe28499b0ef9fba6470515e Mon Sep 17 00:00:00 2001 From: R0CKSTAR Date: Mon, 28 Apr 2025 16:06:41 +0800 Subject: [PATCH] build : Add Moore Threads GPU support and update GitHub workflow for MUSA build (#3069) * Update PATH for main/main-cuda container Signed-off-by: Xiaodong Ye * Add Dockerfile for musa, .dockerignore and update CI Signed-off-by: Xiaodong Ye * Add Moore Threads GPU Support in README.md and replace ./main with whisper-cli Signed-off-by: Xiaodong Ye * Forward GGML_CUDA/GGML_MUSA to cmake in Makefile Signed-off-by: Xiaodong Ye * Minor updates for PATH ENV in Dockerfiles Signed-off-by: Xiaodong Ye * Address comments Signed-off-by: Xiaodong Ye --------- Signed-off-by: Xiaodong Ye --- .devops/main-cuda.Dockerfile | 6 +++--- .devops/main-musa.Dockerfile | 29 +++++++++++++++++++++++++++++ .devops/main.Dockerfile | 1 + .dockerignore | 3 +++ .github/workflows/docker.yml | 1 + Makefile | 4 ++-- README.md | 25 +++++++++++++++++++++++-- 7 files changed, 62 insertions(+), 7 deletions(-) create mode 100644 .devops/main-musa.Dockerfile create mode 100644 .dockerignore diff --git a/.devops/main-cuda.Dockerfile b/.devops/main-cuda.Dockerfile index 75a395c7..b7ca281f 100644 --- a/.devops/main-cuda.Dockerfile +++ b/.devops/main-cuda.Dockerfile @@ -13,8 +13,6 @@ WORKDIR /app ARG CUDA_DOCKER_ARCH=all # Set nvcc architecture ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -# Enable cuBLAS -ENV GGML_CUDA=1 RUN apt-get update && \ apt-get install -y build-essential libsdl2-dev wget cmake git \ @@ -25,7 +23,8 @@ ENV CUDA_MAIN_VERSION=12.3 ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH COPY .. . -RUN make base.en +# Enable cuBLAS +RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1" FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime ENV CUDA_MAIN_VERSION=12.3 @@ -37,4 +36,5 @@ RUN apt-get update && \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* COPY --from=build /app /app +ENV PATH=/app/build/bin:$PATH ENTRYPOINT [ "bash", "-c" ] diff --git a/.devops/main-musa.Dockerfile b/.devops/main-musa.Dockerfile new file mode 100644 index 00000000..fa17a5a6 --- /dev/null +++ b/.devops/main-musa.Dockerfile @@ -0,0 +1,29 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG MUSA_VERSION=rc3.1.1 +# Target the MUSA build image +ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} +# Target the MUSA runtime image +ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_MUSA_DEV_CONTAINER} AS build +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y build-essential libsdl2-dev wget cmake git \ + && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* + +COPY .. . +# Enable muBLAS +RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1" + +FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y curl ffmpeg wget cmake git \ + && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* + +COPY --from=build /app /app +ENV PATH=/app/build/bin:$PATH +ENTRYPOINT [ "bash", "-c" ] diff --git a/.devops/main.Dockerfile b/.devops/main.Dockerfile index e8424126..e1eb9b33 100644 --- a/.devops/main.Dockerfile +++ b/.devops/main.Dockerfile @@ -16,4 +16,5 @@ RUN apt-get update && \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* COPY --from=build /app /app +ENV PATH=/app/build/bin:$PATH ENTRYPOINT [ "bash", "-c" ] diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..7c5e2438 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +build*/ +.github/ +.devops/ \ No newline at end of file diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 55f75f0c..d8e093a5 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -18,6 +18,7 @@ jobs: matrix: config: - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" } + - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" } #TODO: the cuda image keeps failing - disable for now # https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339 #- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" } diff --git a/Makefile b/Makefile index dbda58ac..359e701b 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ .PHONY: build build: - cmake -B build + cmake -B build $(CMAKE_ARGS) cmake --build build --config Release # download a few audio samples into folder "./samples": @@ -41,7 +41,7 @@ samples: tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo: bash ./models/download-ggml-model.sh $@ - cmake -B build + cmake -B build $(CMAKE_ARGS) cmake --build build --config Release @echo "" @echo "===============================================" diff --git a/README.md b/README.md index f51b88de..c9aa8215 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp - [Efficient GPU support for NVIDIA](#nvidia-gpu-support) - [OpenVINO Support](#openvino-support) - [Ascend NPU Support](#ascend-npu-support) +- [Moore Threads GPU Support](#moore-threads-gpu-support) - [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h) Supported platforms: @@ -381,6 +382,25 @@ Run the inference examples as usual, for example: - If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag. - If you run successfully with your Ascend NPU device, please help update the table `Verified devices`. +## Moore Threads GPU support + +With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels. +First, make sure you have installed `MUSA SDK rc3.1.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=rc3.1.1 + +Now build `whisper.cpp` with MUSA support: + +``` +cmake -B build -DGGML_MUSA=1 +cmake --build build -j --config Release +``` + +or specify the architecture for your Moore Threads GPU. For example, if you have a MTT S80 GPU, you can specify the architecture as follows: + +``` +cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21" +cmake --build build -j --config Release +``` + ## FFmpeg support (Linux only) If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration. @@ -425,6 +445,7 @@ We have two Docker images available for this project: 1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`) 2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`) +3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`) ### Usage @@ -437,11 +458,11 @@ docker run -it --rm \ docker run -it --rm \ -v path/to/models:/models \ -v path/to/audios:/audios \ - whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav" + whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav" # transcribe an audio file in samples folder docker run -it --rm \ -v path/to/models:/models \ - whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav" + whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav" ``` ## Installing with Conan