build : Add Moore Threads GPU support and update GitHub workflow for MUSA build ...
author R0CKSTAR <redacted>
Mon, 28 Apr 2025 08:06:41 +0000 (16:06 +0800)
committer GitHub <redacted>
Mon, 28 Apr 2025 08:06:41 +0000 (11:06 +0300)
* Update PATH for main/main-cuda container

Signed-off-by: Xiaodong Ye <redacted>
* Add Dockerfile for musa, .dockerignore and update CI

Signed-off-by: Xiaodong Ye <redacted>
* Add Moore Threads GPU Support in README.md and replace ./main with whisper-cli

Signed-off-by: Xiaodong Ye <redacted>
* Forward GGML_CUDA/GGML_MUSA to cmake in Makefile

Signed-off-by: Xiaodong Ye <redacted>
* Minor updates for PATH ENV in Dockerfiles

Signed-off-by: Xiaodong Ye <redacted>
* Address comments

Signed-off-by: Xiaodong Ye <redacted>
---------

Signed-off-by: Xiaodong Ye <redacted>
.devops/main-cuda.Dockerfile
.devops/main-musa.Dockerfile [new file with mode: 0644]
.devops/main.Dockerfile
.dockerignore [new file with mode: 0644]
.github/workflows/docker.yml
Makefile
README.md

diff --git a/.devops/main-cuda.Dockerfile b/.devops/main-cuda.Dockerfile
index 75a395c70f2f3a0cfbae02311d04618d872d7515..b7ca281f5b3f16cc32f9bd4b30a301eb64cc1dbe 100644 (file)
--- a/.devops/main-cuda.Dockerfile
+++ b/.devops/main-cuda.Dockerfile
@@ -13,8 +13,6 @@ WORKDIR /app
 ARG CUDA_DOCKER_ARCH=all
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable cuBLAS
-ENV GGML_CUDA=1
 
 RUN apt-get update && \
     apt-get install -y build-essential libsdl2-dev wget cmake git \
@@ -25,7 +23,8 @@ ENV CUDA_MAIN_VERSION=12.3
 ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
 
 COPY .. .
-RUN make base.en
+# Enable cuBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1"
 
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 ENV CUDA_MAIN_VERSION=12.3
@@ -37,4 +36,5 @@ RUN apt-get update && \
   && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
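
Because the runtime stage now puts `/app/build/bin` on `PATH`, the CMake-built binaries can be invoked by bare name through the `bash -c` entrypoint. A quick smoke test, assuming the image has been built and tagged locally as `whisper.cpp:main-cuda` (the tag is illustrative):

```
docker run -it --rm whisper.cpp:main-cuda "whisper-cli --help"
```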
diff --git a/.devops/main-musa.Dockerfile b/.devops/main-musa.Dockerfile
new file mode 100644 (file)
index 0000000..fa17a5a
--- /dev/null
+++ b/.devops/main-musa.Dockerfile
@@ -0,0 +1,29 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.1
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y build-essential libsdl2-dev wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY .. .
+# Enable muBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1"
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+WORKDIR /app
+
+RUN apt-get update && \
+  apt-get install -y curl ffmpeg wget cmake git \
+  && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
+ENTRYPOINT [ "bash", "-c" ]
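
Before relying on CI, the new image can be built locally from the repository root; a minimal sketch (the tag mirrors the workflow entry added below):

```
docker build -f .devops/main-musa.Dockerfile -t whisper.cpp:main-musa .
```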
diff --git a/.devops/main.Dockerfile b/.devops/main.Dockerfile
index e8424126057299d78d9b25abf64973a528f31d38..e1eb9b337008485f7eb049c87401d3c3f3256567 100644 (file)
--- a/.devops/main.Dockerfile
+++ b/.devops/main.Dockerfile
@@ -16,4 +16,5 @@ RUN apt-get update && \
   && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
diff --git a/.dockerignore b/.dockerignore
new file mode 100644 (file)
index 0000000..7c5e243
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,3 @@
+build*/
+.github/
+.devops/
\ No newline at end of file
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 55f75f0c83e425a6271de5f15479c324137c8f62..d8e093a5edb7538f04af9d928e3a0b89e711757a 100644 (file)
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -18,6 +18,7 @@ jobs:
       matrix:
         config:
           - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
+          - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
           #TODO: the cuda image keeps failing - disable for now
           #      https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
           #- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
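
Once this workflow entry publishes successfully, the image should be pullable alongside the existing tags (registry path as listed in the README below):

```
docker pull ghcr.io/ggml-org/whisper.cpp:main-musa
```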
diff --git a/Makefile b/Makefile
index dbda58acc446e35aab3dcb0ccdf15604c636a001..359e701b00683e1a758bbb33fd6ff40d8970ea8c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@
 
 .PHONY: build
 build:
-       cmake -B build
+       cmake -B build $(CMAKE_ARGS)
        cmake --build build --config Release
 
 # download a few audio samples into folder "./samples":
@@ -41,7 +41,7 @@ samples:
 
 tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
        bash ./models/download-ggml-model.sh $@
-       cmake -B build
+       cmake -B build $(CMAKE_ARGS)
        cmake --build build --config Release
        @echo ""
        @echo "==============================================="
diff --git a/README.md b/README.md
index f51b88ded11d9d233035a134fed285c1dfa3929e..c9aa82157b37c104807e91eeaffb8bd6fe1cfcf8 100644 (file)
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
 - [OpenVINO Support](#openvino-support)
 - [Ascend NPU Support](#ascend-npu-support)
+- [Moore Threads GPU Support](#moore-threads-gpu-support)
 - [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h)
 
 Supported platforms:
@@ -381,6 +382,25 @@ Run the inference examples as usual, for example:
 - If you have trouble with your Ascend NPU device, please create an issue with the **[CANN]** prefix/tag.
 - If you run successfully with your Ascend NPU device, please help update the `Verified devices` table.
 
+## Moore Threads GPU support
+
+With Moore Threads cards, the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
+First, make sure you have installed `MUSA SDK rc3.1.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=rc3.1.1
+
+Now build `whisper.cpp` with MUSA support:
+
+```
+cmake -B build -DGGML_MUSA=1
+cmake --build build -j --config Release
+```
+
+or specify the architecture for your Moore Threads GPU. For example, if you have an MTT S80 GPU, you can specify the architecture as follows:
+
+```
+cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21"
+cmake --build build -j --config Release
+```
+
 ## FFmpeg support (Linux only)
 
 If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration.
@@ -425,6 +445,7 @@ We have two Docker images available for this project:
 
 1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
 2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
+3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`)
 
 ### Usage
 
@@ -437,11 +458,11 @@ docker run -it --rm \
 docker run -it --rm \
   -v path/to/models:/models \
   -v path/to/audios:/audios \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
 # transcribe an audio file in samples folder
 docker run -it --rm \
   -v path/to/models:/models \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
 ```
 
 ## Installing with Conan
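
The existing usage examples carry over to the new MUSA image unchanged; a sketch, with any flags needed to expose the Moore Threads GPU to the container omitted here, since they depend on the host's MUSA container runtime:

```
docker run -it --rm \
  -v path/to/models:/models \
  -v path/to/audios:/audios \
  whisper.cpp:main-musa "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
```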