build : Add Moore Threads GPU support and update GitHub workflow for MUSA build ...
author R0CKSTAR <redacted>
Mon, 28 Apr 2025 08:06:41 +0000 (16:06 +0800)
committer GitHub <redacted>
Mon, 28 Apr 2025 08:06:41 +0000 (11:06 +0300)
* Update PATH for main/main-cuda container

Signed-off-by: Xiaodong Ye <redacted>
* Add Dockerfile for musa, .dockerignore and update CI

Signed-off-by: Xiaodong Ye <redacted>
* Add Moore Threads GPU Support in README.md and replace ./main with whisper-cli

Signed-off-by: Xiaodong Ye <redacted>
* Forward GGML_CUDA/GGML_MUSA to cmake in Makefile

Signed-off-by: Xiaodong Ye <redacted>
* Minor updates for PATH ENV in Dockerfiles

Signed-off-by: Xiaodong Ye <redacted>
* Address comments

Signed-off-by: Xiaodong Ye <redacted>
---------

Signed-off-by: Xiaodong Ye <redacted>
.devops/main-cuda.Dockerfile
.devops/main-musa.Dockerfile [new file with mode: 0644]
.devops/main.Dockerfile
.dockerignore [new file with mode: 0644]
.github/workflows/docker.yml
Makefile
README.md

diff --git a/.devops/main-cuda.Dockerfile b/.devops/main-cuda.Dockerfile
index 75a395c70f2f3a0cfbae02311d04618d872d7515..b7ca281f5b3f16cc32f9bd4b30a301eb64cc1dbe 100644 (file)
--- a/.devops/main-cuda.Dockerfile
+++ b/.devops/main-cuda.Dockerfile
@@ -13,8 +13,6 @@ WORKDIR /app
 ARG CUDA_DOCKER_ARCH=all
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable cuBLAS
-ENV GGML_CUDA=1
 
 RUN apt-get update && \
     apt-get install -y build-essential libsdl2-dev wget cmake git \
@@ -25,7 +23,8 @@ ENV CUDA_MAIN_VERSION=12.3
 ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
 
 COPY .. .
-RUN make base.en
+# Enable cuBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1"
 
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 ENV CUDA_MAIN_VERSION=12.3
@@ -37,4 +36,5 @@ RUN apt-get update && \
   && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
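
Because the runtime stage now puts `/app/build/bin` on `PATH`, the CMake-built binaries can be invoked by bare name through the `bash -c` entrypoint. A quick smoke test, assuming the image has been built and tagged locally as `whisper.cpp:main-cuda` (the tag is illustrative):

```
docker run -it --rm whisper.cpp:main-cuda "whisper-cli --help"
```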
diff --git a/.devops/main-musa.Dockerfile b/.devops/main-musa.Dockerfile
new file mode 100644 (file)
index 0000000..fa17a5a
--- /dev/null
+++ b/.devops/main-musa.Dockerfile
@@ -0,0 +1,29 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.1
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y build-essential libsdl2-dev wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY .. .
+# Enable muBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1"
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+WORKDIR /app
+
+RUN apt-get update && \
+  apt-get install -y curl ffmpeg wget cmake git \
+  && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
+ENTRYPOINT [ "bash", "-c" ]
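
Before relying on CI, the new image can be built locally from the repository root; a minimal sketch (the tag mirrors the workflow entry added below):

```
docker build -f .devops/main-musa.Dockerfile -t whisper.cpp:main-musa .
```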
diff --git a/.devops/main.Dockerfile b/.devops/main.Dockerfile
index e8424126057299d78d9b25abf64973a528f31d38..e1eb9b337008485f7eb049c87401d3c3f3256567 100644 (file)
--- a/.devops/main.Dockerfile
+++ b/.devops/main.Dockerfile
@@ -16,4 +16,5 @@ RUN apt-get update && \
   && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
diff --git a/.dockerignore b/.dockerignore
new file mode 100644 (file)
index 0000000..7c5e243
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,3 @@
+build*/
+.github/
+.devops/
\ No newline at end of file
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 55f75f0c83e425a6271de5f15479c324137c8f62..d8e093a5edb7538f04af9d928e3a0b89e711757a 100644 (file)
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -18,6 +18,7 @@ jobs:
       matrix:
         config:
           - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
+          - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
           #TODO: the cuda image keeps failing - disable for now
           #      https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
           #- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
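
Once this workflow entry publishes successfully, the image should be pullable alongside the existing tags (registry path as listed in the README below):

```
docker pull ghcr.io/ggml-org/whisper.cpp:main-musa
```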
diff --git a/Makefile b/Makefile
index dbda58acc446e35aab3dcb0ccdf15604c636a001..359e701b00683e1a758bbb33fd6ff40d8970ea8c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@
 
 .PHONY: build
 build:
-       cmake -B build
+       cmake -B build $(CMAKE_ARGS)
        cmake --build build --config Release
 
 # download a few audio samples into folder "./samples":
@@ -41,7 +41,7 @@ samples:
 
 tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
        bash ./models/download-ggml-model.sh $@
-       cmake -B build
+       cmake -B build $(CMAKE_ARGS)
        cmake --build build --config Release
        @echo ""
        @echo "==============================================="
diff --git a/README.md b/README.md
index f51b88ded11d9d233035a134fed285c1dfa3929e..c9aa82157b37c104807e91eeaffb8bd6fe1cfcf8 100644 (file)
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
 - [OpenVINO Support](#openvino-support)
 - [Ascend NPU Support](#ascend-npu-support)
+- [Moore Threads GPU Support](#moore-threads-gpu-support)
 - [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h)
 
 Supported platforms:
@@ -381,6 +382,25 @@ Run the inference examples as usual, for example:
 - If you have trouble with your Ascend NPU device, please create an issue with the **[CANN]** prefix/tag.
 - If you run successfully with your Ascend NPU device, please help update the `Verified devices` table.
 
+## Moore Threads GPU support
+
+With Moore Threads cards, the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
+First, make sure you have installed `MUSA SDK rc3.1.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=rc3.1.1
+
+Now build `whisper.cpp` with MUSA support:
+
+```
+cmake -B build -DGGML_MUSA=1
+cmake --build build -j --config Release
+```
+
+or specify the architecture for your Moore Threads GPU. For example, if you have an MTT S80 GPU, you can specify the architecture as follows:
+
+```
+cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21"
+cmake --build build -j --config Release
+```
+
 ## FFmpeg support (Linux only)
 
 If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration.
@@ -425,6 +445,7 @@ We have two Docker images available for this project:
 
 1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
 2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
+3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`)
 
 ### Usage
 
@@ -437,11 +458,11 @@ docker run -it --rm \
 docker run -it --rm \
   -v path/to/models:/models \
   -v path/to/audios:/audios \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
 # transcribe an audio file in samples folder
 docker run -it --rm \
   -v path/to/models:/models \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
 ```
 
 ## Installing with Conan
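
The existing usage examples carry over to the new MUSA image unchanged; a sketch, with any flags needed to expose the Moore Threads GPU to the container omitted here, since they depend on the host's MUSA container runtime:

```
docker run -it --rm \
  -v path/to/models:/models \
  -v path/to/audios:/audios \
  whisper.cpp:main-musa "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
```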