ARG CUDA_DOCKER_ARCH=all
# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable cuBLAS
-ENV GGML_CUDA=1
RUN apt-get update && \
apt-get install -y build-essential libsdl2-dev wget cmake git \
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
COPY .. .
-RUN make base.en
+# Enable cuBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1"
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
ENV CUDA_MAIN_VERSION=12.3
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
ENTRYPOINT [ "bash", "-c" ]
--- /dev/null
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.1
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+# Build stage: compiles whisper.cpp against the MUSA SDK shipped in the devel image.
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+WORKDIR /app
+
+# --no-install-recommends keeps the layer minimal (hadolint DL3015); the apt list
+# cleanup is in the same RUN so the package cache never persists in a layer.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends build-essential libsdl2-dev wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY .. .
+# Enable muBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1"
+
+# Runtime stage: only the build artifacts plus the tools needed to fetch models
+# and convert audio (curl/ffmpeg), on top of the slimmer MUSA runtime image.
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends curl ffmpeg wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
+ENTRYPOINT [ "bash", "-c" ]
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
ENTRYPOINT [ "bash", "-c" ]
--- /dev/null
+build*/
+.github/
+.devops/
\ No newline at end of file
matrix:
config:
- { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
+ - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
#TODO: the cuda image keeps failing - disable for now
# https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
#- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
.PHONY: build
build:
- cmake -B build
+ cmake -B build $(CMAKE_ARGS)
cmake --build build --config Release
# download a few audio samples into folder "./samples":
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
bash ./models/download-ggml-model.sh $@
- cmake -B build
+ cmake -B build $(CMAKE_ARGS)
cmake --build build --config Release
@echo ""
@echo "==============================================="
- [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
- [OpenVINO Support](#openvino-support)
- [Ascend NPU Support](#ascend-npu-support)
+- [Moore Threads GPU Support](#moore-threads-gpu-support)
- [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h)
Supported platforms:
- If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag.
- If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
+## Moore Threads GPU support
+
+With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
+First, make sure you have installed `MUSA SDK rc3.1.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=rc3.1.1
+
+Now build `whisper.cpp` with MUSA support:
+
+```
+cmake -B build -DGGML_MUSA=1
+cmake --build build -j --config Release
+```
+
+or specify the architecture for your Moore Threads GPU. For example, if you have a MTT S80 GPU, you can specify the architecture as follows:
+
+```
+cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21"
+cmake --build build -j --config Release
+```
+
## FFmpeg support (Linux only)
If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration.
1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
+3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`)
### Usage
docker run -it --rm \
-v path/to/models:/models \
-v path/to/audios:/audios \
- whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
+ whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
# transcribe an audio file in samples folder
docker run -it --rm \
-v path/to/models:/models \
- whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
+ whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
```
## Installing with Conan