git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
musa: support new arch mp_31 and update doc (#12296)
author R0CKSTAR <redacted>
Mon, 10 Mar 2025 17:18:25 +0000 (01:18 +0800)
committer GitHub <redacted>
Mon, 10 Mar 2025 17:18:25 +0000 (18:18 +0100)
Signed-off-by: Xiaodong Ye <redacted>
Makefile
docs/build.md
ggml/src/ggml-musa/CMakeLists.txt

index 5339d490b4e6862d72d08a935bf7d0bfbd3332a6..1f9455eff0aec5c5ff48c58a895b1ca42cfb700b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -836,7 +836,7 @@ ifdef GGML_MUSA
        else
                MUSA_PATH ?= /opt/musa
        endif
-       MUSA_ARCHITECTURES ?= 21;22
+       MUSA_ARCHITECTURES ?= 21;22;31
 
        MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA
        MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib
index 3d8333328fce0d8f9f27d7d9bf9272a33e3a353a..2e3975c14536040c6fbc41a7296bef312362e159 100644 (file)
--- a/docs/build.md
+++ b/docs/build.md
@@ -197,28 +197,52 @@ The following compilation options are also available to tweak performance:
 
 ## MUSA
 
-This provides GPU acceleration using the MUSA cores of your Moore Threads MTT GPU. Make sure to have the MUSA SDK installed. You can download it from here: [MUSA SDK](https://developer.mthreads.com/sdk/download/musa).
+This provides GPU acceleration using a Moore Threads GPU. Make sure to have the [MUSA SDK](https://developer.mthreads.com/musa/musa-sdk) installed.
 
-- Using `CMake`:
+#### Download directly from Moore Threads
 
-  ```bash
-  cmake -B build -DGGML_MUSA=ON
-  cmake --build build --config Release
-  ```
+You may find the official downloads here: [Moore Threads developer site](https://developer.mthreads.com/sdk/download/musa).
 
-  For static build:
+### Compilation
 
-  ```bash
+```bash
+cmake -B build -DGGML_MUSA=ON
+cmake --build build --config Release
+```
+
+#### Override Compute Capability Specifications
+
+By default, all supported compute capabilities are enabled. To customize this behavior, you can specify the `MUSA_ARCHITECTURES` option in the CMake command:
+
+```bash
+cmake -B build -DGGML_MUSA=ON -DMUSA_ARCHITECTURES="21"
+```
+
+This configuration enables only compute capability `2.1` (MTT S80) during compilation, which can help reduce compilation time.
+
+#### Compilation options
+
+Most of the compilation options available for CUDA should also be available for MUSA, though they haven't been thoroughly tested yet.
+
+- For static builds, add `-DBUILD_SHARED_LIBS=OFF` and `-DCMAKE_POSITION_INDEPENDENT_CODE=ON`:
+  ```
   cmake -B build -DGGML_MUSA=ON \
     -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
   cmake --build build --config Release
   ```
 
-The environment variable [`MUSA_VISIBLE_DEVICES`](https://docs.mthreads.com/musa-sdk/musa-sdk-doc-online/programming_guide/Z%E9%99%84%E5%BD%95/) can be used to specify which GPU(s) will be used.
+### Runtime MUSA environment variables
 
-The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted.
+You may set the [MUSA environment variables](https://docs.mthreads.com/musa-sdk/musa-sdk-doc-online/programming_guide/Z%E9%99%84%E5%BD%95/) at runtime.
 
-Most of the compilation options available for CUDA should also be available for MUSA, though they haven't been thoroughly tested yet.
+```bash
+# Use `MUSA_VISIBLE_DEVICES` to hide the first compute device.
+MUSA_VISIBLE_DEVICES="-0" ./build/bin/llama-server --model /srv/models/llama.gguf
+```
+
+### Unified Memory
+
+The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted.
 
 ## HIP
 
index 2c75abf61d67242e35b1528351c124dc6c2c084e..166970ca6bfb8eb3bb235cdf5c71d28db5a65a08 100644 (file)
--- a/ggml/src/ggml-musa/CMakeLists.txt
+++ b/ggml/src/ggml-musa/CMakeLists.txt
@@ -21,7 +21,7 @@ if (MUSAToolkit_FOUND)
     message(STATUS "MUSA Toolkit found")
 
     if (NOT DEFINED MUSA_ARCHITECTURES)
-        set(MUSA_ARCHITECTURES "21;22")
+        set(MUSA_ARCHITECTURES "21;22;31")
     endif()
     message(STATUS "Using MUSA architectures: ${MUSA_ARCHITECTURES}")