cmake : fix compile assumptions for power9/etc (#2777)

author midnight <redacted>

Wed, 5 Feb 2025 12:41:10 +0000 (04:41 -0800)

committer GitHub <redacted>

Wed, 5 Feb 2025 12:41:10 +0000 (14:41 +0200)
author midnight <redacted>
Wed, 5 Feb 2025 12:41:10 +0000 (04:41 -0800)
committer GitHub <redacted>
Wed, 5 Feb 2025 12:41:10 +0000 (14:41 +0200)
diff --git a/README.md b/README.md

index 14609866fc6ab69e26747cae3b0aaa50fe108e79..9748969c30fb6cfd7b4df5bb38cf22407cca5c8e 100644 (file)
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
  - Plain C/C++ implementation without dependencies
  - Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
  - AVX intrinsics support for x86 architectures
-- VSX intrinsics support for POWER architectures
+- [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics)
  - Mixed F16 / F32 precision
  - [Integer quantization support](#quantization)
  - Zero memory allocations at runtime
@@ -139,6 +139,20 @@ make -j large-v3-turbo
  | medium | 1.5 GiB | ~2.1 GB |
  | large  | 2.9 GiB | ~3.9 GB |
  
+## POWER VSX Intrinsics
+
+`whisper.cpp` supports POWER architectures and includes code which
+significantly speeds up operation on Linux running on POWER9/10, making it
+capable of faster-than-realtime transcription on an underclocked Raptor
+Talos II. Ensure you have a BLAS package installed, and replace the
+standard cmake setup with:
+
+```bash
+# build with GGML_BLAS defined
+cmake -B build -DGGML_BLAS=1
+cmake --build build --config Release
+./build/bin/whisper-cli [ .. etc .. ]
+```
  ## Quantization
  
  `whisper.cpp` supports integer quantization of the Whisper `ggml` models.
diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt

index 6b3641c4263c711d176aecf678075a44ae25f9b0..26533e512aef3b554e647cdc799c69c18697bade 100644 (file)
--- a/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/ggml/src/ggml-cpu/CMakeLists.txt
@@ -279,19 +279,15 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
          endif()
      elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
          message(STATUS "PowerPC detected")
-        execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER10_M)
-        string(FIND "${POWER10_M}" "POWER10" substring_index)
-        if (NOT DEFINED substring_index OR "${substring_index}" STREQUAL "")
-            set(substring_index -1)
-        endif()
-
-        if (${substring_index} GREATER_EQUAL 0)
-        list(APPEND ARCH_FLAGS -mcpu=power10)
+        execute_process(COMMAND bash -c "grep POWER /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER_M)
+        if ("${POWER_M}" MATCHES "POWER10")  # quoted: POWER_M is empty when grep finds no match
+            list(APPEND ARCH_FLAGS -mcpu=power10)
+        elseif ("${POWER_M}" MATCHES "POWER9")
+            list(APPEND ARCH_FLAGS -mcpu=power9)
          elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
-        list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
+            list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
          else()
-            list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
-            # TODO: Add  targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
+            list(APPEND ARCH_FLAGS -mcpu=powerpc64 -mtune=native)
          endif()
      elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
          message(STATUS "loongarch64 detected")
author	midnight <redacted>
	Wed, 5 Feb 2025 12:41:10 +0000 (04:41 -0800)
committer	GitHub <redacted>
	Wed, 5 Feb 2025 12:41:10 +0000 (14:41 +0200)
README.md		patch \| blob \| history
ggml/src/ggml-cpu/CMakeLists.txt		patch \| blob \| history