* devops: move s390x and ppc64le ci build
we have access to ubuntu-24.04-s390x and ppc64le images now
Signed-off-by: Aaron Teo <redacted>
* devops: disable ppc64le for now since they have compiler errors
Signed-off-by: Aaron Teo <redacted>
* devops: stop warnings as errors
Signed-off-by: Aaron Teo <redacted>
* devops: switch to non-macro flag
Signed-off-by: Aaron Teo <redacted>
* devops: going the llama macro route
Signed-off-by: Aaron Teo <redacted>
* devops: add big-endian gguf test models
Signed-off-by: Aaron Teo <redacted>
* devops: disable ppc64le to test s390x, check test build
Signed-off-by: Aaron Teo <redacted>
* devops: dup .gguf.inp files for big-endian tests
Signed-off-by: Aaron Teo <redacted>
* devops: dup .gguf.out files for big-endian too
Signed-off-by: Aaron Teo <redacted>
* devops: add python setup and endian byteswap
Signed-off-by: Aaron Teo <redacted>
* devops: poor thing does not have s390x python3
Signed-off-by: Aaron Teo <redacted>
* devops: add missing rust compiler for s390x
Signed-off-by: Aaron Teo <redacted>
* devops: try rust actions runner
Signed-off-by: Aaron Teo <redacted>
* Revert "devops: try rust actions runner"
This reverts commit 3f8db04356033d6c1d7eccc75ca396bc5298250c.
Signed-off-by: Aaron Teo <redacted>
* devops: try a different path for rust
Signed-off-by: Aaron Teo <redacted>
* devops: dump home directory and user info
Signed-off-by: Aaron Teo <redacted>
* devops: install gguf-py only
Signed-off-by: Aaron Teo <redacted>
* devops: missed relative path
Signed-off-by: Aaron Teo <redacted>
* devops: remove big-endian files since local swapping is working
Signed-off-by: Aaron Teo <redacted>
* devops: revert test-tokenizer-0 cmakelists
Signed-off-by: Aaron Teo <redacted>
* Fix unicode flags conversion to and from uint16_t
Bitfields are allocated in a different order on s390x.
Signed-off-by: Aaron Teo <redacted>
* Simplify byteswap command
Signed-off-by: Aaron Teo <redacted>
* Add byteswapping and git-lfs for test-tokenizers-ggml-vocabs
Signed-off-by: Aaron Teo <redacted>
* Fix endianness detection in vocab loader
Signed-off-by: Aaron Teo <redacted>
* Disable test-thread-safety on s390x
In this test a model is downloaded,
then immediately loaded to check if more downloads are needed,
and then used for the test.
There is no clean way to separate all those steps
to add byteswapping between them, so just skip this test.
Signed-off-by: Aaron Teo <redacted>
* Fix q8_0 test in test-quantize-fns
vec_signed uses an unexpected rounding mode.
Explicitly use a different rounding function.
Signed-off-by: Aaron Teo <redacted>
* devops: add big-endian stories260K
Signed-off-by: Aaron Teo <redacted>
* devops: add s390x test-eval-callback
Signed-off-by: Aaron Teo <redacted>
* devops: fix test does not exist
Signed-off-by: Aaron Teo <redacted>
* devops: fix model not found llama-eval-callback
Signed-off-by: Aaron Teo <redacted>
* Fix q3_K dot product error in test-quantize-fns on s390x
Array q8bytes had only 4 elements allocated, but 8 elements were accessed.
This led to an out-of-bounds write, a later out-of-bounds read of the
overwritten values, and an incorrect result.
Signed-off-by: Aaron Teo <redacted>
* devops: re-enable ppc64le for testing
Signed-off-by: Aaron Teo <redacted>
* devops: activate test-thread-safety for s390x
Signed-off-by: Aaron Teo <redacted>
* devops: disable ppc64le tests
For some reason it keeps failing the test-thread-safety tests, and I do not
have a machine able to reproduce the failures.
Signed-off-by: Aaron Teo <redacted>
* devops: LLAMA_FATAL_WARNINGS=ON
Signed-off-by: Aaron Teo <redacted>
* Correct repository URL for s390x for test-thread-safety model
Signed-off-by: Aaron Teo <redacted>
* Fix fs_get_cache_directory
Ensure it works even if both XDG_CACHE_HOME and HOME are unset.
This might happen in containers.
Signed-off-by: Aaron Teo <redacted>
* Re-enable CI for ppc64le
Signed-off-by: Aaron Teo <redacted>
* Fortify ggml_rope_impl
Only memcpy data from the sections argument if it is non-NULL.
Signed-off-by: Aaron Teo <redacted>
* Add TODO in struct unicode_cpt_flags to reimplement it in an endian-independent way
* Update URL for big-endian model
* Update .github/workflows/build.yml
Co-authored-by: Sigbjørn Skjæret <redacted>
* Update remaining mentions of BE models to ggml-org/models repo
---------
Signed-off-by: Aaron Teo <redacted>
Co-authored-by: Aleksei Nikiforov <redacted>
Co-authored-by: Aleksei Nikiforov <redacted>
Co-authored-by: Sigbjørn Skjæret <redacted>
# cmake --build build --config Release -j $(nproc)
- ubuntu-24-ppc64el-cpu-cross:
- runs-on: ubuntu-24.04
-
- steps:
- - uses: actions/checkout@v4
- - name: Setup PowerPC64le
- run: |
- sudo dpkg --add-architecture ppc64el
-
- # Add arch-specific repositories for non-amd64 architectures
- cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
- deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
- deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
- deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
- deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
- EOF
-
- sudo apt-get update || true ;# Prevent failure due to missing URLs.
-
- sudo apt-get install -y --no-install-recommends \
- build-essential \
- gcc-14-powerpc64le-linux-gnu \
- g++-14-powerpc64le-linux-gnu
-
- - name: Build
- run: |
- cmake -B build -DLLAMA_CURL=OFF \
- -DCMAKE_BUILD_TYPE=Release \
- -DGGML_OPENMP=OFF \
- -DLLAMA_BUILD_EXAMPLES=ON \
- -DLLAMA_BUILD_TOOLS=ON \
- -DLLAMA_BUILD_TESTS=OFF \
- -DCMAKE_SYSTEM_NAME=Linux \
- -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
- -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
- -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
- -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
- -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
- -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
- -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
- -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
-
- cmake --build build --config Release -j $(nproc)
-
- # ubuntu-24-ppc64el-vulkan-cross:
- # runs-on: ubuntu-24.04
-
- # steps:
- # - uses: actions/checkout@v4
- # - name: Setup PowerPC64le
- # run: |
- # sudo dpkg --add-architecture ppc64el
-
- # # Add arch-specific repositories for non-amd64 architectures
- # cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
- # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
- # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
- # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
- # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
- # EOF
-
- # sudo apt-get update || true ;# Prevent failure due to missing URLs.
-
- # sudo apt-get install -y --no-install-recommends \
- # build-essential \
- # glslc \
- # gcc-14-powerpc64le-linux-gnu \
- # g++-14-powerpc64le-linux-gnu \
- # libvulkan-dev:ppc64el
-
- # - name: Build
- # run: |
- # cmake -B build -DLLAMA_CURL=OFF \
- # -DCMAKE_BUILD_TYPE=Release \
- # -DGGML_VULKAN=ON \
- # -DGGML_OPENMP=OFF \
- # -DLLAMA_BUILD_EXAMPLES=ON \
- # -DLLAMA_BUILD_TOOLS=ON \
- # -DLLAMA_BUILD_TESTS=OFF \
- # -DCMAKE_SYSTEM_NAME=Linux \
- # -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
- # -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
- # -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
- # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
- # -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
- # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
- # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
- # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
-
- # cmake --build build --config Release -j $(nproc)
-
debian-13-loongarch64-cpu-cross:
runs-on: ubuntu-24.04
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
os: ubuntu-22.04
- build: 'arm64'
os: ubuntu-22.04-arm
+ - build: 's390x'
+ os: ubuntu-24.04-s390x
+ - build: 'ppc64le'
+ os: ubuntu-24.04-ppc64le
runs-on: ${{ matrix.os }}
key: ubuntu-cpu-cmake
evict-old-files: 1d
- - name: Dependencies
- id: depends
+ - name: Build Dependencies
+ id: build_depends
run: |
sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ sudo apt-get install -y --no-install-recommends \
+ python3 python3-pip python3-dev \
+ libjpeg-dev build-essential libcurl4-openssl-dev \
+ git-lfs
+
+ - name: Python Dependencies
+ id: python_depends
+ run: |
+ python3 -m pip install --upgrade pip
+ pip3 install ./gguf-py
+
+ - name: Swap Endianness
+ id: endianness
+ if: ${{ matrix.build == 's390x' }}
+ run: |
+ for f in models/*.gguf; do
+ echo YES | python3 gguf-py/gguf/scripts/gguf_convert_endian.py $f big
+ done
- name: Build
id: cmake_build
- name: Test llama2c conversion
id: llama2c_test
+ if: ${{ matrix.build != 's390x' }}
run: |
cd build
echo "Fetch tokenizer"
./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
+ - name: Test llama2c (s390x)
+ id: llama2c_test_s390x
+ if: ${{ matrix.build == 's390x' }}
+ run: |
+ cd build
+ echo "Fetch llama2c big-endian model"
+ wget https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-be.gguf
+ ./bin/llama-cli -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
+
ubuntu-latest-cmake-sanitizer:
runs-on: ubuntu-latest
#include <unistd.h>
#endif
+#if defined(__linux__)
+#include <sys/types.h>
+#include <pwd.h>
+#endif
+
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) || defined(__OpenBSD__)
if (std::getenv("XDG_CACHE_HOME")) {
cache_directory = std::getenv("XDG_CACHE_HOME");
- } else {
+ } else if (std::getenv("HOME")) {
cache_directory = std::getenv("HOME") + std::string("/.cache/");
+ } else {
+#if defined(__linux__)
+ /* no $HOME is defined, fallback to getpwuid */
+ struct passwd *pw = getpwuid(getuid());
+ if ((!pw) || (!pw->pw_dir)) {
+ throw std::runtime_error("Failed to find $HOME directory");
+ }
+
+ cache_directory = std::string(pw->pw_dir) + std::string("/.cache/");
+#else /* defined(__linux__) */
+ throw std::runtime_error("Failed to find $HOME directory");
+#endif /* defined(__linux__) */
}
#elif defined(__APPLE__)
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
target_compile_features(${TARGET} PRIVATE cxx_std_17)
set(TEST_TARGET test-eval-callback)
-add_test(NAME ${TEST_TARGET}
- COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --model stories260K.gguf --prompt hello --seed 42 -ngl 0)
+if(NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
+ add_test(NAME ${TEST_TARGET}
+ COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --model stories260K.gguf --prompt hello --seed 42 -ngl 0)
+else()
+ add_test(NAME ${TEST_TARGET}
+ COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K-be.gguf --model stories260K-be.gguf --prompt hello --seed 42 -ngl 0)
+endif()
set_property(TEST ${TEST_TARGET} PROPERTY LABELS eval-callback curl)
for (int j = 0; j < 8; j++) {
const float32x4_t v = vec_mul(srcv[j], vec_splats(id));
- const int32x4_t vi = vec_signed(v);
+ /* Uses non-default rounding for vec_signed or vec_round */
+ const int32x4_t vi = vec_signed(__builtin_s390_vfisb(v, 4, 1));
y[i].qs[4*j + 0] = vec_extract(vi, 0);
y[i].qs[4*j + 1] = vec_extract(vi, 1);
for (int j = 0; j < 8; j++) {
const float32x4_t v = vec_mul(srcv[j], vec_splats(id));
- const int32x4_t vi = vec_signed(v);
+ /* Uses non-default rounding for vec_signed or vec_round */
+ const int32x4_t vi = vec_signed(__builtin_s390_vfisb(v, 4, 1));
y[i].qs[4*j + 0] = vec_extract(vi, 0);
y[i].qs[4*j + 1] = vec_extract(vi, 1);
uint8x16_t q3h[4];
uint8x16_t q3b[2];
int8x16_t q3bytes[4];
- int8x16_t q8bytes[4];
+ int8x16_t q8bytes[8];
uint8x16_t qhbits[2];
float sum = 0;
memcpy(params + 8, &attn_factor, sizeof(float));
memcpy(params + 9, &beta_fast, sizeof(float));
memcpy(params + 10, &beta_slow, sizeof(float));
- if (mrope_used) {
+ if (mrope_used && sections) {
memcpy(params + 11, sections, sizeof(int32_t) * GGML_MROPE_SECTIONS);
} else {
memset(params + 11, 0, sizeof(int32_t) * GGML_MROPE_SECTIONS);
const size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
-#ifdef IS_BIG_ENDIAN
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// correct endianness of data in precompiled_charsmap binary blob
uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0];
*xcda_blob_size = __builtin_bswap32(*xcda_blob_size);
#include <string>
#include <vector>
+// TODO: reimplement this structure in endian-independent way
struct unicode_cpt_flags {
enum {
UNDEFINED = 0x0001,
SYMBOL = 0x0040, // regex: \p{S}
CONTROL = 0x0080, // regex: \p{C}
MASK_CATEGORIES = 0x00FF,
+ WHITESPACE = 0x0100,
+ LOWERCASE = 0x0200,
+ UPPERCASE = 0x0400,
+ NFD = 0x0800,
};
// codepoint type
// decode from uint16
inline unicode_cpt_flags(const uint16_t flags = 0) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
*reinterpret_cast<uint16_t*>(this) = flags;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ is_undefined = (flags & UNDEFINED) ? 1 : 0;
+ is_number = (flags & NUMBER) ? 1 : 0;
+ is_letter = (flags & LETTER) ? 1 : 0;
+ is_separator = (flags & SEPARATOR) ? 1 : 0;
+ is_accent_mark = (flags & ACCENT_MARK) ? 1 : 0;
+ is_punctuation = (flags & PUNCTUATION) ? 1 : 0;
+ is_symbol = (flags & SYMBOL) ? 1 : 0;
+ is_control = (flags & CONTROL) ? 1 : 0;
+ is_whitespace = (flags & WHITESPACE) ? 1 : 0;
+ is_lowercase = (flags & LOWERCASE) ? 1 : 0;
+ is_uppercase = (flags & UPPERCASE) ? 1 : 0;
+ is_nfd = (flags & NFD) ? 1 : 0;
+#else
+#error Unexpected or undefined __BYTE_ORDER__
+#endif
}
inline uint16_t as_uint() const {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return *reinterpret_cast<const uint16_t*>(this);
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ uint16_t result =
+ is_undefined * UNDEFINED
+ + is_number * NUMBER
+ + is_letter * LETTER
+ + is_separator * SEPARATOR
+ + is_accent_mark * ACCENT_MARK
+ + is_punctuation * PUNCTUATION
+ + is_symbol * SYMBOL
+ + is_control * CONTROL
+ + is_whitespace * WHITESPACE
+ + is_lowercase * LOWERCASE
+ + is_uppercase * UPPERCASE
+ + is_nfd * NFD
+ ;
+
+ return result;
+#else
+#error Unexpected or undefined __BYTE_ORDER__
+#endif
}
inline uint16_t category_flag() const {
llama_build_and_test(test-log.cpp)
llama_build_and_test(test-regex-partial.cpp)
-llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
+if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
+ llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
+else()
+ llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-be.Q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
+endif()
# this fails on windows (github hosted runner) due to curl DLL not found (exit code 0xc0000135)
if (NOT WIN32)
(cd $folder; git pull)
else
git clone $repo $folder
+
+ # byteswap models if on big endian
+ if [ "$(uname -m)" = s390x ]; then
+ for f in $folder/*/*.gguf; do
+ echo YES | python3 "$(dirname $0)/../gguf-py/gguf/scripts/gguf_convert_endian.py" $f big
+ done
+ fi
fi
shopt -s globstar