From: Aaron Teo Date: Fri, 26 Sep 2025 18:03:33 +0000 (+0800) Subject: devops: add s390x & ppc64le CI (#15925) X-Git-Tag: upstream/0.0.6641~40 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=624207e676ab5eb3ce7af631902bb45fb73a8359;p=pkg%2Fggml%2Fsources%2Fllama.cpp devops: add s390x & ppc64le CI (#15925) * devops: move s390x and ppc64le ci build we have access to ubuntu-24.04-s390x and ppc64le images now Signed-off-by: Aaron Teo * devops: disable ppc64le for now since they have compiler errors Signed-off-by: Aaron Teo * devops: stop warnings as errors Signed-off-by: Aaron Teo * devops: switch to non-macro flag Signed-off-by: Aaron Teo * devops: going the llama macro route Signed-off-by: Aaron Teo * devops: add big-endian gguf test models Signed-off-by: Aaron Teo * devops: disable ppc64le to test s390x, check test build Signed-off-by: Aaron Teo * devops: dup .gguf.inp files for big-endian tests Signed-off-by: Aaron Teo * devops: dup .gguf.out files for big-endian too Signed-off-by: Aaron Teo * devops: add python setup and endian byteswap Signed-off-by: Aaron Teo * devops: pooring thing does not have s390x python3 Signed-off-by: Aaron Teo * devops: add missing rust compiler for s390x Signed-off-by: Aaron Teo * devops: try rust actions runner Signed-off-by: Aaron Teo * Revert "devops: try rust actions runner" This reverts commit 3f8db04356033d6c1d7eccc75ca396bc5298250c. 
Signed-off-by: Aaron Teo * devops: try a different path for rust Signed-off-by: Aaron Teo * devops: dump home directory and user info Signed-off-by: Aaron Teo * devops: install gguf-py only Signed-off-by: Aaron Teo * devops: missed relative path Signed-off-by: Aaron Teo * devops: remove big-endian files since local swapping is working Signed-off-by: Aaron Teo * devops: revert test-tokenizer-0 cmakelists Signed-off-by: Aaron Teo * Fix unicode flags conversion from and to uint16_t Bitfields are allocated in different order on s390x Signed-off-by: Aaron Teo * Simplify byteswap command Signed-off-by: Aaron Teo * Add byteswapping and git-lfs for test-tokenizers-ggml-vocabs Signed-off-by: Aaron Teo * Fix endianness detection in vocab loader Signed-off-by: Aaron Teo * Disable test-thread-safety on s390x In this test a model is downloaded, then immediately loaded to check if more downloads are needed, and then used for test. There is no clean way to separate all those steps to add byteswapping between them, so just skip this test. Signed-off-by: Aaron Teo * Fix q8_0 test in test-quantize-fns vec_signed uses unexpected rounding mode. Explicitly use different rounding function. Signed-off-by: Aaron Teo * devops: add big-endian stories260K Signed-off-by: Aaron Teo * devops: add s390x test-eval-callback Signed-off-by: Aaron Teo * devops: fix test does not exist Signed-off-by: Aaron Teo * devops: fix model not found llama-eval-callback Signed-off-by: Aaron Teo * Fix q3_K dot product error in test-quantize-fns on s390x Array q8bytes had only 4 elements allocated, but 8 elements accessed. This led to writes out of bounds, later reads of the overwritten values out of bounds, and an incorrect result. 
Signed-off-by: Aaron Teo * devops: re-enable ppc64le for testing Signed-off-by: Aaron Teo * devops: activate test-thread-safety for s390x Signed-off-by: Aaron Teo * devops: disable ppc64le tests for some reason it keeps failing test-thread-safety tests and I do not have a machine that is able to replicate the tests. Signed-off-by: Aaron Teo * devops: LLAMA_FATAL_WARNINGS=ON Signed-off-by: Aaron Teo * Correct repository URL for s390x for test-thread-safety model Signed-off-by: Aaron Teo * Fix fs_get_cache_directory Ensure it works even if both XDG_CACHE_HOME and HOME are unset. This might happen in containers. Signed-off-by: Aaron Teo * Re-enable CI for ppc64le Signed-off-by: Aaron Teo * Fortify ggml_rope_impl Only memcpy data from sections argument if it's non-NULL. Signed-off-by: Aaron Teo * Add TODO in struct unicode_cpt_flags to reimplement it in endian-independent way * Update URL for big-endian model * Update .github/workflows/build.yml Co-authored-by: Sigbjørn Skjæret * Update remaining mentions of BE models to ggml-org/models repo --------- Signed-off-by: Aaron Teo Co-authored-by: Aleksei Nikiforov Co-authored-by: Aleksei Nikiforov Co-authored-by: Sigbjørn Skjæret --- diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-linux-cross.yml index 04ad187d..e0e809ff 100644 --- a/.github/workflows/build-linux-cross.yml +++ b/.github/workflows/build-linux-cross.yml @@ -141,97 +141,6 @@ jobs: # cmake --build build --config Release -j $(nproc) - ubuntu-24-ppc64el-cpu-cross: - runs-on: ubuntu-24.04 - - steps: - - uses: actions/checkout@v4 - - name: Setup PowerPC64le - run: | - sudo dpkg --add-architecture ppc64el - - # Add arch-specific repositories for non-amd64 architectures - cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list - deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe - deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe - deb [arch=ppc64el] 
http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe - deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe - EOF - - sudo apt-get update || true ;# Prevent failure due to missing URLs. - - sudo apt-get install -y --no-install-recommends \ - build-essential \ - gcc-14-powerpc64le-linux-gnu \ - g++-14-powerpc64le-linux-gnu - - - name: Build - run: | - cmake -B build -DLLAMA_CURL=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_SYSTEM_NAME=Linux \ - -DCMAKE_SYSTEM_PROCESSOR=ppc64 \ - -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \ - -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \ - -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - cmake --build build --config Release -j $(nproc) - - # ubuntu-24-ppc64el-vulkan-cross: - # runs-on: ubuntu-24.04 - - # steps: - # - uses: actions/checkout@v4 - # - name: Setup PowerPC64le - # run: | - # sudo dpkg --add-architecture ppc64el - - # # Add arch-specific repositories for non-amd64 architectures - # cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list - # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe - # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe - # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe - # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe - # EOF - - # sudo apt-get update || true ;# Prevent failure due to missing URLs. 
- - # sudo apt-get install -y --no-install-recommends \ - # build-essential \ - # glslc \ - # gcc-14-powerpc64le-linux-gnu \ - # g++-14-powerpc64le-linux-gnu \ - # libvulkan-dev:ppc64el - - # - name: Build - # run: | - # cmake -B build -DLLAMA_CURL=OFF \ - # -DCMAKE_BUILD_TYPE=Release \ - # -DGGML_VULKAN=ON \ - # -DGGML_OPENMP=OFF \ - # -DLLAMA_BUILD_EXAMPLES=ON \ - # -DLLAMA_BUILD_TOOLS=ON \ - # -DLLAMA_BUILD_TESTS=OFF \ - # -DCMAKE_SYSTEM_NAME=Linux \ - # -DCMAKE_SYSTEM_PROCESSOR=ppc64 \ - # -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \ - # -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \ - # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - # -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \ - # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - # cmake --build build --config Release -j $(nproc) - debian-13-loongarch64-cpu-cross: runs-on: ubuntu-24.04 container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9706d296..424b4ba7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -192,6 +192,10 @@ jobs: os: ubuntu-22.04 - build: 'arm64' os: ubuntu-22.04-arm + - build: 's390x' + os: ubuntu-24.04-s390x + - build: 'ppc64le' + os: ubuntu-24.04-ppc64le runs-on: ${{ matrix.os }} @@ -206,11 +210,28 @@ jobs: key: ubuntu-cpu-cmake evict-old-files: 1d - - name: Dependencies - id: depends + - name: Build Dependencies + id: build_depends run: | sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev + sudo apt-get install -y --no-install-recommends \ + python3 python3-pip python3-dev \ + libjpeg-dev build-essential libcurl4-openssl-dev \ + git-lfs + + - name: Python Dependencies + id: python_depends + run: | + python3 -m pip install --upgrade pip + pip3 install ./gguf-py + + - name: Swap Endianness + id: endianness + if: ${{ 
matrix.build == 's390x' }} + run: | + for f in models/*.gguf; do + echo YES | python3 gguf-py/gguf/scripts/gguf_convert_endian.py $f big + done - name: Build id: cmake_build @@ -228,6 +249,7 @@ jobs: - name: Test llama2c conversion id: llama2c_test + if: ${{ matrix.build != 's390x' }} run: | cd build echo "Fetch tokenizer" @@ -237,6 +259,15 @@ jobs: ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 + - name: Test llama2c (s390x) + id: llama2c_test_s390x + if: ${{ matrix.build == 's390x' }} + run: | + cd build + echo "Fetch llama2c big-endian model" + wget https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-be.gguf + ./bin/llama-cli -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 + ubuntu-latest-cmake-sanitizer: runs-on: ubuntu-latest diff --git a/common/common.cpp b/common/common.cpp index 6ebd9341..c1e736c4 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -51,6 +51,11 @@ #include #endif +#if defined(__linux__) +#include +#include +#endif + #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #endif @@ -865,8 +870,20 @@ std::string fs_get_cache_directory() { #if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) || defined(__OpenBSD__) if (std::getenv("XDG_CACHE_HOME")) { cache_directory = std::getenv("XDG_CACHE_HOME"); - } else { + } else if (std::getenv("HOME")) { cache_directory = std::getenv("HOME") + std::string("/.cache/"); + } else { +#if defined(__linux__) + /* no $HOME is defined, fallback to getpwuid */ + struct passwd *pw = getpwuid(getuid()); + if ((!pw) || (!pw->pw_dir)) { + throw std::runtime_error("Failed to find $HOME directory"); + } + + cache_directory = std::string(pw->pw_dir) + std::string("/.cache/"); +#else /* defined(__linux__) */ + throw std::runtime_error("Failed to find 
$HOME directory"); +#endif /* defined(__linux__) */ } #elif defined(__APPLE__) cache_directory = std::getenv("HOME") + std::string("/Library/Caches/"); diff --git a/examples/eval-callback/CMakeLists.txt b/examples/eval-callback/CMakeLists.txt index 95915ed9..c514e431 100644 --- a/examples/eval-callback/CMakeLists.txt +++ b/examples/eval-callback/CMakeLists.txt @@ -5,6 +5,11 @@ target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) set(TEST_TARGET test-eval-callback) -add_test(NAME ${TEST_TARGET} - COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --model stories260K.gguf --prompt hello --seed 42 -ngl 0) +if(NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x") + add_test(NAME ${TEST_TARGET} + COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --model stories260K.gguf --prompt hello --seed 42 -ngl 0) +else() + add_test(NAME ${TEST_TARGET} + COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K-be.gguf --model stories260K-be.gguf --prompt hello --seed 42 -ngl 0) +endif() set_property(TEST ${TEST_TARGET} PROPERTY LABELS eval-callback curl) diff --git a/ggml/src/ggml-cpu/arch/s390/quants.c b/ggml/src/ggml-cpu/arch/s390/quants.c index a19ee68c..19d225a4 100644 --- a/ggml/src/ggml-cpu/arch/s390/quants.c +++ b/ggml/src/ggml-cpu/arch/s390/quants.c @@ -75,7 +75,8 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i for (int j = 0; j < 8; j++) { const float32x4_t v = vec_mul(srcv[j], vec_splats(id)); - const int32x4_t vi = vec_signed(v); + /* Uses non-default rounding for vec_signed or vec_round */ + const int32x4_t vi = vec_signed(__builtin_s390_vfisb(v, 4, 1)); y[i].qs[4*j + 0] = vec_extract(vi, 0); y[i].qs[4*j + 1] = vec_extract(vi, 1); @@ -122,7 +123,8 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i for (int j = 0; j < 
8; j++) { const float32x4_t v = vec_mul(srcv[j], vec_splats(id)); - const int32x4_t vi = vec_signed(v); + /* Uses non-default rounding for vec_signed or vec_round */ + const int32x4_t vi = vec_signed(__builtin_s390_vfisb(v, 4, 1)); y[i].qs[4*j + 0] = vec_extract(vi, 0); y[i].qs[4*j + 1] = vec_extract(vi, 1); @@ -731,7 +733,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi uint8x16_t q3h[4]; uint8x16_t q3b[2]; int8x16_t q3bytes[4]; - int8x16_t q8bytes[4]; + int8x16_t q8bytes[8]; uint8x16_t qhbits[2]; float sum = 0; diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index fe36bab8..a5796214 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -3927,7 +3927,7 @@ static struct ggml_tensor * ggml_rope_impl( memcpy(params + 8, &attn_factor, sizeof(float)); memcpy(params + 9, &beta_fast, sizeof(float)); memcpy(params + 10, &beta_slow, sizeof(float)); - if (mrope_used) { + if (mrope_used && sections) { memcpy(params + 11, sections, sizeof(int32_t) * GGML_MROPE_SECTIONS); } else { memset(params + 11, 0, sizeof(int32_t) * GGML_MROPE_SECTIONS); diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 8cb36661..da938af0 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -1772,7 +1772,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { const size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx); const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx); precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap); -#ifdef IS_BIG_ENDIAN +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ // correct endiannes of data in precompiled_charsmap binary blob uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0]; *xcda_blob_size = __builtin_bswap32(*xcda_blob_size); diff --git a/src/unicode.h b/src/unicode.h index 0a5fa2a7..5bd1362f 100644 --- a/src/unicode.h +++ b/src/unicode.h @@ -4,6 +4,7 @@ 
#include #include +// TODO: reimplement this structure in endian-independent way struct unicode_cpt_flags { enum { UNDEFINED = 0x0001, @@ -15,6 +16,10 @@ struct unicode_cpt_flags { SYMBOL = 0x0040, // regex: \p{S} CONTROL = 0x0080, // regex: \p{C} MASK_CATEGORIES = 0x00FF, + WHITESPACE = 0x0100, + LOWERCASE = 0x0200, + UPPERCASE = 0x0400, + NFD = 0x0800, }; // codepoint type @@ -34,11 +39,49 @@ struct unicode_cpt_flags { // decode from uint16 inline unicode_cpt_flags(const uint16_t flags = 0) { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ *reinterpret_cast(this) = flags; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + is_undefined = (flags & UNDEFINED) ? 1 : 0; + is_number = (flags & NUMBER) ? 1 : 0; + is_letter = (flags & LETTER) ? 1 : 0; + is_separator = (flags & SEPARATOR) ? 1 : 0; + is_accent_mark = (flags & ACCENT_MARK) ? 1 : 0; + is_punctuation = (flags & PUNCTUATION) ? 1 : 0; + is_symbol = (flags & SYMBOL) ? 1 : 0; + is_control = (flags & CONTROL) ? 1 : 0; + is_whitespace = (flags & WHITESPACE) ? 1 : 0; + is_lowercase = (flags & LOWERCASE) ? 1 : 0; + is_uppercase = (flags & UPPERCASE) ? 1 : 0; + is_nfd = (flags & NFD) ? 
1 : 0; +#else +#error Unexpected or undefined __BYTE_ORDER__ +#endif } inline uint16_t as_uint() const { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return *reinterpret_cast(this); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + uint16_t result = + is_undefined * UNDEFINED + + is_number * NUMBER + + is_letter * LETTER + + is_separator * SEPARATOR + + is_accent_mark * ACCENT_MARK + + is_punctuation * PUNCTUATION + + is_symbol * SYMBOL + + is_control * CONTROL + + is_whitespace * WHITESPACE + + is_lowercase * LOWERCASE + + is_uppercase * UPPERCASE + + is_nfd * NFD + ; + + return result; +#else +#error Unexpected or undefined __BYTE_ORDER__ +#endif } inline uint16_t category_flag() const { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3e9e082d..d9cc5e93 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -185,7 +185,11 @@ llama_build_and_test(test-json-partial.cpp) llama_build_and_test(test-log.cpp) llama_build_and_test(test-regex-partial.cpp) -llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2) +if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x") + llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2) +else() + llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-be.Q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2) +endif() # this fails on windows (github hosted runner) due to curl DLL not found (exit code 0xc0000135) if (NOT WIN32) diff --git a/tests/test-tokenizers-repo.sh b/tests/test-tokenizers-repo.sh index 1158aeba..94a3d05b 100755 --- a/tests/test-tokenizers-repo.sh +++ b/tests/test-tokenizers-repo.sh @@ -23,6 +23,13 @@ if [ -d $folder ] && [ -d $folder/.git ]; then (cd $folder; git pull) else git clone $repo $folder + 
+ # byteswap models if on big endian + if [ "$(uname -m)" = s390x ]; then + for f in $folder/*/*.gguf; do + echo YES | python3 "$(dirname $0)/../gguf-py/gguf/scripts/gguf_convert_endian.py" $f big + done + fi fi shopt -s globstar