# TODO: simplify the following workflows using a matrix
# TODO: run lighter CI on PRs and the full CI only on master (if needed)
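# One way the matrix could look (untested sketch; the job name ggml-ci-cpu and the
# `key`/`extra` matrix fields are made up for illustration, everything else mirrors the jobs below):
#
#  ggml-ci-cpu:
#    strategy:
#      matrix:
#        include:
#          - { runner: ubuntu-22.04,     key: ggml-ci-x64-cpu-low-perf,    extra: "GG_BUILD_LOW_PERF=1" }
#          - { runner: ubuntu-22.04-arm, key: ggml-ci-arm64-cpu-low-perf,  extra: "GG_BUILD_LOW_PERF=1" }
#          - { runner: ubuntu-22.04,     key: ggml-ci-x64-cpu-high-perf,   extra: "" }
#          - { runner: ubuntu-22.04-arm, key: ggml-ci-arm64-cpu-high-perf, extra: "GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1" }
#    runs-on: ${{ matrix.runner }}
#    steps:
#      - uses: actions/checkout@v4
#      - uses: ggml-org/ccache-action@v1.2.16
#        with:
#          key: ${{ matrix.key }}
#          evict-old-files: 1d
#      - name: Dependencies
#        run: |
#          sudo apt-get update
#          sudo apt-get install build-essential libcurl4-openssl-dev
#      - name: Test
#        run: |
#          LLAMA_ARG_THREADS=$(nproc) ${{ matrix.extra }} bash ./ci/run.sh ./tmp/results ./tmp/mnt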
  ggml-ci-x64-cpu-low-perf:
-    runs-on: [self-hosted, Linux, X64, CPU, low-perf]
+    runs-on: ubuntu-22.04
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-x64-cpu-low-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
      - name: Test
        id: ggml-ci
        run: |
-          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
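+          # GG_BUILD_LOW_PERF=1 makes ci/run.sh run its lighter test set; LLAMA_ARG_THREADS sets the default --threads for the llama.cpp tools it invokes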
  ggml-ci-arm64-cpu-low-perf:
-    runs-on: [self-hosted, Linux, ARM64, CPU, low-perf]
+    runs-on: ubuntu-22.04-arm
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-arm64-cpu-low-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
      - name: Test
        id: ggml-ci
        run: |
-          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
  ggml-ci-x64-cpu-high-perf:
-    runs-on: [self-hosted, Linux, X64, CPU, high-perf]
+    runs-on: ubuntu-22.04
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-x64-cpu-high-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
      - name: Test
        id: ggml-ci
        run: |
-          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt
  ggml-ci-arm64-cpu-high-perf:
-    runs-on: [self-hosted, Linux, ARM64, CPU, high-perf]
+    runs-on: ubuntu-22.04-arm
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-arm64-cpu-high-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+  ggml-ci-arm64-cpu-high-perf-sve:
+    runs-on: ubuntu-22.04-arm
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-arm64-cpu-high-perf-sve
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
      - name: Test
        id: ggml-ci
        run: |
-          GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
  ggml-ci-x64-nvidia-cuda:
    runs-on: [self-hosted, Linux, X64, NVIDIA]
    MUSA_ARCH=${MUSA_ARCH:-21}
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
fi
+
+if [ ! -z ${GG_BUILD_NO_SVE} ]; then
+    # Armv9 and newer enables SVE by default; adjust these flags depending on the CPU used
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
+fi
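+# note: the ggml-ci-arm64-cpu-high-perf-sve job above does not set GG_BUILD_NO_SVE, so its native build keeps SVE enabled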
## helpers
# download a file if it does not exist or if it is outdated
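# A minimal sketch of such a helper (illustrative only; the real implementation is elided
# from this excerpt). wget's -N flag re-downloads only when the remote copy is newer:
#
#   function gg_wget_sketch {
#       local out=$1
#       local url=$2
#
#       mkdir -p ${out}
#
#       # -nv: terse output, -N: timestamping (skip the download if the local copy is up to date)
#       wget -nv -N -P ${out} ${url}
#   }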
wiki_test="${path_wiki}/wiki.test.raw"
- ./bin/llama-quantize ${model_bf16} ${model_q8_0} q8_0
- ./bin/llama-quantize ${model_bf16} ${model_q4_0} q4_0
- ./bin/llama-quantize ${model_bf16} ${model_q4_1} q4_1
- ./bin/llama-quantize ${model_bf16} ${model_q5_0} q5_0
- ./bin/llama-quantize ${model_bf16} ${model_q5_1} q5_1
- ./bin/llama-quantize ${model_bf16} ${model_q2_k} q2_k
- ./bin/llama-quantize ${model_bf16} ${model_q3_k} q3_k
- ./bin/llama-quantize ${model_bf16} ${model_q4_k} q4_k
- ./bin/llama-quantize ${model_bf16} ${model_q5_k} q5_k
- ./bin/llama-quantize ${model_bf16} ${model_q6_k} q6_k
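+ # the trailing $(nproc) is llama-quantize's optional nthreads argument (number of quantization threads)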
+ ./bin/llama-quantize ${model_bf16} ${model_q8_0} q8_0 $(nproc)
+ ./bin/llama-quantize ${model_bf16} ${model_q4_0} q4_0 $(nproc)
+ ./bin/llama-quantize ${model_bf16} ${model_q4_1} q4_1 $(nproc)
+ ./bin/llama-quantize ${model_bf16} ${model_q5_0} q5_0 $(nproc)
+ ./bin/llama-quantize ${model_bf16} ${model_q5_1} q5_1 $(nproc)
+ ./bin/llama-quantize ${model_bf16} ${model_q2_k} q2_k $(nproc)
+ ./bin/llama-quantize ${model_bf16} ${model_q3_k} q3_k $(nproc)
+ ./bin/llama-quantize ${model_bf16} ${model_q4_k} q4_k $(nproc)
+ ./bin/llama-quantize ${model_bf16} ${model_q5_k} q5_k $(nproc)
+ ./bin/llama-quantize ${model_bf16} ${model_q6_k} q6_k $(nproc)
(time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
(time ./bin/llama-cli -no-cnv --model ${model_bf16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-bf16.log
function gg_sum_qwen3_0_6b {
gg_printf '### %s\n\n' "${ci}"
- gg_printf 'Pythia 2.8B:\n'
+ gg_printf 'Qwen3 0.6B:\n'
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
    int64_t iterations = params.iterations;
-
-    // Initialize GGML, ensures float conversion tables are initialized
-    struct ggml_init_params ggml_params = {
-        /* .mem_size   = */ 1*1024,
-        /* .mem_buffer = */ NULL,
-        /* .no_alloc   = */ true,
-    };
-    struct ggml_context * ctx = ggml_init(ggml_params);
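+    // ggml_cpu_init() initializes the float conversion tables, so the dummy ggml context is no longer needed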
+    ggml_cpu_init();
    for (int i = 0; i < GGML_TYPE_COUNT; i++) {
        ggml_type type = (ggml_type) i;
        }
    }
-    ggml_free(ctx);
-
    return 0;
}