server : fix temperature + disable some tests (#7409)

author Georgi Gerganov <redacted>

Mon, 20 May 2024 12:10:03 +0000 (15:10 +0300)

committer GitHub <redacted>

Mon, 20 May 2024 12:10:03 +0000 (22:10 +1000)
author Georgi Gerganov <redacted>
Mon, 20 May 2024 12:10:03 +0000 (15:10 +0300)
committer GitHub <redacted>
Mon, 20 May 2024 12:10:03 +0000 (22:10 +1000)
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml

index 217af67cfd42007ef3546297915c747b587f6cdf..0789efd18a1ab59ceed12f5412e97e3916a9ffba 100644 (file)
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -33,13 +33,10 @@ jobs:
      strategy:
        matrix:
          sanitizer: [ADDRESS, THREAD, UNDEFINED]
-        build_type: [Debug]
+        build_type: [RelWithDebInfo]
          include:
            - build_type: Release
              sanitizer: ""
-          - build_type: Debug
-            sanitizer: THREAD
-            disabled_on_pr: true
        fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
  
      steps:
@@ -103,10 +100,8 @@ jobs:
                -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
            cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target server
  
-
        - name: Tests
          id: server_integration_tests
-        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
          run: |
            cd examples/server/tests
            PORT=8888 ./tests.sh
diff --git a/examples/server/tests/features/results.feature b/examples/server/tests/features/results.feature

index 4ab8ad20c2680cafcf7f4749c06a0fcde694995a..e8e1b54147b05d457aad979d9116127cbb32357c 100644 (file)
--- a/examples/server/tests/features/results.feature
+++ b/examples/server/tests/features/results.feature
@@ -13,7 +13,7 @@ Feature: Results
  
    Scenario Outline: consistent results with same seed
      Given <n_slots> slots
-    And   0.0 temperature
+    And   1.0 temperature
      Then  the server is starting
      Then  the server is healthy
  
@@ -27,7 +27,8 @@ Feature: Results
      Examples:
        | n_slots |
        | 1       |
-      | 2       |
+      # FIXME: unified KV cache nondeterminism
+      # | 2       |
  
    Scenario Outline: different results with different seed
      Given <n_slots> slots
@@ -73,14 +74,13 @@ Feature: Results
      Examples:
        | n_parallel | temp |
        | 1          | 0.0  |
-      | 2          | 0.0  |
-      | 4          | 0.0  |
        | 1          | 1.0  |
-      # FIXME: These tests fail on master.
-      # Problems: unified KV cache (except for CPU backend with LLAMA_NO_LLAMAFILE=1), SIMD nondeterminism.
+      # FIXME: unified KV cache nondeterminism
        # See https://github.com/ggerganov/whisper.cpp/issues/1941#issuecomment-1986923227
        # and https://github.com/ggerganov/llama.cpp/pull/6122#discussion_r1531405574
        # and https://github.com/ggerganov/llama.cpp/pull/7347 .
+      # | 2          | 0.0  |
+      # | 4          | 0.0  |
        # | 2          | 1.0  |
        # | 4          | 1.0  |
  
@@ -108,12 +108,11 @@ Feature: Results
      Examples:
        | n_slots | n_kv | n_predict | n_parallel |
        | 4       | 1024 | 1         | 1          |
-      | 4       | 1024 | 1         | 4          |
-      # FIXME: These tests fail on master.
-      # Problems: unified KV cache (except for CPU backend with LLAMA_NO_LLAMAFILE=1), SIMD nondeterminism.
+      # FIXME: unified KV cache nondeterminism
        # See https://github.com/ggerganov/whisper.cpp/issues/1941#issuecomment-1986923227
        # and https://github.com/ggerganov/llama.cpp/pull/6122#discussion_r1531405574
        # and https://github.com/ggerganov/llama.cpp/pull/7347 .
+      # | 4       | 1024 | 1         | 4          |
        # | 4       | 1024 | 100       | 1          |
        # This test still fails even the above patches; the first token probabilities are already different.
        # | 4       | 1024 | 100       | 4          |
author	Georgi Gerganov <redacted>
	Mon, 20 May 2024 12:10:03 +0000 (15:10 +0300)
committer	GitHub <redacted>
	Mon, 20 May 2024 12:10:03 +0000 (22:10 +1000)
.github/workflows/server.yml		patch \| blob \| history
examples/server/tests/features/results.feature		patch \| blob \| history