From: Neo Zhang
Date: Tue, 23 Dec 2025 04:59:12 +0000 (+0800)
Subject: [SYCL] replace llama-cli by llama-completion to rm the impact to test script (#18290)
X-Git-Tag: upstream/0.0.7599~82
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=a6a552e4ec43c2b6ed29b05d8da6921c59f05ed7;p=pkg%2Fggml%2Fsources%2Fllama.cpp

[SYCL] replace llama-cli by llama-completion to rm the impact to test script (#18290)

* replace llama-cli by llama-completion to rm the impact to test script

* Update examples/sycl/run-llama2.sh

Co-authored-by: Sigbjørn Skjæret

* Update examples/sycl/run-llama2.sh

Co-authored-by: Sigbjørn Skjæret

* Update examples/sycl/run-llama3.sh

Co-authored-by: Sigbjørn Skjæret

* Update examples/sycl/run-llama3.sh

Co-authored-by: Sigbjørn Skjæret

* Update examples/sycl/win-run-llama2.bat

Co-authored-by: Sigbjørn Skjæret

* Update examples/sycl/win-run-llama3.bat

Co-authored-by: Sigbjørn Skjæret

---------

Co-authored-by: Neo Zhang Jianyu
Co-authored-by: Sigbjørn Skjæret
---

diff --git a/examples/sycl/run-llama2.sh b/examples/sycl/run-llama2.sh
index a018e451..cf23619e 100755
--- a/examples/sycl/run-llama2.sh
+++ b/examples/sycl/run-llama2.sh
@@ -22,9 +22,9 @@ if [ $# -gt 0 ]; then
     GGML_SYCL_DEVICE=$1
     echo "use $GGML_SYCL_DEVICE as main GPU"
     #use signle GPU only
-    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
+    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
 else
     #use multiple GPUs with same max compute units
-    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
+    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
 fi

diff --git a/examples/sycl/run-llama3.sh b/examples/sycl/run-llama3.sh
index 47702557..feee5165 100755
--- a/examples/sycl/run-llama3.sh
+++ b/examples/sycl/run-llama3.sh
@@ -24,8 +24,8 @@ export UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1
 if [ $# -gt 0 ]; then
     GGML_SYCL_DEVICE=$1
     echo "Using $GGML_SYCL_DEVICE as the main GPU"
-    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
+    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
 else
     #use multiple GPUs with same max compute units
-    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
+    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
 fi
diff --git a/examples/sycl/win-run-llama2.bat b/examples/sycl/win-run-llama2.bat
index b654f88f..32ff673a 100644
--- a/examples/sycl/win-run-llama2.bat
+++ b/examples/sycl/win-run-llama2.bat
@@ -8,4 +8,4 @@ set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"

 :: support malloc device memory more than 4GB.
 set UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1
-.\build\bin\llama-cli.exe -m models\llama-2-7b.Q4_0.gguf -p %INPUT2% -n 400 -e -ngl 99 -s 0
+.\build\bin\llama-completion.exe -m models\llama-2-7b.Q4_0.gguf -no-cnv -p %INPUT2% -n 400 -e -ngl 99 -s 0
diff --git a/examples/sycl/win-run-llama3.bat b/examples/sycl/win-run-llama3.bat
index 608b834f..ea4ae69d 100644
--- a/examples/sycl/win-run-llama3.bat
+++ b/examples/sycl/win-run-llama3.bat
@@ -8,4 +8,4 @@ set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"

 :: support malloc device memory more than 4GB.
 set UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1
-.\build\bin\llama-cli.exe -m models\Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf -p %INPUT2% -n 400 -s 0 -e -ngl 99
+.\build\bin\llama-completion.exe -m models\Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf -no-cnv -p %INPUT2% -n 400 -s 0 -e -ngl 99
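
Usage sketch: both shell scripts keep their original calling convention, only the
binary name and the -no-cnv flag change. Assuming the project has been built into
./build and the model referenced by the script's MODEL_FILE variable is present:

    # no argument: use multiple GPUs with the same max compute units
    ./examples/sycl/run-llama2.sh

    # one argument: use a single GPU, selected by SYCL device index
    ./examples/sycl/run-llama2.sh 0

With -no-cnv, llama-completion performs a one-shot completion of the -p prompt
instead of entering interactive conversation mode, so the run exits on its own
and its output can be consumed by a test script.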