From: Neo Zhang
Date: Tue, 23 Dec 2025 04:59:12 +0000 (+0800)
Subject: [SYCL] replace llama-cli by llama-completion to rm the impact to test script (#18290)
X-Git-Tag: upstream/0.0.7599~82
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=a6a552e4ec43c2b6ed29b05d8da6921c59f05ed7;p=pkg%2Fggml%2Fsources%2Fllama.cpp

[SYCL] replace llama-cli by llama-completion to rm the impact to test script (#18290)

* replace llama-cli by llama-completion to rm the impact to test script

* Update examples/sycl/run-llama2.sh

Co-authored-by: Sigbjørn Skjæret

* Update examples/sycl/run-llama2.sh

Co-authored-by: Sigbjørn Skjæret

* Update examples/sycl/run-llama3.sh

Co-authored-by: Sigbjørn Skjæret

* Update examples/sycl/run-llama3.sh

Co-authored-by: Sigbjørn Skjæret

* Update examples/sycl/win-run-llama2.bat

Co-authored-by: Sigbjørn Skjæret

* Update examples/sycl/win-run-llama3.bat

Co-authored-by: Sigbjørn Skjæret

---------

Co-authored-by: Neo Zhang Jianyu
Co-authored-by: Sigbjørn Skjæret
---

diff --git a/examples/sycl/run-llama2.sh b/examples/sycl/run-llama2.sh
index a018e451..cf23619e 100755
--- a/examples/sycl/run-llama2.sh
+++ b/examples/sycl/run-llama2.sh
@@ -22,9 +22,9 @@ if [ $# -gt 0 ]; then
     GGML_SYCL_DEVICE=$1
     echo "use $GGML_SYCL_DEVICE as main GPU"
     #use signle GPU only
-    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
+    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
 else
     #use multiple GPUs with same max compute units
-    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
+    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
 fi

diff --git a/examples/sycl/run-llama3.sh b/examples/sycl/run-llama3.sh
index 47702557..feee5165 100755
--- a/examples/sycl/run-llama3.sh
+++ b/examples/sycl/run-llama3.sh
@@ -24,8 +24,8 @@ export UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1
 if [ $# -gt 0 ]; then
     GGML_SYCL_DEVICE=$1
     echo "Using $GGML_SYCL_DEVICE as the main GPU"
-    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
+    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none
 else
     #use multiple GPUs with same max compute units
-    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m ${MODEL_FILE} -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
+    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT}
 fi
diff --git a/examples/sycl/win-run-llama2.bat b/examples/sycl/win-run-llama2.bat
index b654f88f..32ff673a 100644
--- a/examples/sycl/win-run-llama2.bat
+++ b/examples/sycl/win-run-llama2.bat
@@ -8,4 +8,4 @@ set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"

 :: support malloc device memory more than 4GB.
 set UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1
-.\build\bin\llama-cli.exe -m models\llama-2-7b.Q4_0.gguf -p %INPUT2% -n 400 -e -ngl 99 -s 0
+.\build\bin\llama-completion.exe -m models\llama-2-7b.Q4_0.gguf -no-cnv -p %INPUT2% -n 400 -e -ngl 99 -s 0
diff --git a/examples/sycl/win-run-llama3.bat b/examples/sycl/win-run-llama3.bat
index 608b834f..ea4ae69d 100644
--- a/examples/sycl/win-run-llama3.bat
+++ b/examples/sycl/win-run-llama3.bat
@@ -8,4 +8,4 @@ set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"

 :: support malloc device memory more than 4GB.
 set UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1
-.\build\bin\llama-cli.exe -m models\Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf -p %INPUT2% -n 400 -s 0 -e -ngl 99
+.\build\bin\llama-completion.exe -m models\Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf -no-cnv -p %INPUT2% -n 400 -s 0 -e -ngl 99
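
Usage sketch: both shell scripts keep their original calling convention, only the
binary name and the -no-cnv flag change. Assuming the project has been built into
./build and the model referenced by the script's MODEL_FILE variable is present:

    # no argument: use multiple GPUs with the same max compute units
    ./examples/sycl/run-llama2.sh

    # one argument: use a single GPU, selected by SYCL device index
    ./examples/sycl/run-llama2.sh 0

With -no-cnv, llama-completion performs a one-shot completion of the -p prompt
instead of entering interactive conversation mode, so the run exits on its own
and its output can be consumed by a test script.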