id: cmake_test
run: |
cd build
- ctest --verbose --timeout 900
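+ # -L main: run only the tests labeled "main"; model-dependent tests are excluded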
+ ctest -L main --verbose --timeout 900
ubuntu-latest-cmake-sanitizer:
runs-on: ubuntu-latest
id: cmake_test
run: |
cd build
- ctest --verbose --timeout 900
+ ctest -L main --verbose --timeout 900
ubuntu-latest-cmake-mpi:
runs-on: ubuntu-latest
id: cmake_test
run: |
cd build
- ctest --verbose
+ ctest -L main --verbose
# TODO: build with LLAMA_NO_METAL because test-backend-ops fails on "Apple Paravirtual device" and I don't know
# how to debug it.
id: cmake_test
run: |
cd build
- ctest --verbose --timeout 900
+ ctest -L main --verbose --timeout 900
macOS-latest-cmake-ios:
runs-on: macos-latest
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
run: |
cd build
- ctest -C Release --verbose --timeout 900
+ ctest -L main -C Release --verbose --timeout 900
- name: Test (Intel SDE)
id: cmake_test_sde
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
cd build
- & $sde -future -- ctest -C Release --verbose --timeout 900
+ & $sde -future -- ctest -L main -C Release --verbose --timeout 900
- name: Determine tag name
id: tag
lcov-report/
gcovr-report/
-build*/
+build*
out/
tmp/
poetry.lock
poetry.toml
-
-# Test binaries
-/tests/test-grammar-parser
-/tests/test-llama-grammar
-/tests/test-double-float
-/tests/test-grad0
-/tests/test-opt
-/tests/test-quantize-fns
-/tests/test-quantize-perf
-/tests/test-sampling
-/tests/test-tokenizer-0-llama
-/tests/test-tokenizer-0-falcon
-/tests/test-tokenizer-1-llama
-/tests/test-tokenizer-1-bpe
-/tests/test-rope
-/tests/test-backend-ops
-/tests/test-autorelease
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
- tests/test-backend-ops tests/test-autorelease
+ tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease
# Code coverage output files
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
OUT=$(realpath "$1")
MNT=$(realpath "$2")
-rm -v $OUT/*.log
-rm -v $OUT/*.exit
-rm -v $OUT/*.md
+rm -f "$OUT"/*.log
+rm -f "$OUT"/*.exit
+rm -f "$OUT"/*.md
sd=`dirname $0`
cd $sd/../
(time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
- (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+ (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
set +e
}
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
if [ -z ${GG_BUILD_LOW_PERF} ]; then
- (time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+ (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
else
- (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+ (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
fi
set +e
gg_printf '```\n'
}
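+# Echo the path of an available GGUF model, preferring the 3B model over 7B;
+# fail loudly if neither model has been downloaded yet.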
+function gg_get_model {
+ local gguf_3b="$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf"
+ local gguf_7b="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf"
+ if [[ -s $gguf_3b ]]; then
+ echo -n "$gguf_3b"
+ elif [[ -s $gguf_7b ]]; then
+ echo -n "$gguf_7b"
+ else
+ echo >&2 "No model found. Can't run gg_run_ctest_with_model."
+ exit 1
+ fi
+}
+
+function gg_run_ctest_with_model_debug {
+ cd ${SRC}
+
+ local model; model=$(gg_get_model)
+ cd build-ci-debug
+ set -e
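+ # -L model: run only the tests labeled "model"; they read LLAMACPP_TEST_MODELFILE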
+ (time LLAMACPP_TEST_MODELFILE="$model" ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
+ set +e
+ cd ..
+}
+
+function gg_run_ctest_with_model_release {
+ cd ${SRC}
+
+ local model; model=$(gg_get_model)
+ cd build-ci-release
+ set -e
+ (time LLAMACPP_TEST_MODELFILE="$model" ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
+ set +e
+ cd ..
+}
+
+function gg_sum_ctest_with_model_debug {
+ gg_printf '### %s\n\n' "${ci}"
+
+ gg_printf 'Runs ctest with model files in debug mode\n'
+ gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+ gg_printf '```\n'
+ gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
+ gg_printf '```\n'
+}
+
+function gg_sum_ctest_with_model_release {
+ gg_printf '### %s\n\n' "${ci}"
+
+ gg_printf 'Runs ctest with model files in release mode\n'
+ gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+ gg_printf '```\n'
+ gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
+ gg_printf '```\n'
+}
+
# open_llama_3b_v2
function gg_run_open_llama_3b_v2 {
wiki_test_60="${path_wiki}/wiki.test-60.raw"
- ./bin/test-autorelease ${model_f16}
-
./bin/quantize ${model_f16} ${model_q8_0} q8_0
./bin/quantize ${model_f16} ${model_q4_0} q4_0
./bin/quantize ${model_f16} ${model_q4_1} q4_1
## main
if [ -z ${GG_BUILD_LOW_PERF} ]; then
+ # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
rm -rf ${SRC}/models-mnt
-
mnt_models=${MNT}/models
mkdir -p ${mnt_models}
ln -sfn ${mnt_models} ${SRC}/models-mnt
- python3 -m pip install -r ${SRC}/requirements.txt
- python3 -m pip install --editable gguf-py
+ # Create a fresh python3 venv and enter it
+ python3 -m venv "$MNT/venv"
+ source "$MNT/venv/bin/activate"
+
+ pip install -r ${SRC}/requirements.txt --disable-pip-version-check
+ pip install --editable gguf-py --disable-pip-version-check
fi
ret=0
else
test $ret -eq 0 && gg_run open_llama_7b_v2
fi
+ test $ret -eq 0 && gg_run ctest_with_model_debug
+ test $ret -eq 0 && gg_run ctest_with_model_release
fi
fi
--- /dev/null
+#!/bin/bash
+set -euo pipefail
+this=$(realpath "$0"); readonly this
+cd "$(dirname "$this")"
+shellcheck "$this"
+
+if (( $# != 1 && $# != 2 )); then
+ cat >&2 <<'EOF'
+usage:
+ ci-run.sh <tmp_dir> [<cache_dir>]
+
+This script wraps ci/run.sh:
+* If <tmp_dir> is a ramdisk, you can reduce writes to your SSD. If <tmp_dir>
+  is not a ramdisk, keep in mind that total writes will increase by the size
+  of <cache_dir>.
+ (openllama_3b_v2: quantized models are about 30GB)
+* Persistent model and data files are synced to and from <cache_dir>,
+ excluding generated .gguf files.
+ (openllama_3b_v2: persistent files are about 6.6GB)
+* <cache_dir> defaults to ~/.cache/llama.cpp
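+
+example:
+  ci-run.sh /mnt/ramdisk/tmp ~/.cache/llama.cpp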
+EOF
+ exit 1
+fi
+
+cd .. # => llama.cpp repo root
+
+tmp="$1"
+mkdir -p "$tmp"
+tmp=$(realpath "$tmp")
+echo >&2 "Using tmp=$tmp"
+
+cache="${2-$HOME/.cache/llama.cpp}"
+mkdir -p "$cache"
+cache=$(realpath "$cache")
+echo >&2 "Using cache=$cache"
+
+_sync() {
+ local from="$1"; shift
+ local to="$1"; shift
+
+ echo >&2 "Syncing from $from to $to"
+ mkdir -p "$from" "$to"
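+ # -a preserves permissions and timestamps; --delete-during removes files in
+ # $to that no longer exist in $from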
+ rsync -a "$from" "$to" --delete-during "$@"
+}
+
+_sync "$(realpath .)/" "$tmp/llama.cpp"
+_sync "$cache/ci-mnt/models/" "$tmp/llama.cpp/ci-mnt/models/"
+
+cd "$tmp/llama.cpp"
+bash ci/run.sh ci-out ci-mnt
+
+_sync 'ci-mnt/models/' "$cache/ci-mnt/models/" --exclude='*.gguf' -P
function(llama_build_executable source)
get_filename_component(TEST_TARGET ${source} NAME_WE)
- add_executable(${TEST_TARGET} ${source})
+ add_executable(${TEST_TARGET} ${source} get-model.cpp)
install(TARGETS ${TEST_TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE common)
endfunction()
function(llama_test_executable name source)
get_filename_component(TEST_TARGET ${source} NAME_WE)
add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
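+ # label the test "main" so CI can select it with `ctest -L main`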
+ set_property(TEST ${name} PROPERTY LABELS "main")
endfunction()
function(llama_build_and_test_executable source)
+ llama_build_and_test_executable_with_label(${source} "main")
+endfunction()
+
+function(llama_build_and_test_executable_with_label source label)
get_filename_component(TEST_TARGET ${source} NAME_WE)
- add_executable(${TEST_TARGET} ${source})
+ add_executable(${TEST_TARGET} ${source} get-model.cpp)
install(TARGETS ${TEST_TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE common)
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
+ set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${label})
endfunction()
# llama_build_and_test_executable(test-double-float.cpp) # SLOW
llama_build_and_test_executable(test-grad0.cpp)
# llama_build_and_test_executable(test-opt.cpp) # SLOW
llama_build_and_test_executable(test-backend-ops.cpp)
-llama_build_and_test_executable(test-autorelease.cpp)
llama_build_and_test_executable(test-rope.cpp)
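+
+# The following tests require a GGUF model file. They get the "model" label so
+# that `ctest -L main` skips them and CI can opt in with `ctest -L model`.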
+llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model")
+llama_build_and_test_executable_with_label(test-autorelease.cpp "model")
+
# dummy executable - not installed
get_filename_component(TEST_TARGET test-c.c NAME_WE)
add_executable(${TEST_TARGET} test-c.c)
--- /dev/null
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include "get-model.h"
+
+char * get_model_or_exit(int argc, char *argv[]) {
+ char * model_path;
+ if (argc > 1) {
+ model_path = argv[1];
+ } else {
+ // no CLI argument: fall back to the environment variable; if it is unset,
+ // skip the test (successfully) rather than fail
+ model_path = getenv("LLAMACPP_TEST_MODELFILE");
+ if (!model_path || strlen(model_path) == 0) {
+ fprintf(stderr, "\033[33mWARNING: No model file provided. Skipping this test. Set LLAMACPP_TEST_MODELFILE=<gguf_model_path> to silence this warning and run this test.\n\033[0m");
+ exit(EXIT_SUCCESS);
+ }
+ }
+
+ return model_path;
+}
--- /dev/null
+#pragma once
+char * get_model_or_exit(int, char*[]);
#include <thread>
#include "llama.h"
+#include "get-model.h"
// This creates a new context inside a std::thread and then tries to exit cleanly.
int main(int argc, char ** argv) {
- if (argc < 2) {
- printf("Usage: %s model.gguf\n", argv[0]);
- return 0; // intentionally return success
- }
+ auto * model_path = get_model_or_exit(argc, argv);
- const std::string fname = argv[1];
-
- std::thread([&fname]() {
+ std::thread([model_path]() {
llama_backend_init(false);
- auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params());
+ auto * model = llama_load_model_from_file(model_path, llama_model_default_params());
auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
llama_free(ctx);
llama_free_model(model);
--- /dev/null
+#include "llama.h"
+#include "get-model.h"
+
+#include <cstdio>
+#include <cstdlib>
+
+int main(int argc, char *argv[]) {
+ auto * model_path = get_model_or_exit(argc, argv);
+ auto * file = fopen(model_path, "r");
+ if (file == nullptr) {
+ fprintf(stderr, "no model at '%s' found\n", model_path);
+ return EXIT_FAILURE;
+ }
+
+ fprintf(stderr, "using '%s'\n", model_path);
+ fclose(file);
+
+ llama_backend_init(false);
+ auto params = llama_model_params{};
+ params.use_mmap = false;
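+ // If the progress callback returns false, llama_load_model_from_file aborts
+ // and returns nullptr. This callback returns false while progress <= 50%,
+ // so the load is cancelled and the test expects a null model.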
+ params.progress_callback = [](float progress, void * ctx){
+ (void) ctx;
+ return progress > 0.50;
+ };
+ auto * model = llama_load_model_from_file(model_path, params);
+ llama_backend_free();
+ return model == nullptr ? EXIT_SUCCESS : EXIT_FAILURE;
+}