-Wpointer-arith
-Wmissing-prototypes
-Werror=implicit-int
+ -Wno-unused-function
)
set(cxx_flags
-Wall
-Wno-unused-function
-Wno-multichar
)
+ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+ # g++ only: the flag is added solely under the GNU compiler ID so other
+ # compilers never receive it; list(APPEND) extends cxx_flags in place
+ list(APPEND cxx_flags -Wno-format-truncation)
+ endif()
else()
# todo : msvc
endif()
# warnings
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
- -Wmissing-prototypes -Werror=implicit-int
+ -Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
+# Detect clang from the compiler's own --version banner instead of the spelling
+# of $(CXX): clang is frequently invoked as c++ or g++ (e.g. on macOS), which a
+# name-based check would misclassify as g++.
+ifeq '' '$(findstring clang,$(shell $(CXX) --version))'
+ # g++ only
+ CXXFLAGS += -Wno-format-truncation
+endif
+
# OS specific
# TODO: support Windows
ifeq ($(UNAME_S),Linux)
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
-#define NOMINMAX
+#ifndef NOMINMAX
+# define NOMINMAX
+#endif
#include <codecvt>
#include <locale>
#include <windows.h>
dump_string_yaml_multiline(stream, "grammar", params.grammar.c_str());
fprintf(stream, "grammar-file: # never logged, see grammar instead. Can still be specified for input.\n");
fprintf(stream, "hellaswag: %s # default: false\n", params.hellaswag ? "true" : "false");
- fprintf(stream, "hellaswag_tasks: %ld # default: 400\n", params.hellaswag_tasks);
+ fprintf(stream, "hellaswag_tasks: %zu # default: 400\n", params.hellaswag_tasks);
const auto logit_bias_eos = params.logit_bias.find(llama_token_eos(lctx));
const bool ignore_eos = logit_bias_eos != params.logit_bias.end() && logit_bias_eos->second == -INFINITY;
int estimateWidth(char32_t codepoint) {
#if defined(_WIN32)
+ (void)codepoint;
return 1;
#else
return wcwidth(codepoint);
float error_before_opt = ggml_get_f32_1d(e, 0);
- struct ggml_opt_params opt_params_adam = ggml_opt_default_params(GGML_OPT_ADAM);
struct ggml_opt_params opt_params_lbfgs = ggml_opt_default_params(GGML_OPT_LBFGS);
- opt_params_adam.print_forward_graph = false;
- opt_params_adam.print_backward_graph = false;
opt_params_lbfgs.print_forward_graph = false;
opt_params_lbfgs.print_backward_graph = false;
- opt_params_adam.adam.n_iter = 16;
opt_params_lbfgs.lbfgs.n_iter = 16;
- // ggml_opt(ctx0, opt_params_adam, e);
ggml_opt(ctx0, opt_params_lbfgs, e);
//
ggml_build_forward_expand(&gf, e);
#include <unistd.h>
#elif defined (_WIN32)
#define WIN32_LEAN_AND_MEAN
-#define NOMINMAX
+#ifndef NOMINMAX
+# define NOMINMAX
+#endif
#include <windows.h>
#include <signal.h>
#endif
assert(0u < beams_state.n_beams);
const llama_token * tokens = beams_state.beam_views[0].tokens;
std::copy(tokens, tokens + n, callback_data.response.end() - n);
- printf("%lu", n);
+ printf("%zu", n);
}
fflush(stdout);
#if 1 // DEBUG: print current beams for this iteration
if (tokens_list.size() > max_tokens_list_size)
{
- fprintf( stderr , "%s: error: prompt too long (%lu tokens, max %lu)\n" ,
+ fprintf( stderr , "%s: error: prompt too long (%zu tokens, max %zu)\n" ,
__func__ , tokens_list.size() , max_tokens_list_size );
return 1;
}
#include "completion.js.hpp"
#include "json-schema-to-grammar.mjs.hpp"
+#include <cstddef>
+
#ifndef SERVER_VERBOSE
#define SERVER_VERBOSE 1
#endif
{
const auto timings = llama_get_timings(llama.ctx);
- assert(timings.n_eval == llama.num_tokens_predicted);
+ assert(timings.n_eval == ptrdiff_t(llama.num_tokens_predicted));
return json{
{"prompt_n", timings.n_p_eval},
const llama_token * tokens = beams_state.beam_views[0].tokens;
const auto map = [](llama_token tok) { return completion_token_output{{},tok}; };
std::transform(tokens, tokens + n, llama.generated_token_probs.end() - n, map);
- printf("%lu", n);
+ printf("%zu", n);
}
fflush(stdout);
#if 0 // DEBUG: print current beams for this iteration
svr.set_exception_handler([](const Request &, Response &res, std::exception_ptr ep)
{
- const auto * fmt = "500 Internal Server Error\n%s";
+ const char fmt[] = "500 Internal Server Error\n%s";
char buf[BUFSIZ];
try {
std::rethrow_exception(std::move(ep));
int ntry, float alpha) {
float min = x[0];
float max = x[0];
- float sum_x = 0;
- float sum_x2 = 0;
for (int i = 1; i < n; ++i) {
if (x[i] < min) min = x[i];
if (x[i] > max) max = x[i];
- sum_x += x[i];
- sum_x2 += x[i]*x[i];
}
if (max == min) {
for (int i = 0; i < n; ++i) L[i] = 0;
__m256 acc = _mm256_setzero_ps();
- uint32_t *aux;
+ const uint32_t *aux;
for (int i = 0; i < nb; ++i) {
const int8_t * restrict q8 = y[i].qs;
// Set up scales
- aux = (uint32_t *)x[i].scales;
+ aux = (const uint32_t *)x[i].scales;
__m128i scales128 = _mm_set_epi32(
((aux[1] >> 4) & kmask2) | (((aux[2] >> 6) & kmask1) << 4),
((aux[0] >> 4) & kmask2) | (((aux[2] >> 4) & kmask1) << 4),
std::vector<std::vector<const llama_grammar_element *>> & new_stacks) {
if (stack.empty()) {
- new_stacks.push_back(stack);
+ new_stacks.emplace_back(stack);
return;
}
}
case LLAMA_GRETYPE_CHAR:
case LLAMA_GRETYPE_CHAR_NOT:
- new_stacks.push_back(stack);
+ new_stacks.emplace_back(stack);
break;
default:
// end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range