break;
}
- if (input_char == WEOF || input_char == 0x04 /* Ctrl+D*/) {
+ if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) {
end_of_stream = true;
break;
}
char32_t code = getchar32();
if (code == '[' || code == 0x1B) {
// Discard the rest of the escape sequence
- while ((code = getchar32()) != WEOF) {
+ while ((code = getchar32()) != (char32_t) WEOF) {
if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') {
break;
}
float mirostat_tau = 5.00f; // target entropy
float mirostat_eta = 0.10f; // learning rate
- std::string model = "models/7B/ggml-model.bin"; // model path
- std::string prompt = "";
+ std::string model = "models/7B/ggml-model.bin"; // model path
+ std::string prompt = "";
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
std::string input_prefix = ""; // string to prefix user inputs with
std::string input_suffix = ""; // string to suffix user inputs with
std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
std::string lora_adapter = ""; // lora adapter path
- std::string lora_base = ""; // base model path for the lora adapter
+ std::string lora_base = ""; // base model path for the lora adapter
bool memory_f16 = true; // use f16 instead of f32 for memory kv
bool random_prompt = false; // do not randomize prompt if none provided
size_t ctx_size;
size_t mmapped_size;
ml->calc_sizes(&ctx_size, &mmapped_size);
- fprintf(stderr, "%s: ggml ctx size = %6.2f KB\n", __func__, ctx_size/1024.0);
+ fprintf(stderr, "%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/1024.0/1024.0);
// print memory requirements
{
-#include "llama.h"
#include "ggml.h"
-#include <cassert>
-#include <cmath>
+#include "llama.h"
+
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
#include <numeric>
#include <cassert>
#include <iostream>
#include <vector>
#include <algorithm>
-
void dump(const llama_token_data_array * candidates) {
for (size_t i = 0; i < candidates->size; i++) {
printf("%d: %f (%f)\n", candidates->data[i].id, candidates->data[i].p, candidates->data[i].logit);