From: Georgi Gerganov Date: Fri, 20 Oct 2023 07:05:28 +0000 (+0300) Subject: gpt-2 : add ignore-eos flag X-Git-Tag: upstream/0.0.1642~1213 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=f25addb9e88853648c60b96fd73ab95709d25e53;p=pkg%2Fggml%2Fsources%2Fggml gpt-2 : add ignore-eos flag --- diff --git a/examples/common.cpp b/examples/common.cpp index d55708ad..603c655a 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -38,8 +38,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.seed = std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(get_next_arg(i, argc, argv, arg, params)); - } else if (arg == "-ngl" || arg == "--gpu-layers" || arg == "--n-gpu-layers") { - params.n_gpu_layers = std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "-p" || arg == "--prompt") { params.prompt = get_next_arg(i, argc, argv, arg, params); } else if (arg == "-n" || arg == "--n_predict") { @@ -60,6 +58,10 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.n_batch= std::stoi(get_next_arg(i, argc, argv, arg, params)); } else if (arg == "-c" || arg == "--context") { params.n_ctx= std::stoi(get_next_arg(i, argc, argv, arg, params)); + } else if (arg == "-ngl" || arg == "--gpu-layers" || arg == "--n-gpu-layers") { + params.n_gpu_layers = std::stoi(get_next_arg(i, argc, argv, arg, params)); + } else if (arg == "--ignore-eos") { + params.ignore_eos = true; } else if (arg == "-m" || arg == "--model") { params.model = get_next_arg(i, argc, argv, arg, params); } else if (arg == "-i" || arg == "--interactive") { @@ -101,7 +103,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " -h, --help show this help message and exit\n"); fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n"); fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: 
%d)\n", params.n_threads); - fprintf(stderr, " -ngl N, --gpu-layers N number of layers to offload to GPU on supported models (default: %d)\n", params.n_gpu_layers); fprintf(stderr, " -p PROMPT, --prompt PROMPT\n"); fprintf(stderr, " prompt to start generation with (default: random)\n"); fprintf(stderr, " -f FNAME, --file FNAME\n"); @@ -116,6 +117,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " --repeat-penalty N penalize repeat sequence of tokens (default: %.2f, 1.0 = disabled)\n", (double)params.repeat_penalty); fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); fprintf(stderr, " -c N, --context N context / KV cache size (default: %d)\n", params.n_ctx); + fprintf(stderr, " --ignore-eos ignore EOS token during generation\n"); + fprintf(stderr, " -ngl N, --gpu-layers N number of layers to offload to GPU on supported models (default: %d)\n", params.n_gpu_layers); fprintf(stderr, " -m FNAME, --model FNAME\n"); fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); fprintf(stderr, "\n"); diff --git a/examples/common.h b/examples/common.h index 635afc24..1d4e9c37 100644 --- a/examples/common.h +++ b/examples/common.h @@ -15,12 +15,15 @@ // struct gpt_params { - int32_t seed = -1; // RNG seed - int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); - int32_t n_predict = 200; // new tokens to predict - int32_t n_parallel = 1; // number of parallel streams - int32_t n_batch = 8; // batch size for prompt processing - int32_t n_ctx = 2048; // context size (this is the KV cache max size) + int32_t seed = -1; // RNG seed + int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); + int32_t n_predict = 200; // new tokens to predict + int32_t n_parallel = 1; // number of parallel streams + int32_t n_batch = 8; // batch size for prompt processing + int32_t n_ctx = 2048; // context size (this is the KV cache max 
size) +   int32_t n_gpu_layers = 0; // number of layers to offload to the GPU + +   bool ignore_eos = false; // ignore EOS token when generating text // sampling parameters int32_t top_k = 40; @@ -35,8 +38,6 @@ struct gpt_params { bool interactive = false; int32_t interactive_port = -1; - - int32_t n_gpu_layers = 0; }; bool gpt_params_parse(int argc, char ** argv, gpt_params & params); diff --git a/examples/gpt-2/main-batched.cpp b/examples/gpt-2/main-batched.cpp index 1c499bd5..76066ce8 100644 --- a/examples/gpt-2/main-batched.cpp +++ b/examples/gpt-2/main-batched.cpp @@ -1136,7 +1136,7 @@ int main(int argc, char ** argv) { } // is it an end of stream? -> mark the stream as finished - if (id == 50256 || n_cur == n_len - 1) { + if ((!params.ignore_eos && id == 50256) || n_cur == n_len - 1) { i_batch[i] = -1; printf("\n"); if (n_parallel > 1) { diff --git a/examples/gpt-2/main.cpp b/examples/gpt-2/main.cpp index 5702fd8e..514f299a 100644 --- a/examples/gpt-2/main.cpp +++ b/examples/gpt-2/main.cpp @@ -975,7 +975,7 @@ int main(int argc, char ** argv) { fflush(stdout); // end of text token - if (embd.back() == 50256) { + if (!params.ignore_eos && embd.back() == 50256) { break; } }