From: Ravindra Marella
Date: Sat, 13 May 2023 13:47:02 +0000 (+0530)
Subject: starcoder : update example to follow the naming convention of other examples (#153)
X-Git-Tag: upstream/0.0.1642~1480
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=93fa3864355f17b895cc677113ce42aacbdf631b;p=pkg%2Fggml%2Fsources%2Fggml

starcoder : update example to follow the naming convention of other examples (#153)
---

diff --git a/examples/starcoder/README.md b/examples/starcoder/README.md
index 19475786..8a43ab70 100644
--- a/examples/starcoder/README.md
+++ b/examples/starcoder/README.md
@@ -36,16 +36,16 @@ options:
 $ ./bin/starcoder -m ../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin -p "def fibonnaci(" -t 4 --top_k 0 --top_p 0.95 --temp 0.2
 main: seed = 1683881276
-gpt2_model_load: loading model from '../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin'
-gpt2_model_load: n_vocab = 49280
-gpt2_model_load: n_ctx   = 2048
-gpt2_model_load: n_embd  = 2048
-gpt2_model_load: n_head  = 16
-gpt2_model_load: n_layer = 24
-gpt2_model_load: ftype   = 3
-gpt2_model_load: ggml ctx size = 1794.90 MB
-gpt2_model_load: memory size = 768.00 MB, n_mem = 49152
-gpt2_model_load: model size = 1026.83 MB
+starcoder_model_load: loading model from '../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin'
+starcoder_model_load: n_vocab = 49280
+starcoder_model_load: n_ctx   = 2048
+starcoder_model_load: n_embd  = 2048
+starcoder_model_load: n_head  = 16
+starcoder_model_load: n_layer = 24
+starcoder_model_load: ftype   = 3
+starcoder_model_load: ggml ctx size = 1794.90 MB
+starcoder_model_load: memory size = 768.00 MB, n_mem = 49152
+starcoder_model_load: model size = 1026.83 MB
 main: prompt: 'def fibonnaci('
 main: number of tokens in prompt = 7, first 8 tokens: 563 24240 78 2658 64 2819 7
@@ -109,4 +109,4 @@ You can also try to quantize the `ggml` models via 4-bit integer quantization.
 | Model | Original size | Quantized size | Quantization type |
 | --- | --- | --- | --- |
 | `bigcode/gpt_bigcode-santacoder` | 5396.45 MB | 1026.83 MB | 4-bit integer (q4_1) |
-| `bigcode/starcoder` | 71628.23 MB | 13596.23 MB | 4-bit integer (q4_1) |
\ No newline at end of file
+| `bigcode/starcoder` | 71628.23 MB | 13596.23 MB | 4-bit integer (q4_1) |
diff --git a/examples/starcoder/main.cpp b/examples/starcoder/main.cpp
index af9151cd..d625a22a 100644
--- a/examples/starcoder/main.cpp
+++ b/examples/starcoder/main.cpp
@@ -16,7 +16,7 @@
 
 // default hparams (GPT-2 117M)
 // https://huggingface.co/bigcode/gpt_bigcode-santacoder/blob/main/config.json
-struct gpt2_hparams {
+struct starcoder_hparams {
     int32_t n_vocab = 49280;
     int32_t n_ctx   = 2048;
     int32_t n_embd  = 2048;
@@ -25,7 +25,7 @@
     int32_t ftype   = 1;
 };
 
-struct gpt2_layer {
+struct starcoder_layer {
     // normalization
     struct ggml_tensor * ln_1_g;
     struct ggml_tensor * ln_1_b;
@@ -48,8 +48,8 @@ struct gpt2_layer {
     struct ggml_tensor * c_mlp_proj_b;
 };
 
-struct gpt2_model {
-    gpt2_hparams hparams;
+struct starcoder_model {
+    starcoder_hparams hparams;
 
     // normalization
     struct ggml_tensor * ln_f_g;
@@ -59,7 +59,7 @@ struct gpt2_model {
     struct ggml_tensor * wpe; // token embedding
     struct ggml_tensor * lm_head; // language model head
 
-    std::vector<gpt2_layer> layers;
+    std::vector<starcoder_layer> layers;
 
     // key + value memory
     struct ggml_tensor * memory_k;
@@ -71,7 +71,7 @@ struct gpt2_model {
 };
 
 // load the model's weights from a file
-bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab) {
+bool starcoder_model_load(const std::string & fname, starcoder_model & model, gpt_vocab & vocab) {
     printf("%s: loading model from '%s'\n", __func__, fname.c_str());
 
     auto fin = std::ifstream(fname, std::ios::binary);
@@ -388,8 +388,8 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
 //   - embd_inp:  the embeddings of the tokens in the context
 //   - embd_w:    the predicted logits for the next token
 //
-bool gpt2_eval(
-        const gpt2_model & model,
+bool starcoder_eval(
+        const starcoder_model & model,
         const int n_threads,
         const int n_past,
         const std::vector<gpt_vocab::id> & embd_inp,
@@ -729,13 +729,13 @@ int main(int argc, char ** argv) {
     int64_t t_load_us = 0;
 
     gpt_vocab vocab;
-    gpt2_model model;
+    starcoder_model model;
 
     // load the model
     {
         const int64_t t_start_us = ggml_time_us();
 
-        if (!gpt2_model_load(params.model, model, vocab)) {
+        if (!starcoder_model_load(params.model, model, vocab)) {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
             return 1;
         }
@@ -768,14 +768,14 @@ int main(int argc, char ** argv) {
 
     // determine the required inference memory per token:
     size_t mem_per_token = 0;
-    gpt2_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+    starcoder_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
     for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
 
-            if (!gpt2_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
+            if (!starcoder_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
                 printf("Failed to predict\n");
                 return 1;
             }
diff --git a/examples/starcoder/quantize.cpp b/examples/starcoder/quantize.cpp
index 2ed612d0..78115399 100644
--- a/examples/starcoder/quantize.cpp
+++ b/examples/starcoder/quantize.cpp
@@ -14,7 +14,7 @@
 #include <regex>
 
 // default hparams (GPT-2 117M)
-struct gpt2_hparams {
+struct starcoder_hparams {
     int32_t n_vocab = 49280;
     int32_t n_ctx   = 2048;
     int32_t n_embd  = 2048;
@@ -24,7 +24,7 @@
 };
 
 // quantize a model
-bool gpt2_model_quantize(const std::string & fname_inp, const std::string & fname_out, ggml_ftype ftype) {
+bool starcoder_model_quantize(const std::string & fname_inp, const std::string & fname_out, ggml_ftype ftype) {
     gpt_vocab vocab;
 
     printf("%s: loading model from '%s'\n", __func__, fname_inp.c_str());
@@ -53,7 +53,7 @@ bool gpt2_model_quantize(const std::string & fname_inp, const std::string & fnam
         fout.write((char *) &magic, sizeof(magic));
     }
 
-    gpt2_hparams hparams;
+    starcoder_hparams hparams;
 
     // load hparams
     {
@@ -157,7 +157,7 @@ int main(int argc, char ** argv) {
     {
         const int64_t t_start_us = ggml_time_us();
 
-        if (!gpt2_model_quantize(fname_inp, fname_out, ggml_ftype(ftype))) {
+        if (!starcoder_model_quantize(fname_inp, fname_out, ggml_ftype(ftype))) {
             fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str());
             return 1;
         }
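
For reference, here is a minimal sketch of how the renamed entry points fit together, mirroring the load/eval flow of `main()` in the diff above. It assumes the example's `common.h` (which provides `gpt_vocab` and `gpt_vocab::id`) and the declarations from `examples/starcoder/main.cpp`; the `run_starcoder` wrapper and the concrete token ids are illustrative, not part of the commit:

```cpp
// Minimal usage sketch of the renamed API (illustrative only, not part of the commit).
// Assumes the starcoder example's common.h, which provides gpt_vocab and
// gpt_vocab::id, plus the declarations from examples/starcoder/main.cpp above.
#include "common.h"

#include <cstdio>
#include <string>
#include <vector>

// hypothetical wrapper; mirrors the load/eval flow of the example's main()
int run_starcoder(const std::string & model_path, int n_threads) {
    gpt_vocab vocab;
    starcoder_model model;

    // load the ggml weights and vocabulary from disk
    if (!starcoder_model_load(model_path, model, vocab)) {
        fprintf(stderr, "failed to load model from '%s'\n", model_path.c_str());
        return 1;
    }

    std::vector<float> logits;
    size_t mem_per_token = 0;

    // warm-up call on a few dummy tokens, used only to measure the
    // inference memory required per token (as done in main())
    starcoder_eval(model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);

    // evaluate a tokenized prompt; logits for the next token land in `logits`
    std::vector<gpt_vocab::id> embd = { 563, 24240, 78 }; // example ids from the README output
    if (!starcoder_eval(model, n_threads, /*n_past=*/0, embd, logits, mem_per_token)) {
        fprintf(stderr, "failed to predict\n");
        return 1;
    }

    return 0;
}
```

Note that `logits` and `mem_per_token` are out-parameters: the same pattern the gpt-2 example uses, which is why the rename here is purely mechanical and changes no behavior.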