$ ./bin/starcoder -m ../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin -p "def fibonnaci(" -t 4 --top_k 0 --top_p 0.95 --temp 0.2
main: seed = 1683881276
-gpt2_model_load: loading model from '../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin'
-gpt2_model_load: n_vocab = 49280
-gpt2_model_load: n_ctx = 2048
-gpt2_model_load: n_embd = 2048
-gpt2_model_load: n_head = 16
-gpt2_model_load: n_layer = 24
-gpt2_model_load: ftype = 3
-gpt2_model_load: ggml ctx size = 1794.90 MB
-gpt2_model_load: memory size = 768.00 MB, n_mem = 49152
-gpt2_model_load: model size = 1026.83 MB
+starcoder_model_load: loading model from '../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin'
+starcoder_model_load: n_vocab = 49280
+starcoder_model_load: n_ctx = 2048
+starcoder_model_load: n_embd = 2048
+starcoder_model_load: n_head = 16
+starcoder_model_load: n_layer = 24
+starcoder_model_load: ftype = 3
+starcoder_model_load: ggml ctx size = 1794.90 MB
+starcoder_model_load: memory size = 768.00 MB, n_mem = 49152
+starcoder_model_load: model size = 1026.83 MB
main: prompt: 'def fibonnaci('
main: number of tokens in prompt = 7, first 8 tokens: 563 24240 78 2658 64 2819 7
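The `ftype = 3` reported by the loader is ggml's model-file type code for q4_1. A hedged sketch of the mapping, with the values being an assumption based on how ggml's examples encoded file types at the time:

```cpp
// Hedged sketch: ggml model-file type codes (values are assumptions).
enum model_file_type {
    FTYPE_F32  = 0, // all tensors fp32
    FTYPE_F16  = 1, // mostly fp16 (the hparams default below)
    FTYPE_Q4_0 = 2, // 4-bit quantization, scale only
    FTYPE_Q4_1 = 3, // 4-bit quantization, scale + offset -- "ftype = 3" above
};
```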
| Model | Original size | Quantized size | Quantization type |
| --- | --- | --- | --- |
| `bigcode/gpt_bigcode-santacoder` | 5396.45 MB | 1026.83 MB | 4-bit integer (q4_1) |
| `bigcode/starcoder` | 71628.23 MB | 13596.23 MB | 4-bit integer (q4_1) |
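The roughly 5x shrink follows from the q4_1 layout. A back-of-the-envelope check, assuming each q4_1 block packs 32 weights into 16 bytes of nibbles plus a float scale and a float offset (24 bytes per block):

```cpp
#include <cstdio>

// Hedged sketch: predicted q4_1 size vs. fp32, assuming 24-byte blocks
// of 32 weights; tensors kept in full precision explain the remainder.
int main() {
    const double ratio = (16 + 2 * 4.0) / (32 * 4.0); // 24 / 128 = 0.1875
    printf("santacoder: %.2f MB predicted vs. 1026.83 MB reported\n",
           5396.45 * ratio); // ~1011.8 MB
    return 0;
}
```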
// default hparams (SantaCoder 1.1B)
// https://huggingface.co/bigcode/gpt_bigcode-santacoder/blob/main/config.json
-struct gpt2_hparams {
+struct starcoder_hparams {
int32_t n_vocab = 49280;
int32_t n_ctx = 2048;
int32_t n_embd = 2048;
int32_t ftype = 1;
};
-struct gpt2_layer {
+struct starcoder_layer {
// normalization
struct ggml_tensor * ln_1_g;
struct ggml_tensor * ln_1_b;
struct ggml_tensor * c_mlp_proj_b;
};
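During loading, each layer's tensors are typically registered in a name-to-tensor map so the weights can be filled in as they stream from the file; the naming scheme is fixed by the conversion script. A sketch, with both the `tensors` map and the `model/h<i>/...` names being assumptions modeled on ggml's GPT-2 example:

```cpp
// Hedged sketch: register per-layer tensors by name (the map and the
// naming convention are assumptions, not confirmed against the script).
for (int i = 0; i < model.hparams.n_layer; ++i) {
    auto & layer = model.layers[i];
    const std::string h = "model/h" + std::to_string(i);
    model.tensors[h + "/ln_1/g"]       = layer.ln_1_g;
    model.tensors[h + "/ln_1/b"]       = layer.ln_1_b;
    model.tensors[h + "/mlp/c_proj/b"] = layer.c_mlp_proj_b;
}
```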
-struct gpt2_model {
- gpt2_hparams hparams;
+struct starcoder_model {
+ starcoder_hparams hparams;
// normalization
struct ggml_tensor * ln_f_g;
struct ggml_tensor * wpe; // position embedding
struct ggml_tensor * lm_head; // language model head
- std::vector<gpt2_layer> layers;
+ std::vector<starcoder_layer> layers;
// key + value memory
struct ggml_tensor * memory_k;
};
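The `memory_k`/`memory_v` tensors account for the `memory size = 768.00 MB, n_mem = 49152` line in the load log. A quick check, assuming the KV cache is stored in fp32 as in ggml's GPT-2 example:

```cpp
#include <cstdio>
#include <cstdint>

// Hedged sketch: KV-cache sizing for SantaCoder, assuming fp32 storage.
int main() {
    const int64_t n_layer = 24, n_ctx = 2048, n_embd = 2048;
    const int64_t n_mem      = n_layer * n_ctx; // 49152, as logged
    const int64_t n_elements = n_embd  * n_mem; // per tensor (k or v)

    // two tensors (k and v), 4 bytes per fp32 element -> 768.00 MB
    printf("memory size = %.2f MB, n_mem = %lld\n",
           2.0 * n_elements * 4 / (1024.0 * 1024.0), (long long) n_mem);
    return 0;
}
```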
// load the model's weights from a file
-bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab) {
+bool starcoder_model_load(const std::string & fname, starcoder_model & model, gpt_vocab & vocab) {
printf("%s: loading model from '%s'\n", __func__, fname.c_str());
auto fin = std::ifstream(fname, std::ios::binary);
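After opening the stream, the loader checks a magic number and reads the hyperparameters in the order the conversion script wrote them. A minimal sketch of that header parsing, assuming consecutive little-endian `int32_t` fields; the full loader also reads `n_head` and `n_layer`, which the struct excerpt above elides:

```cpp
// Hedged sketch: verify the ggml magic, then read the hparams (layout
// assumed; only the fields shown in the excerpted struct are read here).
{
    uint32_t magic = 0;
    fin.read((char *) &magic, sizeof(magic));
    if (magic != 0x67676d6c) { // "ggml"
        fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n",
                __func__, fname.c_str());
        return false;
    }
}
{
    auto & hparams = model.hparams;
    fin.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab));
    fin.read((char *) &hparams.n_ctx,   sizeof(hparams.n_ctx));
    fin.read((char *) &hparams.n_embd,  sizeof(hparams.n_embd));
    fin.read((char *) &hparams.ftype,   sizeof(hparams.ftype));
    printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
}
```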
// - embd_inp: the embeddings of the tokens in the context
// - embd_w: the predicted logits for the next token
//
-bool gpt2_eval(
- const gpt2_model & model,
+bool starcoder_eval(
+ const starcoder_model & model,
const int n_threads,
const int n_past,
const std::vector<gpt_vocab::id> & embd_inp,
int64_t t_load_us = 0;
gpt_vocab vocab;
- gpt2_model model;
+ starcoder_model model;
// load the model
{
const int64_t t_start_us = ggml_time_us();
- if (!gpt2_model_load(params.model, model, vocab)) {
+ if (!starcoder_model_load(params.model, model, vocab)) {
fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
return 1;
}
// determine the required inference memory per token:
size_t mem_per_token = 0;
- gpt2_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+ starcoder_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
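The warm-up call above evaluates a dummy batch purely so that `mem_per_token` gets measured; later calls can then grow the scratch buffer before each real batch. A sketch of that growth pattern, with the function name and the 10% headroom being assumptions modeled on the GPT-2 example:

```cpp
#include <cstdlib>

// Hedged sketch: grow the eval scratch buffer once mem_per_token is known.
static void * ensure_eval_buffer(void * buf, size_t & buf_size,
                                 size_t mem_per_token, size_t n_tokens) {
    if (mem_per_token > 0 && mem_per_token * n_tokens > buf_size) {
        buf_size = 1.1 * (mem_per_token * n_tokens); // ~10% headroom
        buf = realloc(buf, buf_size);
    }
    return buf;
}
```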
for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
// predict
if (embd.size() > 0) {
const int64_t t_start_us = ggml_time_us();
- if (!gpt2_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
+ if (!starcoder_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
printf("Failed to predict\n");
return 1;
}
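Once `starcoder_eval` returns, the next token is drawn from the logits using the `--top_k`, `--top_p`, and `--temp` flags from the command line above. A sketch of the sampling step, assuming the `gpt_sample_top_k_top_p` helper from ggml's shared example code and an `std::mt19937` rng seeded with `params.seed`:

```cpp
// Hedged sketch: sample the next token from the last row of logits.
{
    const int n_vocab = model.hparams.n_vocab;

    const gpt_vocab::id id = gpt_sample_top_k_top_p(
            vocab,
            logits.data() + (logits.size() - n_vocab), // last token's logits
            params.top_k, params.top_p, params.temp, rng);

    embd.push_back(id); // feed the sampled token back in for the next step
}
```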
#include <regex>
// default hparams (SantaCoder 1.1B)
-struct gpt2_hparams {
+struct starcoder_hparams {
int32_t n_vocab = 49280;
int32_t n_ctx = 2048;
int32_t n_embd = 2048;
};
// quantize a model
-bool gpt2_model_quantize(const std::string & fname_inp, const std::string & fname_out, ggml_ftype ftype) {
+bool starcoder_model_quantize(const std::string & fname_inp, const std::string & fname_out, ggml_ftype ftype) {
gpt_vocab vocab;
printf("%s: loading model from '%s'\n", __func__, fname_inp.c_str());
fout.write((char *) &magic, sizeof(magic));
}
- gpt2_hparams hparams;
+ starcoder_hparams hparams;
// load hparams
{
{
const int64_t t_start_us = ggml_time_us();
- if (!gpt2_model_quantize(fname_inp, fname_out, ggml_ftype(ftype))) {
+ if (!starcoder_model_quantize(fname_inp, fname_out, ggml_ftype(ftype))) {
fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str());
return 1;
}
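The timing block above sits inside a small driver; in the other ggml quantizers the arguments follow an input/output/type convention, and passing type 3 produces the q4_1 files listed in the table. A hedged sketch of such a driver, assuming that convention carries over:

```cpp
#include <cstdio>
#include <cstdlib>
#include <string>

// Hedged sketch: quantize driver, assuming the usual ggml example
// convention of "<model-f32.bin> <model-quant.bin> <type>" arguments.
int main(int argc, char ** argv) {
    if (argc != 4) {
        fprintf(stderr, "usage: %s model-f32.bin model-quant.bin type\n", argv[0]);
        return 1;
    }

    const std::string fname_inp = argv[1];
    const std::string fname_out = argv[2];
    const int ftype = atoi(argv[3]); // 3 -> q4_1, as in the table above

    if (!starcoder_model_quantize(fname_inp, fname_out, ggml_ftype(ftype))) {
        fprintf(stderr, "failed to quantize model from '%s'\n", fname_inp.c_str());
        return 1;
    }
    return 0;
}
```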