Fix convert script, warnings, alpaca instructions, default params

author Georgi Gerganov <redacted>

Tue, 21 Mar 2023 15:59:16 +0000 (17:59 +0200)

committer Georgi Gerganov <redacted>

Tue, 21 Mar 2023 15:59:16 +0000 (17:59 +0200)
author Georgi Gerganov <redacted>
Tue, 21 Mar 2023 15:59:16 +0000 (17:59 +0200)
committer Georgi Gerganov <redacted>
Tue, 21 Mar 2023 15:59:16 +0000 (17:59 +0200)
diff --git a/README.md b/README.md

index 349e82c56118fea8e7cf325992f41582c28e95fb..f0b0c2a5b6671bfc4ed093acbe302c5e3d56be74 100644 (file)
--- a/README.md
+++ b/README.md
@@ -193,15 +193,15 @@ First, download the `ggml` Alpaca model into the `./models` folder:
  ```
  # use one of these
  # TODO: add a script to simplify the download
-curl -o ggml2-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
-curl -o ggml2-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
-curl -o ggml2-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
+curl -o ./models/ggml-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
+curl -o ./models/ggml-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
+curl -o ./models/ggml-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
  ```
  
  Now run the `main` tool like this:
  
  ```
-./main -m ./models/ggml2-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins
+./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins
  ```
  
  Sample run:
@@ -218,7 +218,7 @@ Sample run:
  There 26 letters in the English Alphabet
  > What is the most common way of transportation in Amsterdam?
  The majority (54%) are using public transit. This includes buses, trams and metros with over 100 lines throughout the city which make it very accessible for tourists to navigate around town as well as locals who commute by tram or metro on a daily basis
-> List 5 words that start with "ca".                                                                       
+> List 5 words that start with "ca".
  cadaver, cauliflower, cabbage (vegetable), catalpa (tree) and Cailleach.
  > 
  ```
diff --git a/alpaca.sh b/alpaca.sh

index 284989bc048f2508d8bcdbb1e08f59d3e5fa9d89..2f36d6f54a82d2dfeeefeb53bc981421a2e8ca67 100755 (executable)
--- a/alpaca.sh
+++ b/alpaca.sh
@@ -3,4 +3,4 @@
  # Temporary script - will be removed in the future
  #
  
-./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.96 --repeat_penalty 1 -t 7
+./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7
diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py

index 46f7eba1c217c0909164000e1307a8f856433d87..db5b00fec468668fd829254364c5a613178319ac 100644 (file)
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -27,9 +27,9 @@ from sentencepiece import SentencePieceProcessor
  def parse_args():
  
      parser = argparse.ArgumentParser(description='Convert a LLaMA model checkpoint to a ggml compatible file')
-    parser.add_argument('dir_model', help='directory containing the model checkpoint')
-    parser.add_argument('ftype', type=int, choices=[0, 1], default=1, help='file type (0: float32, 1: float16)')
-    parser.add_argument('vocab_only', type=bool, default=False, help='only write vocab to file')
+    parser.add_argument('dir_model',  help='directory containing the model checkpoint')
+    parser.add_argument('ftype',      help='file type (0: float32, 1: float16)', type=int, choices=[0, 1], default=1)
+    parser.add_argument('vocab_only', help='only write vocab to file', type=int, default=0, nargs='?')
      return parser.parse_args()
  
  def get_n_parts(dim):
@@ -135,6 +135,8 @@ def main():
  
      hparams, tokenizer = load_hparams_and_tokenizer(dir_model)
  
+    print(args)
+
      # if only writing vocab to file
      if args.vocab_only:
  
diff --git a/main.cpp b/main.cpp

index 662a2a79bc4c7b64faccd9c5ac2a57c4448242a1..6bae80cdf5876759ac0f24a533a711ba369f74ab 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -165,12 +165,20 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
      // load vocab
      {
          std::string word;
+        std::vector<char> tmp(64);
+
          for (int i = 0; i < model.hparams.n_vocab; i++) {
              uint32_t len;
              fin.read((char *) &len, sizeof(len));
  
              word.resize(len);
-            fin.read((char *) word.data(), len);
+            if (len > 0) {
+                tmp.resize(len);
+                fin.read(tmp.data(), len);
+                word.assign(tmp.data(), len);
+            } else {
+                word.clear();
+            }
  
              float score;
              fin.read((char *) &score, sizeof(score));
@@ -178,10 +186,6 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
              vocab.token_to_id[word] = i;
              vocab.id_to_token[i] = word;
              vocab.score[i] = score;
-
-            //if (i < 30000) {
-            //    fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str());
-            //}
          }
      }
  
@@ -974,7 +978,7 @@ int main(int argc, char ** argv) {
          n_past += embd.size();
          embd.clear();
  
-        if (embd_inp.size() <= input_consumed) {
+        if ((int) embd_inp.size() <= input_consumed) {
              // out of user input, sample next token
              const float top_k = params.top_k;
              const float top_p = params.top_p;
@@ -1011,7 +1015,7 @@ int main(int argc, char ** argv) {
              --remaining_tokens;
          } else {
              // some user input remains from prompt or interaction, forward it to processing
-            while (embd_inp.size() > input_consumed) {
+            while ((int) embd_inp.size() > input_consumed) {
                  embd.push_back(embd_inp[input_consumed]);
                  last_n_tokens.erase(last_n_tokens.begin());
                  last_n_tokens.push_back(embd_inp[input_consumed]);
@@ -1036,7 +1040,7 @@ int main(int argc, char ** argv) {
  
          // in interactive mode, and not currently processing queued inputs;
          // check if we should prompt the user for more
-        if (params.interactive && embd_inp.size() <= input_consumed) {
+        if (params.interactive && (int) embd_inp.size() <= input_consumed) {
              // check for reverse prompt
              for (auto antiprompt_inp : antipromptv_inp) {
                  if (antiprompt_inp.size() && std::equal(antiprompt_inp.rbegin(), antiprompt_inp.rend(), last_n_tokens.rbegin())) {
author	Georgi Gerganov <redacted>
	Tue, 21 Mar 2023 15:59:16 +0000 (17:59 +0200)
committer	Georgi Gerganov <redacted>
	Tue, 21 Mar 2023 15:59:16 +0000 (17:59 +0200)
README.md		patch | blob | history
alpaca.sh		patch | blob | history
convert-pth-to-ggml.py		patch | blob | history
main.cpp		patch | blob | history