We could use std::unordered_map over std::map (#305)

author Fabio R. Sluzala <redacted>

Tue, 21 Mar 2023 17:21:50 +0000 (14:21 -0300)

committer GitHub <redacted>

Tue, 21 Mar 2023 17:21:50 +0000 (19:21 +0200)
author Fabio R. Sluzala <redacted>
Tue, 21 Mar 2023 17:21:50 +0000 (14:21 -0300)
committer GitHub <redacted>
Tue, 21 Mar 2023 17:21:50 +0000 (19:21 +0200)
diff --git a/main.cpp b/main.cpp

index 43b82b1e49b454c3de5240533a946f64210a1885..fe9e583f8c51354794a6723bed999ed2e9b2e2b8 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -9,7 +9,6 @@
  #include <cstring>
  #include <fstream>
  #include <iostream>
-#include <map>
  #include <string>
  #include <vector>
  
@@ -69,7 +68,7 @@ void set_console_state(console_state new_st)
  static const int EOS_TOKEN_ID = 2;
  
  // determine number of model parts based on the dimension
-static const std::map<int, int> LLAMA_N_PARTS = {
+static const std::unordered_map<int, int> LLAMA_N_PARTS = {
      { 4096, 1 },
      { 5120, 2 },
      { 6656, 4 },
@@ -123,7 +122,7 @@ struct llama_model {
  
      //
      struct ggml_context * ctx;
-    std::map<std::string, struct ggml_tensor *> tensors;
+    std::unordered_map<std::string, struct ggml_tensor *> tensors;
  };
  
  // load the model's weights from a file
@@ -208,6 +207,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
      // load vocab
      {
          std::string word;
+        vocab.id_to_token.resize(model.hparams.n_vocab);
          std::vector<char> tmp(64);
  
          for (int i = 0; i < model.hparams.n_vocab; i++) {
@@ -227,8 +227,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
              fin.read((char *) &score, sizeof(score));
  
              vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-            vocab.score[i] = score;
+
+            auto &tok_score = vocab.id_to_token[i];
+            tok_score.tok = word;
+            tok_score.score = score;
          }
      }
  
@@ -1028,7 +1030,7 @@ int main(int argc, char ** argv) {
      fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
      fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
      for (int i = 0; i < (int) embd_inp.size(); i++) {
-        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).tok.c_str());
      }
      fprintf(stderr, "\n");
      if (params.interactive) {
@@ -1154,7 +1156,7 @@ int main(int argc, char ** argv) {
          // display text
          if (!input_noecho) {
              for (auto id : embd) {
-                printf("%s", vocab.id_to_token[id].c_str());
+                printf("%s", vocab.id_to_token[id].tok.c_str());
              }
              fflush(stdout);
          }
@@ -1169,7 +1171,7 @@ int main(int argc, char ** argv) {
              // check for reverse prompt
              std::string last_output;
              for (auto id : last_n_tokens) {
-                last_output += vocab.id_to_token[id];
+                last_output += vocab.id_to_token[id].tok;
              }
  
              // Check if each of the reverse prompts appears at the end of the output.
diff --git a/quantize.cpp b/quantize.cpp

index b90f34f480cb35a8674e0e6356e771f07b20d17c..52b7ac9b3d242a0501a7857fc5572a1820f1d4d7 100644 (file)
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -8,7 +8,6 @@
  #include <cstdio>
  #include <cstring>
  #include <fstream>
-#include <map>
  #include <string>
  #include <vector>
  #include <regex>
@@ -130,6 +129,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
          }
  
          std::string word;
+        vocab.id_to_token.resize(n_vocab);
          for (int i = 0; i < n_vocab; i++) {
              uint32_t len;
              finp.read ((char *) &len, sizeof(len));
@@ -144,8 +144,10 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
              fout.write((char *) &score, sizeof(score));
  
              vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-            vocab.score[i] = score;
+
+            auto &tok_score = vocab.id_to_token[i];
+            tok_score.tok = word;
+            tok_score.score = score;
          }
      }
  
diff --git a/utils.cpp b/utils.cpp

index 7c6864c8f4b8699324189c738f25c6c1645fe916..b15c68adeea72a807800bf1f1d00d3ffe8da5efd 100644 (file)
--- a/utils.cpp
+++ b/utils.cpp
@@ -155,8 +155,8 @@ void replace(std::string & str, const std::string & needle, const std::string &
      }
  }
  
-std::map<std::string, int32_t> json_parse(const std::string & fname) {
-    std::map<std::string, int32_t> result;
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname) {
+    std::unordered_map<std::string, int32_t> result;
  
      // read file into string
      std::string json;
@@ -360,16 +360,16 @@ private:
              return;
          }
  
-        auto score = vocab_.score.find((*token).second);
-
-        if (score == vocab_.score.end()) {
+        if (static_cast<size_t>((*token).second) >= vocab_.id_to_token.size()) {
              return;
          }
  
+        const auto &tok_score = vocab_.id_to_token[(*token).second];
+
          llama_sp_bigram bigram;
          bigram.left = left;
          bigram.right = right;
-        bigram.score = (*score).second;
+        bigram.score = tok_score.score;
          bigram.size = text.size();
          work_queue_.push(bigram);
      }
@@ -393,6 +393,8 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) {
      std::string word;
      std::vector<char> tmp(64);
  
+    vocab.id_to_token.resize(n_vocab);
+
      for (int i = 0; i < n_vocab; i++) {
          uint32_t len;
          fin.read((char *) &len, sizeof(len));
@@ -410,8 +412,10 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) {
          fin.read((char *) &score, sizeof(score));
  
          vocab.token_to_id[word] = i;
-        vocab.id_to_token[i] = word;
-        vocab.score[i] = score;
+
+        auto &tok_score = vocab.id_to_token[i];
+        tok_score.tok = word;
+        tok_score.score = score;
      }
  
      return true;
diff --git a/utils.h b/utils.h

index 6693775c57d7950b9f44ca3d83cf7d08fceeffdd..31290385993402dccc360ab08f9d7e355237aac9 100644 (file)
--- a/utils.h
+++ b/utils.h
@@ -3,7 +3,7 @@
  #pragma once
  
  #include <string>
-#include <map>
+#include <unordered_map>
  #include <vector>
  #include <random>
  #include <thread>
@@ -65,15 +65,19 @@ struct llama_vocab {
      using id    = int32_t;
      using token = std::string;
  
-    std::map<token, id> token_to_id;
-    std::map<id, token> id_to_token;
-    std::map<id, float> score;
+    struct token_score {
+        token tok;
+        float score;
+    };
+
+    std::unordered_map<token, id> token_to_id;
+    std::vector<token_score> id_to_token;
  };
  
  void replace(std::string & str, const std::string & needle, const std::string & replacement);
  
  // poor-man's JSON parsing
-std::map<std::string, int32_t> json_parse(const std::string & fname);
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);
  
  // TODO: temporary until #77 is merged, need this now for some tokenizer tests
  bool llama_vocab_load(const std::string & fname, llama_vocab & vocab);
author	Fabio R. Sluzala <redacted>
	Tue, 21 Mar 2023 17:21:50 +0000 (14:21 -0300)
committer	GitHub <redacted>
	Tue, 21 Mar 2023 17:21:50 +0000 (19:21 +0200)
main.cpp		patch | blob | history
quantize.cpp		patch | blob | history
utils.cpp		patch | blob | history
utils.h		patch | blob | history