* Improve performance by replacing std::map with std::unordered_map, and by changing std::map<id, token> id_to_token to std::vector<token> id_to_token
* Fix the previous commit: in gpt_vocab_init, add vocab.id_to_token.resize(vocab.token_to_id.size());
* Remove #include <map>
* Nest struct token_score inside gpt_vocab
* Rename token to tok
#include <cstring>
#include <fstream>
#include <iostream>
-#include <map>
#include <string>
#include <vector>
static const int EOS_TOKEN_ID = 2;
// determine number of model parts based on the dimension
-static const std::map<int, int> LLAMA_N_PARTS = {
+static const std::unordered_map<int, int> LLAMA_N_PARTS = {
{ 4096, 1 },
{ 5120, 2 },
{ 6656, 4 },
//
struct ggml_context * ctx;
- std::map<std::string, struct ggml_tensor *> tensors;
+ std::unordered_map<std::string, struct ggml_tensor *> tensors;
};
// load the model's weights from a file
// load vocab
{
std::string word;
+ vocab.id_to_token.resize(model.hparams.n_vocab);
std::vector<char> tmp(64);
for (int i = 0; i < model.hparams.n_vocab; i++) {
fin.read((char *) &score, sizeof(score));
vocab.token_to_id[word] = i;
- vocab.id_to_token[i] = word;
- vocab.score[i] = score;
+
+ auto &tok_score = vocab.id_to_token[i];
+ tok_score.tok = word;
+ tok_score.score = score;
}
}
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
for (int i = 0; i < (int) embd_inp.size(); i++) {
- fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+ fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).tok.c_str());
}
fprintf(stderr, "\n");
if (params.interactive) {
// display text
if (!input_noecho) {
for (auto id : embd) {
- printf("%s", vocab.id_to_token[id].c_str());
+ printf("%s", vocab.id_to_token[id].tok.c_str());
}
fflush(stdout);
}
// check for reverse prompt
std::string last_output;
for (auto id : last_n_tokens) {
- last_output += vocab.id_to_token[id];
+ last_output += vocab.id_to_token[id].tok;
}
// Check if each of the reverse prompts appears at the end of the output.
#include <cstdio>
#include <cstring>
#include <fstream>
-#include <map>
#include <string>
#include <vector>
#include <regex>
}
std::string word;
+ vocab.id_to_token.resize(n_vocab);
for (int i = 0; i < n_vocab; i++) {
uint32_t len;
finp.read ((char *) &len, sizeof(len));
fout.write((char *) &score, sizeof(score));
vocab.token_to_id[word] = i;
- vocab.id_to_token[i] = word;
- vocab.score[i] = score;
+
+ auto &tok_score = vocab.id_to_token[i];
+ tok_score.tok = word;
+ tok_score.score = score;
}
}
}
}
-std::map<std::string, int32_t> json_parse(const std::string & fname) {
- std::map<std::string, int32_t> result;
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname) {
+ std::unordered_map<std::string, int32_t> result;
// read file into string
std::string json;
return;
}
- auto score = vocab_.score.find((*token).second);
-
- if (score == vocab_.score.end()) {
+ if (static_cast<size_t>((*token).second) >= vocab_.id_to_token.size()) {
return;
}
+ const auto &tok_score = vocab_.id_to_token[(*token).second];
+
llama_sp_bigram bigram;
bigram.left = left;
bigram.right = right;
- bigram.score = (*score).second;
+ bigram.score = tok_score.score;
bigram.size = text.size();
work_queue_.push(bigram);
}
std::string word;
std::vector<char> tmp(64);
+ vocab.id_to_token.resize(n_vocab);
+
for (int i = 0; i < n_vocab; i++) {
uint32_t len;
fin.read((char *) &len, sizeof(len));
fin.read((char *) &score, sizeof(score));
vocab.token_to_id[word] = i;
- vocab.id_to_token[i] = word;
- vocab.score[i] = score;
+
+ auto &tok_score = vocab.id_to_token[i];
+ tok_score.tok = word;
+ tok_score.score = score;
}
return true;
#pragma once
#include <string>
-#include <map>
+#include <unordered_map>
#include <vector>
#include <random>
#include <thread>
using id = int32_t;
using token = std::string;
- std::map<token, id> token_to_id;
- std::map<id, token> id_to_token;
- std::map<id, float> score;
+ struct token_score {
+ token tok;
+ float score;
+ };
+
+ std::unordered_map<token, id> token_to_id;
+ std::vector<token_score> id_to_token;
};
void replace(std::string & str, const std::string & needle, const std::string & replacement);
// poor-man's JSON parsing
-std::map<std::string, int32_t> json_parse(const std::string & fname);
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);
// TODO: temporary until #77 is merged, need this now for some tokenizer tests
bool llama_vocab_load(const std::string & fname, llama_vocab & vocab);