return true;
}
-void gpt_print_usage(int argc, char ** argv, const gpt_params & params) {
+void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
return result;
}
// append a token to special_tokens, the list gpt_tokenize joins with '|' to build its special-token subpattern
-void gpt_vocab::add_special_token(const std::string &token) {
+void gpt_vocab::add_special_token(const std::string & token) {
special_tokens.push_back(token);
}
std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text) {
std::vector<std::string> words;
-
// first split the text into words
{
std::string str = text;
// Generate the subpattern from the special_tokens vector if it's not empty
if (!vocab.special_tokens.empty()) {
std::string special_tokens_subpattern;
- for (const auto &token : vocab.special_tokens) {
+ for (const auto & token : vocab.special_tokens) {
if (!special_tokens_subpattern.empty()) {
special_tokens_subpattern += "|";
}
return true;
}
+
// compute a normalized similarity score between two strings from their
// Levenshtein (edit) distance
//
// returns 1.0f - distance / max(s0.size(), s1.size()):
//   - 1.0f when the strings are identical (including two empty strings)
//   - 0.0f when the distance equals the length of the longer string
float similarity(const std::string & s0, const std::string & s1) {
    const size_t len0 = s0.size();
    const size_t len1 = s1.size();

    // two empty strings are identical; without this guard the normalization
    // below would evaluate 0.0f / 0 and return NaN
    const size_t max_len = std::max(len0, len1);
    if (max_len == 0) {
        return 1.0f;
    }

    // rolling two-row dynamic programming table:
    // prev_row[j] holds the distance between the first (i - 1) chars of s0
    // and the first j chars of s1, cur_row[j] the same for the first i chars
    std::vector<size_t> prev_row(len1 + 1);
    std::vector<size_t> cur_row(len1 + 1);

    // distance from the empty prefix of s0 to each prefix of s1 is j insertions
    for (size_t j = 0; j <= len1; j++) {
        prev_row[j] = j;
    }

    for (size_t i = 1; i <= len0; i++) {
        // distance from the first i chars of s0 to the empty prefix of s1
        cur_row[0] = i;

        for (size_t j = 1; j <= len1; j++) {
            const size_t insert_cost  = cur_row[j - 1] + 1;
            const size_t delete_cost  = prev_row[j] + 1;
            const size_t replace_cost = prev_row[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1);

            cur_row[j] = std::min(std::min(insert_cost, delete_cost), replace_cost);
        }

        // the row just computed becomes the "previous" row of the next iteration
        cur_row.swap(prev_row);
    }

    // after the final swap the result lives in prev_row
    const float dist = static_cast<float>(prev_row[len1]);

    return 1.0f - (dist / static_cast<float>(max_len));
}
std::map<id, token> id_to_token;
std::vector<std::string> special_tokens;
- void add_special_token(const std::string &token);
+ void add_special_token(const std::string & token);
};
// poor-man's JSON parsing
float freq_thold,
bool verbose);
+// compute similarity between two strings using Levenshtein distance
+float similarity(const std::string & s0, const std::string & s1);
# copy sources from the sibling ../whisper.cpp checkout into this tree (cp -rpv: recursive, preserve attributes, verbose)
cp -rpv ../whisper.cpp/ggml-opencl.h src/ggml-opencl.h
cp -rpv ../whisper.cpp/ggml-opencl.c src/ggml-opencl.c
cp -rpv ../whisper.cpp/ggml.h include/ggml/ggml.h
+cp -rpv ../whisper.cpp/examples/common.h examples/common.h
+cp -rpv ../whisper.cpp/examples/common.cpp examples/common.cpp
cp -rpv ../whisper.cpp/examples/common-ggml.h examples/common-ggml.h
cp -rpv ../whisper.cpp/examples/common-ggml.cpp examples/common-ggml.cpp
cp -rpv ../whisper.cpp/whisper.h examples/whisper/whisper.h