From: Georgi Gerganov
Date: Sat, 22 Apr 2023 11:59:42 +0000 (+0300)
Subject: examples : utils -> common
X-Git-Tag: upstream/0.0.1642~1528
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=e433b73f48eff9679b2e01b188dd66b79df81b91;p=pkg%2Fggml%2Fsources%2Fggml

examples : utils -> common
---

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 19857fda..96df1396 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,5 +1,8 @@
-add_library(ggml_utils STATIC utils.cpp)
-target_include_directories(ggml_utils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+add_library(common STATIC common.cpp)
+target_include_directories(common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+
+add_library(common-ggml STATIC common-ggml.cpp)
+target_include_directories(common-ggml PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
 
 add_subdirectory(gpt-2)
 add_subdirectory(gpt-j)
diff --git a/examples/common-ggml.cpp b/examples/common-ggml.cpp
new file mode 100644
index 00000000..a6e41443
--- /dev/null
+++ b/examples/common-ggml.cpp
@@ -0,0 +1 @@
+#include "common-ggml.h"
diff --git a/examples/common-ggml.h b/examples/common-ggml.h
new file mode 100644
index 00000000..3f59c932
--- /dev/null
+++ b/examples/common-ggml.h
@@ -0,0 +1,2 @@
+#pragma once
+
diff --git a/examples/common.cpp b/examples/common.cpp
new file mode 100644
index 00000000..fc45999b
--- /dev/null
+++ b/examples/common.cpp
@@ -0,0 +1,481 @@
+#include "common.h"
+
+// third-party utilities
+// use your favorite implementations
+#define DR_WAV_IMPLEMENTATION
+#include "dr_wav.h"
+
+#include <cmath>
+#include <fstream>
+#include <regex>
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
+    for (int i = 1; i < argc; i++) {
+        std::string arg = argv[i];
+
+        if (arg == "-s" || arg == "--seed") {
+            params.seed = std::stoi(argv[++i]);
+        } else if (arg == "-t" || arg == "--threads") {
+            params.n_threads = std::stoi(argv[++i]);
+        } else if (arg == "-p" || arg == "--prompt") {
+            params.prompt = argv[++i];
+        } else if (arg == "-n" || arg == "--n_predict") {
+            params.n_predict = std::stoi(argv[++i]);
+        } else if (arg == "--top_k") {
+            params.top_k = std::stoi(argv[++i]);
+        } else if (arg == "--top_p") {
+            params.top_p = std::stof(argv[++i]);
+        } else if (arg == "--temp") {
+            params.temp = std::stof(argv[++i]);
+        } else if (arg == "-b" || arg == "--batch_size") {
+            params.n_batch = std::stoi(argv[++i]);
+        } else if (arg == "-m" || arg == "--model") {
+            params.model = argv[++i];
+        } else if (arg == "-h" || arg == "--help") {
+            gpt_print_usage(argc, argv, params);
+            exit(0);
+        } else {
+            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
+            gpt_print_usage(argc, argv, params);
+            exit(0);
+        }
+    }
+
+    return true;
+}
+
+void gpt_print_usage(int argc, char ** argv, const gpt_params & params) {
+    fprintf(stderr, "usage: %s [options]\n", argv[0]);
+    fprintf(stderr, "\n");
+    fprintf(stderr, "options:\n");
+    fprintf(stderr, "  -h, --help            show this help message and exit\n");
+    fprintf(stderr, "  -s SEED, --seed SEED  RNG seed (default: -1)\n");
+    fprintf(stderr, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
+    fprintf(stderr, "  -p PROMPT, --prompt PROMPT\n");
+    fprintf(stderr, "                        prompt to start generation with (default: random)\n");
+    fprintf(stderr, "  -n N, --n_predict N   number of tokens to predict (default: %d)\n", params.n_predict);
+    fprintf(stderr, "  --top_k N             top-k sampling (default: %d)\n", params.top_k);
+    fprintf(stderr, "  --top_p N             top-p sampling (default: %.1f)\n", params.top_p);
+    fprintf(stderr, "  --temp N              temperature (default: %.1f)\n", params.temp);
+    fprintf(stderr, "  -b N, --batch_size N  batch size for prompt processing (default: %d)\n", params.n_batch);
+    fprintf(stderr, "  -m FNAME, --model FNAME\n");
+    fprintf(stderr, "                        model path (default: %s)\n", params.model.c_str());
+    fprintf(stderr, "\n");
+}
+
+std::string gpt_random_prompt(std::mt19937 & rng) {
+    const int r = rng() % 10;
+    switch (r) {
+        case 0: return "So";
+        case 1: return "Once upon a time";
+        case 2: return "When";
+        case 3: return "The";
+        case 4: return "After";
+        case 5: return "If";
+        case 6: return "import";
+        case 7: return "He";
+        case 8: return "She";
+        case 9: return "They";
+        default: return "To";
+    }
+
+    return "The";
+}
+
+std::string trim(const std::string & s) {
+    std::regex e("^\\s+|\\s+$");
+    return std::regex_replace(s, e, "");
+}
+
+std::string replace(const std::string & s, const std::string & from, const std::string & to) {
+    std::string result = s;
+    size_t pos = 0;
+    while ((pos = result.find(from, pos)) != std::string::npos) {
+        result.replace(pos, from.length(), to);
+        pos += to.length();
+    }
+    return result;
+}
+
+std::map<std::string, int32_t> json_parse(const std::string & fname) {
+    std::map<std::string, int32_t> result;
+
+    // read file into string
+    std::string json;
+    {
+        std::ifstream ifs(fname);
+        if (!ifs) {
+            fprintf(stderr, "Failed to open %s\n", fname.c_str());
+            exit(1);
+        }
+
+        json = std::string((std::istreambuf_iterator<char>(ifs)),
+                (std::istreambuf_iterator<char>()));
+    }
+
+    if (json[0] != '{') {
+        return result;
+    }
+
+    // parse json
+    {
+        bool has_key  = false;
+        bool in_token = false;
+
+        std::string str_key = "";
+        std::string str_val = "";
+
+        int n = json.size();
+        for (int i = 1; i < n; ++i) {
+            if (!in_token) {
+                if (json[i] == ' ') continue;
+                if (json[i] == '"') {
+                    in_token = true;
+                    continue;
+                }
+            } else {
+                if (json[i] == '\\' && i+1 < n) {
+                    if (has_key == false) {
+                        str_key += json[i];
+                    } else {
+                        str_val += json[i];
+                    }
+                    ++i;
+                } else if (json[i] == '"') {
+                    if (has_key == false) {
+                        has_key = true;
+                        ++i;
+                        while (json[i] == ' ') ++i;
+                        ++i; // :
+                        while (json[i] == ' ') ++i;
+                        if (json[i] != '\"') {
+                            while (json[i] != ',' && json[i] != '}') {
+                                str_val += json[i++];
+                            }
+                            has_key = false;
+                        } else {
+                            in_token = true;
+                            continue;
+                        }
+                    } else {
+                        has_key = false;
+                    }
+
+                    str_key = ::replace(str_key, "\\u0120", " " ); // \u0120 -> space
+                    str_key = ::replace(str_key, "\\u010a", "\n"); // \u010a -> new line
+                    str_key = ::replace(str_key, "\\\"", "\"");    // \\\"   -> "
+
+                    try {
+                        result[str_key] = std::stoi(str_val);
+                    } catch (...) {
+                        //fprintf(stderr, "%s: ignoring key '%s' with value '%s'\n", fname.c_str(), str_key.c_str(), str_val.c_str());
+
+                    }
+                    str_key = "";
+                    str_val = "";
+                    in_token = false;
+                    continue;
+                }
+                if (has_key == false) {
+                    str_key += json[i];
+                } else {
+                    str_val += json[i];
+                }
+            }
+        }
+    }
+
+    return result;
+}
+
+std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text) {
+    std::vector<std::string> words;
+
+    // first split the text into words
+    {
+        std::string str = text;
+        std::string pat = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
+
+        std::regex re(pat);
+        std::smatch m;
+
+        while (std::regex_search(str, m, re)) {
+            for (auto x : m) {
+                words.push_back(x);
+            }
+            str = m.suffix();
+        }
+    }
+
+    // find the longest tokens that form the words:
+    std::vector<gpt_vocab::id> tokens;
+    for (const auto & word : words) {
+        if (word.size() == 0) continue;
+
+        int i = 0;
+        int n = word.size();
+        while (i < n) {
+            int j = n;
+            while (j > i) {
+                auto it = vocab.token_to_id.find(word.substr(i, j-i));
+                if (it != vocab.token_to_id.end()) {
+                    tokens.push_back(it->second);
+                    i = j;
+                    break;
+                }
+                --j;
+            }
+            if (i == n) {
+                break;
+            }
+            if (j == i) {
+                auto sub = word.substr(i, 1);
+                if (vocab.token_to_id.find(sub) != vocab.token_to_id.end()) {
+                    tokens.push_back(vocab.token_to_id.at(sub));
+                } else {
+                    fprintf(stderr, "%s: unknown token '%s'\n", __func__, sub.data());
+                }
+                ++i;
+            }
+        }
+    }
+
+    return tokens;
+}
+
+bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
+    printf("%s: loading vocab from '%s'\n", __func__, fname.c_str());
+
+    vocab.token_to_id = ::json_parse(fname);
+
+    for (const auto & kv : vocab.token_to_id) {
+        vocab.id_to_token[kv.second] = kv.first;
+    }
+
+    printf("%s: vocab size = %d\n", __func__, (int) vocab.token_to_id.size());
+
+    // print the vocabulary
+    //for (auto kv : vocab.token_to_id) {
+    //    printf("'%s' -> %d\n", kv.first.data(), kv.second);
+    //}
+
+    return true;
+}
+
+gpt_vocab::id gpt_sample_top_k_top_p(
+        const gpt_vocab & vocab,
+        const float * logits,
+        int    top_k,
+        double top_p,
+        double temp,
+        std::mt19937 & rng) {
+    int n_logits = vocab.id_to_token.size();
+
+    std::vector<std::pair<double, gpt_vocab::id>> logits_id;
+    logits_id.reserve(n_logits);
+
+    {
+        const double scale = 1.0/temp;
+        for (int i = 0; i < n_logits; ++i) {
+            logits_id.push_back(std::make_pair(logits[i]*scale, i));
+        }
+    }
+
+    // find the top K tokens
+    std::partial_sort(
+            logits_id.begin(),
+            logits_id.begin() + top_k, logits_id.end(),
+            [](const std::pair<double, gpt_vocab::id> & a, const std::pair<double, gpt_vocab::id> & b) {
+        return a.first > b.first;
+    });
+
+    logits_id.resize(top_k);
+
+    double maxl = -INFINITY;
+    for (const auto & kv : logits_id) {
+        maxl = std::max(maxl, kv.first);
+    }
+
+    // compute probs for the top K tokens
+    std::vector<double> probs;
+    probs.reserve(logits_id.size());
+
+    double sum = 0.0;
+    for (const auto & kv : logits_id) {
+        double p = exp(kv.first - maxl);
+        probs.push_back(p);
+        sum += p;
+    }
+
+    // normalize the probs
+    for (auto & p : probs) {
+        p /= sum;
+    }
+
+    if (top_p < 1.0f) {
+        double cumsum = 0.0f;
+        for (int i = 0; i < top_k; i++) {
+            cumsum += probs[i];
+            if (cumsum >= top_p) {
+                top_k = i + 1;
+                probs.resize(top_k);
+                logits_id.resize(top_k);
+                break;
+            }
+        }
+
+        cumsum = 1.0/cumsum;
+        for (int i = 0; i < (int) probs.size(); i++) {
+            probs[i] *= cumsum;
+        }
+    }
+
+    //printf("\n");
+    //for (int i = 0; i < (int) probs.size(); i++) {
+    //    printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]);
+    //}
+    //exit(0);
+
+    std::discrete_distribution<> dist(probs.begin(), probs.end());
+    int idx = dist(rng);
+
+    return logits_id[idx].second;
+}
+
+bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
+    drwav wav;
+    std::vector<uint8_t> wav_data; // used for pipe input from stdin
+
+    if (fname == "-") {
+        {
+            uint8_t buf[1024];
+            while (true)
+            {
+                const size_t n = fread(buf, 1, sizeof(buf), stdin);
+                if (n == 0) {
+                    break;
+                }
+                wav_data.insert(wav_data.end(), buf, buf + n);
+            }
+        }
+
+        if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
+            fprintf(stderr, "error: failed to open WAV file from stdin\n");
+            return false;
+        }
+
+        fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
+    }
+    else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
+        fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
+        return false;
+    }
+
+    if (wav.channels != 1 && wav.channels != 2) {
+        fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", __func__, fname.c_str());
+        return false;
+    }
+
+    if (stereo && wav.channels != 2) {
+        fprintf(stderr, "%s: WAV file '%s' must be stereo for diarization\n", __func__, fname.c_str());
+        return false;
+    }
+
+    if (wav.sampleRate != COMMON_SAMPLE_RATE) {
+        fprintf(stderr, "%s: WAV file '%s' must be %i kHz\n", __func__, fname.c_str(), COMMON_SAMPLE_RATE/1000);
+        return false;
+    }
+
+    if (wav.bitsPerSample != 16) {
+        fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", __func__, fname.c_str());
+        return false;
+    }
+
+    const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
+
+    std::vector<int16_t> pcm16;
+    pcm16.resize(n*wav.channels);
+    drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
+    drwav_uninit(&wav);
+
+    // convert to mono, float
+    pcmf32.resize(n);
+    if (wav.channels == 1) {
+        for (uint64_t i = 0; i < n; i++) {
+            pcmf32[i] = float(pcm16[i])/32768.0f;
+        }
+    } else {
+        for (uint64_t i = 0; i < n; i++) {
+            pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
+        }
+    }
+
+    if (stereo) {
+        // convert to stereo, float
+        pcmf32s.resize(2);
+
+        pcmf32s[0].resize(n);
+        pcmf32s[1].resize(n);
+        for (uint64_t i = 0; i < n; i++) {
+            pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
+            pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
+        }
+    }
+
+    return true;
+}
+
+void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
+    const float rc = 1.0f / (2.0f * M_PI * cutoff);
+    const float dt = 1.0f / sample_rate;
+    const float alpha = dt / (rc + dt);
+
+    float y = data[0];
+
+    for (size_t i = 1; i < data.size(); i++) {
+        y = alpha * (y + data[i] - data[i - 1]);
+        data[i] = y;
+    }
+}
+
+bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
+    const int n_samples      = pcmf32.size();
+    const int n_samples_last = (sample_rate * last_ms) / 1000;
+
+    if (n_samples_last >= n_samples) {
+        // not enough samples - assume no speech
+        return false;
+    }
+
+    if (freq_thold > 0.0f) {
+        high_pass_filter(pcmf32, freq_thold, sample_rate);
+    }
+
+    float energy_all  = 0.0f;
+    float energy_last = 0.0f;
+
+    for (int i = 0; i < n_samples; i++) {
+        energy_all += fabsf(pcmf32[i]);
+
+        if (i >= n_samples - n_samples_last) {
+            energy_last += fabsf(pcmf32[i]);
+        }
+    }
+
+    energy_all  /= n_samples;
+    energy_last /= n_samples_last;
+
+    if (verbose) {
+        fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
+    }
+
+    if (energy_last > vad_thold*energy_all) {
+        return false;
+    }
+
+    return true;
+}
diff --git a/examples/common.h b/examples/common.h
new file mode 100644
index 00000000..b08e5760
--- /dev/null
+++ b/examples/common.h
@@ -0,0 +1,120 @@
+// Various helper functions and utilities
+
+#pragma once
+
+#include <string>
+#include <map>
+#include <vector>
+#include <random>
+#include <thread>
+
+#define COMMON_SAMPLE_RATE 16000
+
+//
+// CLI argument parsing
+//
+
+struct gpt_params {
+    int32_t seed      = -1; // RNG seed
+    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
+    int32_t n_predict = 200; // new tokens to predict
+
+    // sampling parameters
+    int32_t top_k = 40;
+    float   top_p = 0.9f;
+    float   temp  = 0.9f;
+
+    int32_t n_batch = 8; // batch size for prompt processing
+
+    std::string model = "models/gpt-2-117M/ggml-model.bin"; // model path
+    std::string prompt;
+};
+
+bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
+
+void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
+
+std::string gpt_random_prompt(std::mt19937 & rng);
+
+//
+// Vocab utils
+//
+
+std::string trim(const std::string & s);
+
+std::string replace(
+        const std::string & s,
+        const std::string & from,
+        const std::string & to);
+
+struct gpt_vocab {
+    using id    = int32_t;
+    using token = std::string;
+
+    std::map<token, id> token_to_id;
+    std::map<id, token> id_to_token;
+};
+
+// poor-man's JSON parsing
+std::map<std::string, int32_t> json_parse(const std::string & fname);
+
+// split text into tokens
+//
+// ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53
+//
+// Regex (Python):
+// r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
+//
+// Regex (C++):
+// R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"
+//
+std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text);
+
+// load the tokens from encoder.json
+bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);
+
+// sample next token given probabilities for each embedding
+//
+//   - consider only the top K tokens
+//   - from them, consider only the top tokens with cumulative probability > P
+//
+// TODO: not sure if this implementation is correct
+// TODO: temperature is not implemented
+//
+gpt_vocab::id gpt_sample_top_k_top_p(
+        const gpt_vocab & vocab,
+        const float * logits,
+        int    top_k,
+        double top_p,
+        double temp,
+        std::mt19937 & rng);
+
+//
+// Audio utils
+//
+
+// Read WAV audio file and store the PCM data into pcmf32
+// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
+// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
+bool read_wav(
+        const std::string & fname,
+        std::vector<float> & pcmf32,
+        std::vector<std::vector<float>> & pcmf32s,
+        bool stereo);
+
+// Apply a high-pass frequency filter to PCM audio
+// Suppresses frequencies below cutoff Hz
+void high_pass_filter(
+        std::vector<float> & data,
+        float cutoff,
+        float sample_rate);
+
+// Basic voice activity detection (VAD) using audio energy adaptive threshold
+bool vad_simple(
+        std::vector<float> & pcmf32,
+        int   sample_rate,
+        int   last_ms,
+        float vad_thold,
+        float freq_thold,
+        bool  verbose);
+
diff --git a/examples/gpt-2/CMakeLists.txt b/examples/gpt-2/CMakeLists.txt
index 3b7ab5ef..2624ae60 100644
--- a/examples/gpt-2/CMakeLists.txt
+++ b/examples/gpt-2/CMakeLists.txt
@@ -3,11 +3,11 @@ set(TEST_TARGET gpt-2)
 add_executable(${TEST_TARGET} main.cpp)
-target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils)
+target_link_libraries(${TEST_TARGET} PRIVATE ggml common) # # gpt-2-quantize set(TEST_TARGET gpt-2-quantize) add_executable(${TEST_TARGET} quantize.cpp) -target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils) +target_link_libraries(${TEST_TARGET} PRIVATE ggml common) diff --git a/examples/gpt-2/main.cpp b/examples/gpt-2/main.cpp index c52cae1b..60975383 100644 --- a/examples/gpt-2/main.cpp +++ b/examples/gpt-2/main.cpp @@ -1,6 +1,6 @@ #include "ggml/ggml.h" -#include "utils.h" +#include "common.h" #include #include diff --git a/examples/gpt-2/quantize.cpp b/examples/gpt-2/quantize.cpp index fd9a22d5..28867d72 100644 --- a/examples/gpt-2/quantize.cpp +++ b/examples/gpt-2/quantize.cpp @@ -1,6 +1,6 @@ #include "ggml/ggml.h" -#include "utils.h" +#include "common.h" #include #include diff --git a/examples/gpt-j/CMakeLists.txt b/examples/gpt-j/CMakeLists.txt index 390746d5..195e813c 100644 --- a/examples/gpt-j/CMakeLists.txt +++ b/examples/gpt-j/CMakeLists.txt @@ -3,11 +3,11 @@ set(TEST_TARGET gpt-j) add_executable(${TEST_TARGET} main.cpp) -target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils) +target_link_libraries(${TEST_TARGET} PRIVATE ggml common) # # gpt-j-quantize set(TEST_TARGET gpt-j-quantize) add_executable(${TEST_TARGET} quantize.cpp) -target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils) +target_link_libraries(${TEST_TARGET} PRIVATE ggml common) diff --git a/examples/gpt-j/main.cpp b/examples/gpt-j/main.cpp index fbd7c314..57e0caf5 100644 --- a/examples/gpt-j/main.cpp +++ b/examples/gpt-j/main.cpp @@ -1,6 +1,6 @@ #include "ggml/ggml.h" -#include "utils.h" +#include "common.h" #include #include diff --git a/examples/gpt-j/quantize.cpp b/examples/gpt-j/quantize.cpp index 2fff95bd..e706f3a9 100644 --- a/examples/gpt-j/quantize.cpp +++ b/examples/gpt-j/quantize.cpp @@ -1,6 +1,6 @@ #include "ggml/ggml.h" -#include "utils.h" +#include "common.h" #include #include diff --git a/examples/mnist/CMakeLists.txt b/examples/mnist/CMakeLists.txt index 8df55a36..91b802ae 100644 --- a/examples/mnist/CMakeLists.txt +++ b/examples/mnist/CMakeLists.txt @@ -3,5 +3,5 @@ set(TEST_TARGET mnist) add_executable(${TEST_TARGET} main.cpp) -target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils) +target_link_libraries(${TEST_TARGET} PRIVATE ggml common) diff --git a/examples/mnist/main.cpp b/examples/mnist/main.cpp index 3206f2d0..e2ae0329 100644 --- a/examples/mnist/main.cpp +++ b/examples/mnist/main.cpp @@ -1,6 +1,6 @@ #include "ggml/ggml.h" -#include "utils.h" +#include "common.h" #include #include diff --git a/examples/stablelm/CMakeLists.txt b/examples/stablelm/CMakeLists.txt index b62b836d..59e97a9e 100644 --- a/examples/stablelm/CMakeLists.txt +++ b/examples/stablelm/CMakeLists.txt @@ -3,11 +3,11 @@ set(TEST_TARGET stablelm) add_executable(${TEST_TARGET} main.cpp) -target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils) +target_link_libraries(${TEST_TARGET} PRIVATE ggml common) # # stablelm-quantize set(TEST_TARGET stablelm-quantize) add_executable(${TEST_TARGET} quantize.cpp) -target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils) +target_link_libraries(${TEST_TARGET} PRIVATE ggml common) diff --git a/examples/stablelm/main.cpp b/examples/stablelm/main.cpp index f415bffb..879ba115 100644 --- a/examples/stablelm/main.cpp +++ b/examples/stablelm/main.cpp @@ -1,6 +1,6 @@ #include "ggml/ggml.h" -#include "utils.h" +#include "common.h" #include #include diff --git a/examples/stablelm/quantize.cpp b/examples/stablelm/quantize.cpp index 25d96168..3c8ca5e7 100644 --- 
a/examples/stablelm/quantize.cpp +++ b/examples/stablelm/quantize.cpp @@ -1,6 +1,6 @@ #include "ggml/ggml.h" -#include "utils.h" +#include "common.h" #include #include diff --git a/examples/utils.cpp b/examples/utils.cpp deleted file mode 100644 index 30057b7c..00000000 --- a/examples/utils.cpp +++ /dev/null @@ -1,330 +0,0 @@ -#include "utils.h" - -#include -#include - -bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { - for (int i = 1; i < argc; i++) { - std::string arg = argv[i]; - - if (arg == "-s" || arg == "--seed") { - params.seed = std::stoi(argv[++i]); - } else if (arg == "-t" || arg == "--threads") { - params.n_threads = std::stoi(argv[++i]); - } else if (arg == "-p" || arg == "--prompt") { - params.prompt = argv[++i]; - } else if (arg == "-n" || arg == "--n_predict") { - params.n_predict = std::stoi(argv[++i]); - } else if (arg == "--top_k") { - params.top_k = std::stoi(argv[++i]); - } else if (arg == "--top_p") { - params.top_p = std::stof(argv[++i]); - } else if (arg == "--temp") { - params.temp = std::stof(argv[++i]); - } else if (arg == "-b" || arg == "--batch_size") { - params.n_batch = std::stoi(argv[++i]); - } else if (arg == "-m" || arg == "--model") { - params.model = argv[++i]; - } else if (arg == "-h" || arg == "--help") { - gpt_print_usage(argc, argv, params); - exit(0); - } else { - fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); - gpt_print_usage(argc, argv, params); - exit(0); - } - } - - return true; -} - -void gpt_print_usage(int argc, char ** argv, const gpt_params & params) { - fprintf(stderr, "usage: %s [options]\n", argv[0]); - fprintf(stderr, "\n"); - fprintf(stderr, "options:\n"); - fprintf(stderr, " -h, --help show this help message and exit\n"); - fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n"); - fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads); - fprintf(stderr, " -p PROMPT, --prompt PROMPT\n"); - fprintf(stderr, " prompt to start generation with (default: random)\n"); - fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d)\n", params.n_predict); - fprintf(stderr, " --top_k N top-k sampling (default: %d)\n", params.top_k); - fprintf(stderr, " --top_p N top-p sampling (default: %.1f)\n", params.top_p); - fprintf(stderr, " --temp N temperature (default: %.1f)\n", params.temp); - fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); - fprintf(stderr, " -m FNAME, --model FNAME\n"); - fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); - fprintf(stderr, "\n"); -} - -std::string gpt_random_prompt(std::mt19937 & rng) { - const int r = rng() % 10; - switch (r) { - case 0: return "So"; - case 1: return "Once upon a time"; - case 2: return "When"; - case 3: return "The"; - case 4: return "After"; - case 5: return "If"; - case 6: return "import"; - case 7: return "He"; - case 8: return "She"; - case 9: return "They"; - default: return "To"; - } - - return "The"; -} - -void replace(std::string & str, const std::string & needle, const std::string & replacement) { - size_t pos = 0; - while ((pos = str.find(needle, pos)) != std::string::npos) { - str.replace(pos, needle.length(), replacement); - pos += replacement.length(); - } -} - -std::map json_parse(const std::string & fname) { - std::map result; - - // read file into string - std::string json; - { - std::ifstream ifs(fname); - if (!ifs) { - fprintf(stderr, "Failed to open %s\n", fname.c_str()); - 
exit(1); - } - - json = std::string((std::istreambuf_iterator(ifs)), - (std::istreambuf_iterator())); - } - - if (json[0] != '{') { - return result; - } - - // parse json - { - bool has_key = false; - bool in_token = false; - - std::string str_key = ""; - std::string str_val = ""; - - int n = json.size(); - for (int i = 1; i < n; ++i) { - if (!in_token) { - if (json[i] == ' ') continue; - if (json[i] == '"') { - in_token = true; - continue; - } - } else { - if (json[i] == '\\' && i+1 < n) { - if (has_key == false) { - str_key += json[i]; - } else { - str_val += json[i]; - } - ++i; - } else if (json[i] == '"') { - if (has_key == false) { - has_key = true; - ++i; - while (json[i] == ' ') ++i; - ++i; // : - while (json[i] == ' ') ++i; - if (json[i] != '\"') { - while (json[i] != ',' && json[i] != '}') { - str_val += json[i++]; - } - has_key = false; - } else { - in_token = true; - continue; - } - } else { - has_key = false; - } - - ::replace(str_key, "\\u0120", " " ); // \u0120 -> space - ::replace(str_key, "\\u010a", "\n"); // \u010a -> new line - ::replace(str_key, "\\\"", "\""); // \\\" -> " - - try { - result[str_key] = std::stoi(str_val); - } catch (...) { - //fprintf(stderr, "%s: ignoring key '%s' with value '%s'\n", fname.c_str(), str_key.c_str(), str_val.c_str()); - - } - str_key = ""; - str_val = ""; - in_token = false; - continue; - } - if (has_key == false) { - str_key += json[i]; - } else { - str_val += json[i]; - } - } - } - } - - return result; -} - -std::vector gpt_tokenize(const gpt_vocab & vocab, const std::string & text) { - std::vector words; - - // first split the text into words - { - std::string str = text; - std::string pat = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"; - - std::regex re(pat); - std::smatch m; - - while (std::regex_search(str, m, re)) { - for (auto x : m) { - words.push_back(x); - } - str = m.suffix(); - } - } - - // find the longest tokens that form the words: - std::vector tokens; - for (const auto & word : words) { - if (word.size() == 0) continue; - - int i = 0; - int n = word.size(); - while (i < n) { - int j = n; - while (j > i) { - auto it = vocab.token_to_id.find(word.substr(i, j-i)); - if (it != vocab.token_to_id.end()) { - tokens.push_back(it->second); - i = j; - break; - } - --j; - } - if (i == n) { - break; - } - if (j == i) { - auto sub = word.substr(i, 1); - if (vocab.token_to_id.find(sub) != vocab.token_to_id.end()) { - tokens.push_back(vocab.token_to_id.at(sub)); - } else { - fprintf(stderr, "%s: unknown token '%s'\n", __func__, sub.data()); - } - ++i; - } - } - } - - return tokens; -} - -bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) { - printf("%s: loading vocab from '%s'\n", __func__, fname.c_str()); - - vocab.token_to_id = ::json_parse(fname); - - for (const auto & kv : vocab.token_to_id) { - vocab.id_to_token[kv.second] = kv.first; - } - - printf("%s: vocab size = %d\n", __func__, (int) vocab.token_to_id.size()); - - // print the vocabulary - //for (auto kv : vocab.token_to_id) { - // printf("'%s' -> %d\n", kv.first.data(), kv.second); - //} - - return true; -} - -gpt_vocab::id gpt_sample_top_k_top_p( - const gpt_vocab & vocab, - const float * logits, - int top_k, - double top_p, - double temp, - std::mt19937 & rng) { - int n_logits = vocab.id_to_token.size(); - - std::vector> logits_id; - logits_id.reserve(n_logits); - - { - const double scale = 1.0/temp; - for (int i = 0; i < n_logits; ++i) { - logits_id.push_back(std::make_pair(logits[i]*scale, i)); - } - 
} - - // find the top K tokens - std::partial_sort( - logits_id.begin(), - logits_id.begin() + top_k, logits_id.end(), - [](const std::pair & a, const std::pair & b) { - return a.first > b.first; - }); - - logits_id.resize(top_k); - - double maxl = -INFINITY; - for (const auto & kv : logits_id) { - maxl = std::max(maxl, kv.first); - } - - // compute probs for the top K tokens - std::vector probs; - probs.reserve(logits_id.size()); - - double sum = 0.0; - for (const auto & kv : logits_id) { - double p = exp(kv.first - maxl); - probs.push_back(p); - sum += p; - } - - // normalize the probs - for (auto & p : probs) { - p /= sum; - } - - if (top_p < 1.0f) { - double cumsum = 0.0f; - for (int i = 0; i < top_k; i++) { - cumsum += probs[i]; - if (cumsum >= top_p) { - top_k = i + 1; - probs.resize(top_k); - logits_id.resize(top_k); - break; - } - } - - cumsum = 1.0/cumsum; - for (int i = 0; i < (int) probs.size(); i++) { - probs[i] *= cumsum; - } - } - - //printf("\n"); - //for (int i = 0; i < (int) probs.size(); i++) { - // printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]); - //} - //exit(0); - - std::discrete_distribution<> dist(probs.begin(), probs.end()); - int idx = dist(rng); - - return logits_id[idx].second; -} diff --git a/examples/utils.h b/examples/utils.h deleted file mode 100644 index b61173ff..00000000 --- a/examples/utils.h +++ /dev/null @@ -1,83 +0,0 @@ -// Various helper functions and utilities - -#pragma once - -#include -#include -#include -#include -#include - -// -// CLI argument parsing -// - -struct gpt_params { - int32_t seed = -1; // RNG seed - int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); - int32_t n_predict = 200; // new tokens to predict - - // sampling parameters - int32_t top_k = 40; - float top_p = 0.9f; - float temp = 0.9f; - - int32_t n_batch = 8; // batch size for prompt processing - - std::string model = "models/gpt-2-117M/ggml-model.bin"; // model path - std::string prompt; -}; - -bool gpt_params_parse(int argc, char ** argv, gpt_params & params); - -void gpt_print_usage(int argc, char ** argv, const gpt_params & params); - -std::string gpt_random_prompt(std::mt19937 & rng); - -// -// Vocab utils -// - -struct gpt_vocab { - using id = int32_t; - using token = std::string; - - std::map token_to_id; - std::map id_to_token; -}; - -void replace(std::string & str, const std::string & needle, const std::string & replacement); - -// poor-man's JSON parsing -std::map json_parse(const std::string & fname); - -// split text into tokens -// -// ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53 -// -// Regex (Python): -// r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""" -// -// Regex (C++): -// R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)" -// -std::vector gpt_tokenize(const gpt_vocab & vocab, const std::string & text); - -// load the tokens from encoder.json -bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab); - -// sample next token given probabilities for each embedding -// -// - consider only the top K tokens -// - from them, consider only the top tokens with cumulative probability > P -// -// TODO: not sure if this implementation is correct -// TODO: temperature is not implemented -// -gpt_vocab::id gpt_sample_top_k_top_p( - const gpt_vocab & vocab, - const float * logits, - int top_k, - double top_p, - double temp, - std::mt19937 & rng); diff 
--git a/examples/whisper/CMakeLists.txt b/examples/whisper/CMakeLists.txt index c7f5ff54..67500672 100644 --- a/examples/whisper/CMakeLists.txt +++ b/examples/whisper/CMakeLists.txt @@ -10,8 +10,8 @@ target_link_libraries(whisper-cpp PRIVATE ) set(TEST_TARGET whisper) -add_executable(${TEST_TARGET} main.cpp common.cpp) -target_link_libraries(${TEST_TARGET} PRIVATE whisper-cpp) +add_executable(${TEST_TARGET} main.cpp) +target_link_libraries(${TEST_TARGET} PRIVATE whisper-cpp common) target_include_directories(${TEST_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) # @@ -19,4 +19,4 @@ target_include_directories(${TEST_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/.. set(TEST_TARGET whisper-quantize) add_executable(${TEST_TARGET} quantize.cpp) -target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils) +target_link_libraries(${TEST_TARGET} PRIVATE ggml common) diff --git a/examples/whisper/common.cpp b/examples/whisper/common.cpp deleted file mode 100644 index 194ef0ec..00000000 --- a/examples/whisper/common.cpp +++ /dev/null @@ -1,162 +0,0 @@ -#include "common.h" - -// third-party utilities -// use your favorite implementations -#define DR_WAV_IMPLEMENTATION -#include "dr_wav.h" - -#include -#include - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - -std::string trim(const std::string & s) { - std::regex e("^\\s+|\\s+$"); - return std::regex_replace(s, e, ""); -} - -std::string replace(const std::string & s, const std::string & from, const std::string & to) { - std::string result = s; - size_t pos = 0; - while ((pos = result.find(from, pos)) != std::string::npos) { - result.replace(pos, from.length(), to); - pos += to.length(); - } - return result; -} - -bool read_wav(const std::string & fname, std::vector& pcmf32, std::vector>& pcmf32s, bool stereo) { - drwav wav; - std::vector wav_data; // used for pipe input from stdin - - if (fname == "-") { - { - uint8_t buf[1024]; - while (true) - { - const size_t n = fread(buf, 1, sizeof(buf), stdin); - if (n == 0) { - break; - } - wav_data.insert(wav_data.end(), buf, buf + n); - } - } - - if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) { - fprintf(stderr, "error: failed to open WAV file from stdin\n"); - return false; - } - - fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size()); - } - else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) { - fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str()); - return false; - } - - if (wav.channels != 1 && wav.channels != 2) { - fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", __func__, fname.c_str()); - return false; - } - - if (stereo && wav.channels != 2) { - fprintf(stderr, "%s: WAV file '%s' must be stereo for diarization\n", __func__, fname.c_str()); - return false; - } - - if (wav.sampleRate != COMMON_SAMPLE_RATE) { - fprintf(stderr, "%s: WAV file '%s' must be %i kHz\n", __func__, fname.c_str(), COMMON_SAMPLE_RATE/1000); - return false; - } - - if (wav.bitsPerSample != 16) { - fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", __func__, fname.c_str()); - return false; - } - - const uint64_t n = wav_data.empty() ? 
wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8); - - std::vector pcm16; - pcm16.resize(n*wav.channels); - drwav_read_pcm_frames_s16(&wav, n, pcm16.data()); - drwav_uninit(&wav); - - // convert to mono, float - pcmf32.resize(n); - if (wav.channels == 1) { - for (uint64_t i = 0; i < n; i++) { - pcmf32[i] = float(pcm16[i])/32768.0f; - } - } else { - for (uint64_t i = 0; i < n; i++) { - pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f; - } - } - - if (stereo) { - // convert to stereo, float - pcmf32s.resize(2); - - pcmf32s[0].resize(n); - pcmf32s[1].resize(n); - for (uint64_t i = 0; i < n; i++) { - pcmf32s[0][i] = float(pcm16[2*i])/32768.0f; - pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f; - } - } - - return true; -} - -void high_pass_filter(std::vector & data, float cutoff, float sample_rate) { - const float rc = 1.0f / (2.0f * M_PI * cutoff); - const float dt = 1.0f / sample_rate; - const float alpha = dt / (rc + dt); - - float y = data[0]; - - for (size_t i = 1; i < data.size(); i++) { - y = alpha * (y + data[i] - data[i - 1]); - data[i] = y; - } -} - -bool vad_simple(std::vector & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) { - const int n_samples = pcmf32.size(); - const int n_samples_last = (sample_rate * last_ms) / 1000; - - if (n_samples_last >= n_samples) { - // not enough samples - assume no speech - return false; - } - - if (freq_thold > 0.0f) { - high_pass_filter(pcmf32, freq_thold, sample_rate); - } - - float energy_all = 0.0f; - float energy_last = 0.0f; - - for (int i = 0; i < n_samples; i++) { - energy_all += fabsf(pcmf32[i]); - - if (i >= n_samples - n_samples_last) { - energy_last += fabsf(pcmf32[i]); - } - } - - energy_all /= n_samples; - energy_last /= n_samples_last; - - if (verbose) { - fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold); - } - - if (energy_last > vad_thold*energy_all) { - return false; - } - - return true; -} diff --git a/examples/whisper/common.h b/examples/whisper/common.h deleted file mode 100644 index 04dd7cbe..00000000 --- a/examples/whisper/common.h +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -// needs to match WHISPER_SAMPLE_RATE -#define COMMON_SAMPLE_RATE 16000 - -#include -#include - -std::string trim(const std::string & s); - -std::string replace( - const std::string & s, - const std::string & from, - const std::string & to); - -// Read WAV audio file and store the PCM data into pcmf32 -// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE -// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM -bool read_wav( - const std::string & fname, - std::vector & pcmf32, - std::vector> & pcmf32s, - bool stereo); - -// Apply a high-pass frequency filter to PCM audio -// Suppresses frequencies below cutoff Hz -void high_pass_filter( - std::vector & data, - float cutoff, - float sample_rate); - -// Basic voice activity detection (VAD) using audio energy adaptive threshold -bool vad_simple( - std::vector & pcmf32, - int sample_rate, - int last_ms, - float vad_thold, - float freq_thold, - bool verbose); - diff --git a/examples/whisper/quantize.cpp b/examples/whisper/quantize.cpp index ae3a5b8a..2af2a0d0 100644 --- a/examples/whisper/quantize.cpp +++ b/examples/whisper/quantize.cpp @@ -1,6 +1,6 @@ #include "ggml/ggml.h" -#include "utils.h" +#include "common.h" #include #include
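
For reference, a minimal sketch (not part of the commit) of how an example program can consume the renamed helpers after this change. It assumes a hypothetical main.cpp linked against the new "common" static library, as the updated gpt-2/gpt-j CMakeLists.txt files do, and it only calls functions declared in examples/common.h above:

    // hypothetical usage sketch of the renamed common helpers (not part of this commit)
    #include "common.h"

    #include <cstdio>
    #include <random>

    int main(int argc, char ** argv) {
        gpt_params params;

        // parse -s/-t/-p/-n/--top_k/--top_p/--temp/-b/-m; prints usage and exits on -h
        if (!gpt_params_parse(argc, argv, params)) {
            return 1;
        }

        std::mt19937 rng(params.seed >= 0 ? params.seed : std::random_device{}());

        // pick a random starting prompt if none was given ("default: random" in the usage text)
        if (params.prompt.empty()) {
            params.prompt = gpt_random_prompt(rng);
        }

        printf("model : %s\n", params.model.c_str());
        printf("prompt: %s\n", params.prompt.c_str());

        return 0;
    }

The CMake side is the same pattern the diff applies to each example: target_link_libraries(<example> PRIVATE ggml common) instead of the old ggml_utils library.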