From: Georgi Gerganov Date: Wed, 24 May 2023 07:40:27 +0000 (+0300) Subject: common : add missing declarations X-Git-Tag: upstream/0.0.1642~1446 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=47ff7d688520d24079e03667b9dcea68783b6c29;p=pkg%2Fggml%2Fsources%2Fggml common : add missing declarations --- diff --git a/examples/common.cpp b/examples/common.cpp index bb98d2c6..e30f524e 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -119,6 +119,10 @@ std::string replace(const std::string & s, const std::string & from, const std:: return result; } +void gpt_vocab::add_special_token(const std::string & token) { + special_tokens.push_back(token); +} + std::map<std::string, int32_t> json_parse(const std::string & fname) { std::map<std::string, int32_t> result; @@ -210,10 +214,6 @@ std::map<std::string, int32_t> json_parse(const std::string & fname) { return result; } -void gpt_vocab::add_special_token(const std::string & token) { - special_tokens.push_back(token); -} - std::string convert_to_utf8(const std::wstring & input) { std::wstring_convert<std::codecvt_utf8<wchar_t>> converter; return converter.to_bytes(input); @@ -226,7 +226,7 @@ std::wstring convert_to_wstring(const std::string & input) { std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text) { std::vector<std::string> words; - + // first split the text into words { std::string str = text; diff --git a/examples/common.h b/examples/common.h index 73b4a581..4a24ffbc 100644 --- a/examples/common.h +++ b/examples/common.h @@ -61,8 +61,9 @@ struct gpt_vocab { // poor-man's JSON parsing std::map<std::string, int32_t> json_parse(const std::string & fname); -// handle utf-8 coding -void utf8_to_string(std::string const & in, std::string & out); +std::string convert_to_utf8(const std::wstring & input); + +std::wstring convert_to_wstring(const std::string & input); // split text into tokens //