#include <regex>
#include <locale>
#include <codecvt>
+#include <sstream>
#ifndef M_PI
#define M_PI 3.14159265358979323846
if (params.prompt.back() == '\n') {
params.prompt.pop_back();
}
- } else {
+ } else if (arg == "-tt" || arg == "--token_test") {
+ params.token_test = argv[++i];
+ }
+ else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
gpt_print_usage(argc, argv, params);
exit(0);
fprintf(stderr, " prompt to start generation with (default: random)\n");
fprintf(stderr, " -f FNAME, --file FNAME\n");
fprintf(stderr, " load prompt from a file\n");
+ fprintf(stderr, " -tt TOKEN_TEST, --token_test TOKEN_TEST\n");
+ fprintf(stderr, " test tokenization\n");
fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d)\n", params.n_predict);
fprintf(stderr, " --top_k N top-k sampling (default: %d)\n", params.top_k);
fprintf(stderr, " --top_p N top-p sampling (default: %.1f)\n", params.top_p);
return converter.to_bytes(input);
}
+
std::wstring convert_to_wstring(const std::string & input) {
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
return converter.from_bytes(input);
}
}
- // find the longest tokens that form the words:
+ // find the longest token that forms each word in words:
std::vector<gpt_vocab::id> tokens;
for (const auto & word : words) {
- if (word.size() == 0) continue;
-
- int i = 0;
- int n = word.size();
- while (i < n) {
- int j = n;
- while (j > i) {
- auto it = vocab.token_to_id.find(word.substr(i, j-i));
- if (it != vocab.token_to_id.end()) {
+ for (int i = 0; i < word.size(); ){
+ for (int j = word.size() - 1; j >= i; j--){
+ auto cand = word.substr(i, j-i+1);
+ auto it = vocab.token_to_id.find(cand);
+ if (it != vocab.token_to_id.end()){ // word.substr(i, j-i+1) in vocab
tokens.push_back(it->second);
- i = j;
- j = n;
- continue;
+ i = j + 1;
+ break;
}
- --j;
- }
- if (i == n) {
- break;
- }
- if (j == i) {
- auto sub = word.substr(i, 1);
- if (vocab.token_to_id.find(sub) != vocab.token_to_id.end()) {
- tokens.push_back(vocab.token_to_id.at(sub));
- } else {
- fprintf(stderr, "%s: unknown token '%s'\n", __func__, sub.data());
+ else if (j == i){ // word.substr(i, 1) has no matching
+ fprintf(stderr, "%s: unknown token '%s'\n", __func__, word.substr(i, 1).data());
+ i++;
}
- ++i;
}
}
}
+
return tokens;
}
+// Parses a delimiter-separated list of token ids (e.g. "15496,2159,0").
+//
+// - input:     text holding the ids
+// - delimiter: character that separates consecutive ids
+//
+// Returns the ids in the order they appear in input. A field that does not
+// start with a valid integer (empty field, stray text, out-of-range value) is
+// reported on stderr and skipped, so a malformed line cannot terminate the
+// program through an uncaught exception.
+std::vector<gpt_vocab::id> parse_tokens_from_string(const std::string& input, char delimiter) {
+    std::vector<gpt_vocab::id> output;
+    std::stringstream ss(input);
+    std::string token;
+
+    while (std::getline(ss, token, delimiter)) {
+        // stream extraction skips leading whitespace and stops at the first
+        // non-digit, and reports failure through the stream state instead of throwing
+        std::stringstream field(token);
+        int id = 0;
+        if (field >> id) {
+            output.push_back(id);
+        } else {
+            fprintf(stderr, "%s : skipping invalid token id '%s'\n", __func__, token.c_str());
+        }
+    }
+
+    return output;
+}
+
+// Loads tokenizer test cases from a text file.
+//
+// Every usable line has the form "<text> => <id>,<id>,...": the part before the
+// first " => " is the input text, the remainder is the comma-separated list of
+// expected token ids. Lines without the separator are ignored. If the same text
+// occurs more than once, the last occurrence wins.
+//
+// Returns an empty map (after reporting on stderr) when fpath_test is empty or
+// the file cannot be opened.
+std::map<std::string, std::vector<gpt_vocab::id>> extract_tests_from_file(const std::string & fpath_test){
+    std::map<std::string, std::vector<gpt_vocab::id>> tests;
+
+    if (fpath_test.empty()){
+        fprintf(stderr, "%s : No test file found.\n", __func__);
+        return tests;
+    }
+
+    std::ifstream fin(fpath_test);
+    if (!fin) {
+        // without this check a wrong path would look like a file with zero tests
+        fprintf(stderr, "%s : failed to open '%s'\n", __func__, fpath_test.c_str());
+        return tests;
+    }
+
+    const std::string delimiter = " => "; // separates the text from its expected ids
+    const char del_tok = ',';             // separates the individual ids
+
+    std::string line;
+    while (std::getline(fin, line)) {
+        const size_t delimiter_pos = line.find(delimiter);
+        if (delimiter_pos == std::string::npos) {
+            continue;
+        }
+
+        const std::string text     = line.substr(0, delimiter_pos);
+        const std::string s_tokens = line.substr(delimiter_pos + delimiter.size());
+        tests[text] = parse_tokens_from_string(s_tokens, del_tok);
+    }
+    return tests;
+}
+
+// Runs the tokenizer tests stored in fpath_test against gpt_tokenize.
+//
+// - vocab:      vocabulary used both for tokenizing and for printing token texts
+// - fpath_test: path of the test file read by extract_tests_from_file
+//
+// For every test whose tokens differ from the expected ones, the sentence, the
+// expected tokens ("hf") and the produced tokens ("ggml") are printed to stderr.
+// A summary with the number of failed tests is printed at the end.
+// Does nothing when fpath_test is empty.
+void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test){
+    // this function is invoked unconditionally after the model is loaded, so an
+    // empty path simply means that no test run was requested
+    if (fpath_test.empty()) {
+        return;
+    }
+
+    const std::map<std::string, std::vector<gpt_vocab::id>> tests = extract_tests_from_file(fpath_test);
+
+    // __func__ inside the lambda would name the lambda, so remember ours here
+    const char * fn = __func__;
+
+    // prints "<fn> : tokens in <label>: tok(id), tok(id), ..." followed by a newline
+    // NOTE(review): if id_to_token is a map, operator[] inserts an empty entry for
+    // an id that is missing from the vocab - confirm whether that is acceptable
+    const auto print_tokens = [&vocab, fn](const char * label, const std::vector<gpt_vocab::id> & ids) {
+        fprintf(stderr, "%s : tokens in %s: ", fn, label);
+        for (const auto & t : ids) {
+            fprintf(stderr, "%s(%d), ", vocab.id_to_token[t].c_str(), t);
+        }
+        fprintf(stderr, "\n");
+    };
+
+    size_t n_fails = 0;
+
+    for (const auto & test : tests) {
+        const std::vector<gpt_vocab::id> tokens = gpt_tokenize(vocab, test.first);
+        if (tokens == test.second) {
+            continue;
+        }
+
+        n_fails++;
+
+        // print out failure cases
+        fprintf(stderr, "%s : failed test: '%s'\n", fn, test.first.c_str());
+        print_tokens("hf", test.second);
+        print_tokens("ggml", tokens);
+    }
+
+    // %zu is the conversion for size_t; %lu is wrong where unsigned long is 32-bit
+    fprintf(stderr, "%s : %zu tests failed out of %zu tests.\n", fn, n_fails, tests.size());
+}
+
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
printf("%s: loading vocab from '%s'\n", __func__, fname.c_str());
int32_t n_batch = 8; // batch size for prompt processing
- std::string model = "models/gpt-2-117M/ggml-model.bin"; // model path
- std::string prompt;
+ std::string model = "models/gpt-2-117M/ggml-model.bin"; // model path
+ std::string prompt = "";
+ std::string token_test = "";
};
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
//
std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text);
+// test outputs of gpt_tokenize
+//
+// - compare with tokens generated by the huggingface tokenizer
+// - test cases are chosen based on the model's main language (under 'prompt' directory)
+// - for every sentence that is tokenized differently, print the sentence, the huggingface tokens and the ggml tokens
+// - finally, print how many of the tests failed
+//
+void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test);
+
// load the tokens from encoder.json
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);
}
t_load_us = ggml_time_us() - t_start_us;
+
+ test_gpt_tokenizer(vocab, params.token_test);
}
int n_past = 0;
}
t_load_us = ggml_time_us() - t_start_us;
+
+ test_gpt_tokenizer(vocab, params.token_test);
}
int n_past = 0;
}
t_load_us = ggml_time_us() - t_start_us;
+
+ test_gpt_tokenizer(vocab, params.token_test);
}
int n_past = 0;
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model.bin"
-with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
- encoder = json.load(f)
-
with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
hparams = json.load(f)
tokenizer = AutoTokenizer.from_pretrained(dir_model)
model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True)
-#print (model)
-
-#print(tokenizer.encode('I believe the meaning of life is'))
list_vars = model.state_dict()
for name in list_vars.keys():
fout.write(struct.pack("i", ftype))
# TODO: temporary hack to not deal with implementing the tokenizer
-dot_token = tokenizer.encode('.')[0]
for i in range(hparams["vocab_size"]):
- text = tokenizer.decode([dot_token, i]).encode('utf-8')
- # remove the first byte (it's always '.')
- text = text[1:]
+ text = tokenizer.decode([i]).encode('utf-8')
fout.write(struct.pack("i", len(text)))
fout.write(text)
print(" Skipping variable: " + name)
continue
- n_dims = len(data.shape);
+ n_dims = len(data.shape)
# ftype == 0 -> float32, ftype == 1 -> float16
- ftype_cur = 0;
+ ftype_cur = 0
if ftype != 0:
if name[-7:] == ".weight" and n_dims == 2:
print(" Converting to float16")
fout.write(struct.pack("iii", n_dims, len(str), ftype_cur))
for i in range(n_dims):
fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
- fout.write(str);
+ fout.write(str)
# data
data.tofile(fout)
}
t_load_us = ggml_time_us() - t_start_us;
+
+ test_gpt_tokenizer(vocab, params.token_test);
}
int n_past = 0;
ggml_free(model.ctx);
return 0;
-}
+}
\ No newline at end of file
std::string model = ""; // model path
std::string prompt = "";
+ std::string token_test = "";
bool perplexity = false;
fprintf(stderr, " prompt to start generation with (default: random)\n");
fprintf(stderr, " -f FNAME, --file FNAME\n");
fprintf(stderr, " load prompt from a file\n");
+ fprintf(stderr, " -tt TOKEN_TEST, --token_test TOKEN_TEST\n");
+ fprintf(stderr, " test tokenization\n");
fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d)\n", params.n_predict);
fprintf(stderr, " --top_k N top-k sampling (default: %d, 0 = n_vocab)\n", params.top_k);
fprintf(stderr, " --top_p N top-p sampling (default: %.2f)\n", params.top_p);
if (params.prompt.back() == '\n') {
params.prompt.pop_back();
}
+ } else if (arg == "-tt" || arg == "--token_test") {
+ params.token_test = argv[++i];
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
mpt_print_usage(argc, argv, params);
return probs;
}
-int perplexity(mpt_params params) {
+int perplexity(const mpt_params & params) {
ggml_time_init();
const int64_t t_main_start_us = ggml_time_us();
}
t_load_us = ggml_time_us() - t_start_us;
+
+ test_gpt_tokenizer(vocab, params.token_test);
}
if (params.top_k == 0) {
--- /dev/null
+Hello World! => 12092,3645,2
+I can't believe it's already Friday!" => 42,476,626,2868,352,434,2168,6794,1476
+The URL for the website is https://www.example.com." => 510,10611,323,253,4422,310,5987,1358,2700,15,11667,15,681,449
+"She said, 'I love to travel.'" => 3,2993,753,13,686,42,2389,281,4288,18574
+'The temperature is 25.5°C.' => 8,510,3276,310,2030,15,22,3272,36,2464
+"Let's meet at 2:30 p.m. in the park." => 3,1466,434,2525,387,374,27,1229,268,15,78,15,275,253,5603,449
+The book costs $19.99 => 510,1984,4815,370,746,15,1525
+"John's favorite color is blue." => 3,8732,434,7583,3295,310,4797,449
+Th@nk y0u f0r y0ur h3lp! => 1044,33,30664,340,17,86,269,17,83,340,17,321,288,20,24343,2
+C@n I g3t a c0ffee, pl3@se? => 36,33,79,309,305,20,85,247,260,17,71,6851,13,499,20,33,339,32
+W0w! Th@t's @m@zing! => 56,17,88,2,596,33,85,434,1214,78,33,8537,2
+H0w 4re y0u t0d@y? => 41,17,88,577,250,340,17,86,246,17,69,33,90,32
+I l0ve t0 tr@vel @r0und the w0rld. => 42,298,17,306,246,17,492,33,652,1214,83,17,1504,253,259,17,83,392,15
+Wh@t's y0ur f@v0rite m0vie? => 3152,33,85,434,340,17,321,269,33,87,17,3852,278,17,25858,32
+The cat is sleeping on the mat. => 510,5798,310,14343,327,253,1111,15
+I need to buy some groceries for dinner. => 42,878,281,4489,690,45160,447,323,8955,15
+The sun is shining brightly in the sky. => 510,5101,310,28115,43925,275,253,8467,15
+She is reading a book in the park. => 2993,310,4361,247,1984,275,253,5603,15
+We went for a walk on the beach yesterday. => 1231,2427,323,247,2940,327,253,11600,11066,15
+He plays the guitar like a pro. => 1328,7120,253,12609,751,247,354,15
+They are going to the movies tonight. => 3726,403,1469,281,253,11321,11608,15
+The flowers are blooming in the garden. => 510,12405,403,30601,272,275,253,10329,15
+I enjoy listening to classical music. => 42,4264,11298,281,8946,3440,15
+We need to buy groceries for the week. => 1231,878,281,4489,45160,447,323,253,2129,15
+The dog is chasing its tail in circles. => 510,4370,310,31702,697,8105,275,14240,15
+She is wearing a beautiful red dress. => 2993,310,9398,247,5389,2502,7619,15
+He is a talented actor in Hollywood. => 1328,310,247,21220,12353,275,14759,15
+The children are playing in the playground. => 510,2151,403,4882,275,253,41008,15
+I'm going to visit my grandparents this weekend. => 42,1353,1469,281,4143,619,37186,436,8849,15
+The coffee tastes bitter without sugar. => 510,8574,27491,17123,1293,8618,15
+They are planning a surprise party for her. => 3726,403,7219,247,9326,3128,323,617,15
+She sings like an angel on stage. => 2993,44718,751,271,23087,327,3924,15
+We should take a vacation to relax. => 1231,943,1379,247,18125,281,7921,15
+He is studying medicine at the university. => 1328,310,12392,9921,387,253,9835,15
+The rain is pouring heavily outside. => 510,9313,310,31226,11306,3345,15
+I enjoy watching romantic movies. => 42,4264,7487,18109,11321,15
+They are celebrating their anniversary today. => 3726,403,28765,616,19054,3063,15
+She dances gracefully to the music. => 2993,47078,14426,2920,281,253,3440,15
+He is an excellent basketball player. => 1328,310,271,7126,14648,4760,15
+The baby is sleeping soundly in the crib. => 510,6858,310,14343,3590,314,275,253,260,725,15
+I need to finish my homework before dinner. => 42,878,281,8416,619,32110,1078,8955,15
+They are organizing a charity event next month. => 3726,403,26169,247,19489,2362,1735,1770,15
+She is cooking a delicious meal for us. => 2993,310,12398,247,17319,11484,323,441,15
+We should go hiking in the mountains. => 1231,943,564,33061,275,253,14700,15
+The car broke down on the way to work. => 510,1113,9377,1066,327,253,1039,281,789,15
+He loves playing video games in his free time. => 1328,14528,4882,3492,3958,275,521,1959,673,15
+The birds are chirping in the trees. => 510,11260,403,36494,14650,275,253,7139,15
+I want to learn how to play the piano. => 42,971,281,3037,849,281,1132,253,18542,15
+They are building a new shopping mall in the city. => 3726,403,3652,247,747,12701,28974,275,253,2846,15
+She is writing a novel in her spare time. => 2993,310,4028,247,4460,275,617,18345,673,15
+We are going to the zoo this Saturday. => 1231,403,1469,281,253,41089,436,7814,15
+The cake looks delicious with chocolate frosting. => 510,15221,4453,17319,342,14354,34724,272,15
+He is a talented painter who sells his artwork. => 1328,310,247,21220,27343,665,27924,521,28227,15
+The students are studying for their exams. => 510,3484,403,12392,323,616,34666,15
+I enjoy swimming in the ocean. => 42,4264,17120,275,253,12927,15
+They are renovating their house. => 3726,403,30074,839,616,2419,15
+She is practicing yoga to stay healthy. => 2993,310,25815,25551,281,3297,5875,15
+We should plant flowers in the garden. => 1231,943,4444,12405,275,253,10329,15
+The traffic is heavy during rush hour. => 510,7137,310,5536,1309,16949,4964,15
+He is a skilled chef who creates amazing dishes. => 1328,310,247,18024,26540,665,10513,8644,17114,15
+The baby is crawling on the floor. => 510,6858,310,44922,327,253,5254,15
+I need to buy a new pair of shoes. => 42,878,281,4489,247,747,4667,273,12682,15
+They are going on a road trip across the country. => 3726,403,1469,327,247,3971,7408,2439,253,2586,15
+She is playing the piano beautifully. => 2993,310,4882,253,18542,27839,15
+We are going to a concert tomorrow night. => 1231,403,1469,281,247,12699,10873,2360,15
+The cake tastes delicious with vanilla frosting. => 510,15221,27491,17319,342,26724,34724,272,15
+He is a dedicated teacher who inspires his students. => 1328,310,247,9940,9732,665,6381,2731,521,3484,15
+The students are participating in a science fair. => 510,3484,403,15299,275,247,5859,4344,15
+I enjoy hiking in the mountains. => 42,4264,33061,275,253,14700,15
+They are organizing a beach cleanup next weekend. => 3726,403,26169,247,11600,34709,1735,8849,15
+She is taking photographs of nature. => 2993,310,3192,15928,273,3753,15
+We should try a new restaurant in town. => 1231,943,1611,247,747,10301,275,3874,15
+The traffic is moving slowly on the highway. => 510,7137,310,4886,7808,327,253,17657,15
+He is a talented singer with a beautiful voice. => 1328,310,247,21220,16057,342,247,5389,4318,15
+The baby is laughing and giggling. => 510,6858,310,17053,285,41542,1981,15
+I need to do laundry and wash my clothes. => 42,878,281,513,29023,285,14841,619,10015,15
+They are planning a trip to Europe. => 3726,403,7219,247,7408,281,3060,15
+She is learning how to play the guitar. => 2993,310,4715,849,281,1132,253,12609,15
+We are going to a museum this Sunday. => 1231,403,1469,281,247,16064,436,6926,15
+The coffee smells amazing in the morning. => 510,8574,34247,8644,275,253,4131,15
+He is a hardworking farmer who grows crops. => 1328,310,247,1892,21107,24718,665,17202,19492,15
+The students are presenting their research projects. => 510,3484,403,15250,616,2561,6493,15
+I enjoy playing soccer with my friends. => 42,4264,4882,20391,342,619,3858,15
+They are volunteering at a local shelter. => 3726,403,10057,2158,387,247,1980,17824,15
+She is practicing martial arts for self-defense. => 2993,310,25815,29731,14635,323,1881,14,29337,15
+We should try a new recipe for dinner. => 1231,943,1611,247,747,13612,323,8955,15
+The traffic is congest => 510,7137,310,25801
+The sun is shining brightly today. => 510,5101,310,28115,43925,3063,15
+I enjoy reading books in my free time. => 42,4264,4361,5098,275,619,1959,673,15
+She plays the piano beautifully. => 2993,7120,253,18542,27839,15
+The cat chased the mouse around the room. => 510,5798,40754,253,6521,1475,253,2316,15
+I love eating pizza with extra cheese. => 42,2389,9123,22534,342,4465,12173,15
+He always wears a hat wherever he goes. => 1328,1900,31394,247,7856,20312,344,4566,15
+The flowers in the garden are blooming. => 510,12405,275,253,10329,403,30601,272,15
+She danced gracefully on the stage. => 2993,39860,14426,2920,327,253,3924,15
+The dog barked loudly in the park. => 510,4370,21939,264,31311,275,253,5603,15
+We went swimming in the ocean yesterday. => 1231,2427,17120,275,253,12927,11066,15
+He speaks fluent French and Spanish. => 1328,16544,2938,290,5112,285,9883,15
+The train arrived at the station on time. => 510,6194,7244,387,253,4660,327,673,15
+She cooked a delicious meal for her family. => 2993,18621,247,17319,11484,323,617,2021,15
--- /dev/null
+请问洗手间在哪里? => 6435,7309,3819,2797,7313,1762,1525,7027,8043
--- /dev/null
+Hello World! => 15496,2159,0
+I can't believe it's already Friday!" => 40,460,470,1975,340,338,1541,3217,2474
+The URL for the website is https://www.example.com." => 464,10289,329,262,3052,318,3740,1378,2503,13,20688,13,785,526
+"She said, 'I love to travel.'" => 1,3347,531,11,705,40,1842,284,3067,11496
+'The temperature is 25.5°C.' => 6,464,5951,318,1679,13,20,7200,34,2637
+"Let's meet at 2:30 p.m. in the park." => 1,5756,338,1826,379,362,25,1270,279,13,76,13,287,262,3952,526
+The book costs $19.99 => 464,1492,3484,720,1129,13,2079
+"John's favorite color is blue." => 1,7554,338,4004,3124,318,4171,526
+Th@nk y0u f0r y0ur h3lp! => 817,31,77,74,331,15,84,277,15,81,331,15,333,289,18,34431,0
+C@n I g3t a c0ffee, pl3@se? => 34,31,77,314,308,18,83,257,269,15,5853,11,458,18,31,325,30
+W0w! Th@t's @m@zing! => 54,15,86,0,536,31,83,338,2488,76,31,9510,0
+H0w 4re y0u t0d@y? => 39,15,86,604,260,331,15,84,256,15,67,31,88,30
+I l0ve t0 tr@vel @r0und the w0rld. => 40,300,15,303,256,15,491,31,626,2488,81,15,917,262,266,15,81,335,13
+Wh@t's y0ur f@v0rite m0vie? => 1199,31,83,338,331,15,333,277,31,85,15,6525,285,15,85,494,30
+The cat is sleeping on the mat. => 464,3797,318,11029,319,262,2603,13
+I need to buy some groceries for dinner. => 40,761,284,2822,617,38464,329,8073,13
+The sun is shining brightly in the sky. => 464,4252,318,22751,35254,287,262,6766,13
+She is reading a book in the park. => 3347,318,3555,257,1492,287,262,3952,13
+We went for a walk on the beach yesterday. => 1135,1816,329,257,2513,319,262,10481,7415,13
+He plays the guitar like a pro. => 1544,5341,262,10047,588,257,386,13
+They are going to the movies tonight. => 2990,389,1016,284,262,6918,9975,13
+The flowers are blooming in the garden. => 464,12734,389,24924,3383,287,262,11376,13
+I enjoy listening to classical music. => 40,2883,8680,284,15993,2647,13
+We need to buy groceries for the week. => 1135,761,284,2822,38464,329,262,1285,13
+The dog is chasing its tail in circles. => 464,3290,318,20023,663,7894,287,13332,13
+She is wearing a beautiful red dress. => 3347,318,5762,257,4950,2266,6576,13
+He is a talented actor in Hollywood. => 1544,318,257,12356,8674,287,8502,13
+The children are playing in the playground. => 464,1751,389,2712,287,262,24817,13
+I'm going to visit my grandparents this weekend. => 40,1101,1016,284,3187,616,28571,428,5041,13
+The coffee tastes bitter without sugar. => 464,6891,18221,12922,1231,7543,13
+They are planning a surprise party for her. => 2990,389,5410,257,5975,2151,329,607,13
+She sings like an angel on stage. => 3347,33041,588,281,18304,319,3800,13
+We should take a vacation to relax. => 1135,815,1011,257,14600,284,8960,13
+He is studying medicine at the university. => 1544,318,11065,9007,379,262,6403,13
+The rain is pouring heavily outside. => 464,6290,318,23147,7272,2354,13
+I enjoy watching romantic movies. => 40,2883,4964,14348,6918,13
+They are celebrating their anniversary today. => 2990,389,17499,511,11162,1909,13
+She dances gracefully to the music. => 3347,38207,11542,2759,284,262,2647,13
+He is an excellent basketball player. => 1544,318,281,6275,9669,2137,13
+The baby is sleeping soundly in the crib. => 464,5156,318,11029,2128,306,287,262,48083,13
+I need to finish my homework before dinner. => 40,761,284,5461,616,26131,878,8073,13
+They are organizing a charity event next month. => 2990,389,16924,257,11016,1785,1306,1227,13
+She is cooking a delicious meal for us. => 3347,318,10801,257,12625,9799,329,514,13
+We should go hiking in the mountains. => 1135,815,467,24522,287,262,12269,13
+The car broke down on the way to work. => 464,1097,6265,866,319,262,835,284,670,13
+He loves playing video games in his free time. => 1544,10408,2712,2008,1830,287,465,1479,640,13
+The birds are chirping in the trees. => 464,10087,389,442,343,13886,287,262,7150,13
+I want to learn how to play the piano. => 40,765,284,2193,703,284,711,262,19132,13
+They are building a new shopping mall in the city. => 2990,389,2615,257,649,9735,17374,287,262,1748,13
+She is writing a novel in her spare time. => 3347,318,3597,257,5337,287,607,13952,640,13
+We are going to the zoo this Saturday. => 1135,389,1016,284,262,26626,428,3909,13
+The cake looks delicious with chocolate frosting. => 464,12187,3073,12625,351,11311,21682,278,13
+He is a talented painter who sells his artwork. => 1544,318,257,12356,34537,508,16015,465,16257,13
+The students are studying for their exams. => 464,2444,389,11065,329,511,26420,13
+I enjoy swimming in the ocean. => 40,2883,14899,287,262,9151,13
+They are renovating their house. => 2990,389,24317,803,511,2156,13
+She is practicing yoga to stay healthy. => 3347,318,18207,20351,284,2652,5448,13
+We should plant flowers in the garden. => 1135,815,4618,12734,287,262,11376,13
+The traffic is heavy during rush hour. => 464,4979,318,4334,1141,10484,1711,13
+He is a skilled chef who creates amazing dishes. => 1544,318,257,14297,21221,508,8075,4998,16759,13
+The baby is crawling on the floor. => 464,5156,318,34499,319,262,4314,13
+I need to buy a new pair of shoes. => 40,761,284,2822,257,649,5166,286,10012,13
+They are going on a road trip across the country. => 2990,389,1016,319,257,2975,5296,1973,262,1499,13
+She is playing the piano beautifully. => 3347,318,2712,262,19132,21104,13
+We are going to a concert tomorrow night. => 1135,389,1016,284,257,10010,9439,1755,13
+The cake tastes delicious with vanilla frosting. => 464,12187,18221,12625,351,16858,21682,278,13
+He is a dedicated teacher who inspires his students. => 1544,318,257,7256,4701,508,38934,465,2444,13
+The students are participating in a science fair. => 464,2444,389,11983,287,257,3783,3148,13
+I enjoy hiking in the mountains. => 40,2883,24522,287,262,12269,13
+They are organizing a beach cleanup next weekend. => 2990,389,16924,257,10481,27425,1306,5041,13
+She is taking photographs of nature. => 3347,318,2263,12566,286,3450,13
+We should try a new restaurant in town. => 1135,815,1949,257,649,7072,287,3240,13
+The traffic is moving slowly on the highway. => 464,4979,318,3867,6364,319,262,12763,13
+He is a talented singer with a beautiful voice. => 1544,318,257,12356,14015,351,257,4950,3809,13
+The baby is laughing and giggling. => 464,5156,318,14376,290,30442,1359,13
+I need to do laundry and wash my clothes. => 40,761,284,466,25724,290,13502,616,8242,13
+They are planning a trip to Europe. => 2990,389,5410,257,5296,284,2031,13
+She is learning how to play the guitar. => 3347,318,4673,703,284,711,262,10047,13
+We are going to a museum this Sunday. => 1135,389,1016,284,257,13257,428,3502,13
+The coffee smells amazing in the morning. => 464,6891,25760,4998,287,262,3329,13
+He is a hardworking farmer who grows crops. => 1544,318,257,1327,16090,18739,508,13676,14450,13
+The students are presenting their research projects. => 464,2444,389,17728,511,2267,4493,13
+I enjoy playing soccer with my friends. => 40,2883,2712,11783,351,616,2460,13
+They are volunteering at a local shelter. => 2990,389,41434,379,257,1957,11772,13
+She is practicing martial arts for self-defense. => 3347,318,18207,15618,10848,329,2116,12,19774,13
+We should try a new recipe for dinner. => 1135,815,1949,257,649,8364,329,8073,13
+The traffic is congest => 464,4979,318,22791
+The sun is shining brightly today. => 464,4252,318,22751,35254,1909,13
+I enjoy reading books in my free time. => 40,2883,3555,3835,287,616,1479,640,13
+She plays the piano beautifully. => 3347,5341,262,19132,21104,13
+The cat chased the mouse around the room. => 464,3797,26172,262,10211,1088,262,2119,13
+I love eating pizza with extra cheese. => 40,1842,6600,14256,351,3131,9891,13
+He always wears a hat wherever he goes. => 1544,1464,17326,257,6877,14530,339,2925,13
+The flowers in the garden are blooming. => 464,12734,287,262,11376,389,24924,3383,13
+She danced gracefully on the stage. => 3347,39480,11542,2759,319,262,3800,13
+The dog barked loudly in the park. => 464,3290,21405,276,23112,287,262,3952,13
+We went swimming in the ocean yesterday. => 1135,1816,14899,287,262,9151,7415,13
+He speaks fluent French and Spanish. => 1544,9209,43472,4141,290,7897,13
+The train arrived at the station on time. => 464,4512,5284,379,262,4429,319,640,13
+She cooked a delicious meal for her family. => 3347,15847,257,12625,9799,329,607,1641,13
--- /dev/null
+Hello World! => 15496,2159,0
+I can't believe it's already Friday!" => 40,460,470,1975,340,338,1541,3217,2474
+The URL for the website is https://www.example.com." => 464,10289,329,262,3052,318,3740,1378,2503,13,20688,13,785,526
+"She said, 'I love to travel.'" => 1,3347,531,11,705,40,1842,284,3067,11496
+'The temperature is 25.5°C.' => 6,464,5951,318,1679,13,20,7200,34,2637
+"Let's meet at 2:30 p.m. in the park." => 1,5756,338,1826,379,362,25,1270,279,13,76,13,287,262,3952,526
+The book costs $19.99 => 464,1492,3484,720,1129,13,2079
+"John's favorite color is blue." => 1,7554,338,4004,3124,318,4171,526
+Th@nk y0u f0r y0ur h3lp! => 817,31,77,74,331,15,84,277,15,81,331,15,333,289,18,34431,0
+C@n I g3t a c0ffee, pl3@se? => 34,31,77,314,308,18,83,257,269,15,5853,11,458,18,31,325,30
+W0w! Th@t's @m@zing! => 54,15,86,0,536,31,83,338,2488,76,31,9510,0
+H0w 4re y0u t0d@y? => 39,15,86,604,260,331,15,84,256,15,67,31,88,30
+I l0ve t0 tr@vel @r0und the w0rld. => 40,300,15,303,256,15,491,31,626,2488,81,15,917,262,266,15,81,335,13
+Wh@t's y0ur f@v0rite m0vie? => 1199,31,83,338,331,15,333,277,31,85,15,6525,285,15,85,494,30
+The cat is sleeping on the mat. => 464,3797,318,11029,319,262,2603,13
+I need to buy some groceries for dinner. => 40,761,284,2822,617,38464,329,8073,13
+The sun is shining brightly in the sky. => 464,4252,318,22751,35254,287,262,6766,13
+She is reading a book in the park. => 3347,318,3555,257,1492,287,262,3952,13
+We went for a walk on the beach yesterday. => 1135,1816,329,257,2513,319,262,10481,7415,13
+He plays the guitar like a pro. => 1544,5341,262,10047,588,257,386,13
+They are going to the movies tonight. => 2990,389,1016,284,262,6918,9975,13
+The flowers are blooming in the garden. => 464,12734,389,24924,3383,287,262,11376,13
+I enjoy listening to classical music. => 40,2883,8680,284,15993,2647,13
+We need to buy groceries for the week. => 1135,761,284,2822,38464,329,262,1285,13
+The dog is chasing its tail in circles. => 464,3290,318,20023,663,7894,287,13332,13
+She is wearing a beautiful red dress. => 3347,318,5762,257,4950,2266,6576,13
+He is a talented actor in Hollywood. => 1544,318,257,12356,8674,287,8502,13
+The children are playing in the playground. => 464,1751,389,2712,287,262,24817,13
+I'm going to visit my grandparents this weekend. => 40,1101,1016,284,3187,616,28571,428,5041,13
+The coffee tastes bitter without sugar. => 464,6891,18221,12922,1231,7543,13
+They are planning a surprise party for her. => 2990,389,5410,257,5975,2151,329,607,13
+She sings like an angel on stage. => 3347,33041,588,281,18304,319,3800,13
+We should take a vacation to relax. => 1135,815,1011,257,14600,284,8960,13
+He is studying medicine at the university. => 1544,318,11065,9007,379,262,6403,13
+The rain is pouring heavily outside. => 464,6290,318,23147,7272,2354,13
+I enjoy watching romantic movies. => 40,2883,4964,14348,6918,13
+They are celebrating their anniversary today. => 2990,389,17499,511,11162,1909,13
+She dances gracefully to the music. => 3347,38207,11542,2759,284,262,2647,13
+He is an excellent basketball player. => 1544,318,281,6275,9669,2137,13
+The baby is sleeping soundly in the crib. => 464,5156,318,11029,2128,306,287,262,48083,13
+I need to finish my homework before dinner. => 40,761,284,5461,616,26131,878,8073,13
+They are organizing a charity event next month. => 2990,389,16924,257,11016,1785,1306,1227,13
+She is cooking a delicious meal for us. => 3347,318,10801,257,12625,9799,329,514,13
+We should go hiking in the mountains. => 1135,815,467,24522,287,262,12269,13
+The car broke down on the way to work. => 464,1097,6265,866,319,262,835,284,670,13
+He loves playing video games in his free time. => 1544,10408,2712,2008,1830,287,465,1479,640,13
+The birds are chirping in the trees. => 464,10087,389,442,343,13886,287,262,7150,13
+I want to learn how to play the piano. => 40,765,284,2193,703,284,711,262,19132,13
+They are building a new shopping mall in the city. => 2990,389,2615,257,649,9735,17374,287,262,1748,13
+She is writing a novel in her spare time. => 3347,318,3597,257,5337,287,607,13952,640,13
+We are going to the zoo this Saturday. => 1135,389,1016,284,262,26626,428,3909,13
+The cake looks delicious with chocolate frosting. => 464,12187,3073,12625,351,11311,21682,278,13
+He is a talented painter who sells his artwork. => 1544,318,257,12356,34537,508,16015,465,16257,13
+The students are studying for their exams. => 464,2444,389,11065,329,511,26420,13
+I enjoy swimming in the ocean. => 40,2883,14899,287,262,9151,13
+They are renovating their house. => 2990,389,24317,803,511,2156,13
+She is practicing yoga to stay healthy. => 3347,318,18207,20351,284,2652,5448,13
+We should plant flowers in the garden. => 1135,815,4618,12734,287,262,11376,13
+The traffic is heavy during rush hour. => 464,4979,318,4334,1141,10484,1711,13
+He is a skilled chef who creates amazing dishes. => 1544,318,257,14297,21221,508,8075,4998,16759,13
+The baby is crawling on the floor. => 464,5156,318,34499,319,262,4314,13
+I need to buy a new pair of shoes. => 40,761,284,2822,257,649,5166,286,10012,13
+They are going on a road trip across the country. => 2990,389,1016,319,257,2975,5296,1973,262,1499,13
+She is playing the piano beautifully. => 3347,318,2712,262,19132,21104,13
+We are going to a concert tomorrow night. => 1135,389,1016,284,257,10010,9439,1755,13
+The cake tastes delicious with vanilla frosting. => 464,12187,18221,12625,351,16858,21682,278,13
+He is a dedicated teacher who inspires his students. => 1544,318,257,7256,4701,508,38934,465,2444,13
+The students are participating in a science fair. => 464,2444,389,11983,287,257,3783,3148,13
+I enjoy hiking in the mountains. => 40,2883,24522,287,262,12269,13
+They are organizing a beach cleanup next weekend. => 2990,389,16924,257,10481,27425,1306,5041,13
+She is taking photographs of nature. => 3347,318,2263,12566,286,3450,13
+We should try a new restaurant in town. => 1135,815,1949,257,649,7072,287,3240,13
+The traffic is moving slowly on the highway. => 464,4979,318,3867,6364,319,262,12763,13
+He is a talented singer with a beautiful voice. => 1544,318,257,12356,14015,351,257,4950,3809,13
+The baby is laughing and giggling. => 464,5156,318,14376,290,30442,1359,13
+I need to do laundry and wash my clothes. => 40,761,284,466,25724,290,13502,616,8242,13
+They are planning a trip to Europe. => 2990,389,5410,257,5296,284,2031,13
+She is learning how to play the guitar. => 3347,318,4673,703,284,711,262,10047,13
+We are going to a museum this Sunday. => 1135,389,1016,284,257,13257,428,3502,13
+The coffee smells amazing in the morning. => 464,6891,25760,4998,287,262,3329,13
+He is a hardworking farmer who grows crops. => 1544,318,257,1327,16090,18739,508,13676,14450,13
+The students are presenting their research projects. => 464,2444,389,17728,511,2267,4493,13
+I enjoy playing soccer with my friends. => 40,2883,2712,11783,351,616,2460,13
+They are volunteering at a local shelter. => 2990,389,41434,379,257,1957,11772,13
+She is practicing martial arts for self-defense. => 3347,318,18207,15618,10848,329,2116,12,19774,13
+We should try a new recipe for dinner. => 1135,815,1949,257,649,8364,329,8073,13
+The traffic is congest => 464,4979,318,22791
+The sun is shining brightly today. => 464,4252,318,22751,35254,1909,13
+I enjoy reading books in my free time. => 40,2883,3555,3835,287,616,1479,640,13
+She plays the piano beautifully. => 3347,5341,262,19132,21104,13
+The cat chased the mouse around the room. => 464,3797,26172,262,10211,1088,262,2119,13
+I love eating pizza with extra cheese. => 40,1842,6600,14256,351,3131,9891,13
+He always wears a hat wherever he goes. => 1544,1464,17326,257,6877,14530,339,2925,13
+The flowers in the garden are blooming. => 464,12734,287,262,11376,389,24924,3383,13
+She danced gracefully on the stage. => 3347,39480,11542,2759,319,262,3800,13
+The dog barked loudly in the park. => 464,3290,21405,276,23112,287,262,3952,13
+We went swimming in the ocean yesterday. => 1135,1816,14899,287,262,9151,7415,13
+He speaks fluent French and Spanish. => 1544,9209,43472,4141,290,7897,13
+The train arrived at the station on time. => 464,4512,5284,379,262,4429,319,640,13
+She cooked a delicious meal for her family. => 3347,15847,257,12625,9799,329,607,1641,13
--- /dev/null
+明日の天気はどうですか。 => 263,7353,268,18461,271,1722,18405,265
--- /dev/null
+Hello World! => 12092,3645,2
+I can't believe it's already Friday!" => 42,476,626,2868,352,434,2168,6794,1476
+The URL for the website is https://www.example.com." => 510,10611,323,253,4422,310,5987,1358,2700,15,11667,15,681,449
+"She said, 'I love to travel.'" => 3,2993,753,13,686,42,2389,281,4288,18574
+'The temperature is 25.5°C.' => 8,510,3276,310,2030,15,22,3272,36,2464
+"Let's meet at 2:30 p.m. in the park." => 3,1466,434,2525,387,374,27,1229,268,15,78,15,275,253,5603,449
+The book costs $19.99 => 510,1984,4815,370,746,15,1525
+"John's favorite color is blue." => 3,8732,434,7583,3295,310,4797,449
+Th@nk y0u f0r y0ur h3lp! => 1044,33,30664,340,17,86,269,17,83,340,17,321,288,20,24343,2
+C@n I g3t a c0ffee, pl3@se? => 36,33,79,309,305,20,85,247,260,17,71,6851,13,499,20,33,339,32
+W0w! Th@t's @m@zing! => 56,17,88,2,596,33,85,434,1214,78,33,8537,2
+H0w 4re y0u t0d@y? => 41,17,88,577,250,340,17,86,246,17,69,33,90,32
+I l0ve t0 tr@vel @r0und the w0rld. => 42,298,17,306,246,17,492,33,652,1214,83,17,1504,253,259,17,83,392,15
+Wh@t's y0ur f@v0rite m0vie? => 3152,33,85,434,340,17,321,269,33,87,17,3852,278,17,25858,32
+The cat is sleeping on the mat. => 510,5798,310,14343,327,253,1111,15
+I need to buy some groceries for dinner. => 42,878,281,4489,690,45160,447,323,8955,15
+The sun is shining brightly in the sky. => 510,5101,310,28115,43925,275,253,8467,15
+She is reading a book in the park. => 2993,310,4361,247,1984,275,253,5603,15
+We went for a walk on the beach yesterday. => 1231,2427,323,247,2940,327,253,11600,11066,15
+He plays the guitar like a pro. => 1328,7120,253,12609,751,247,354,15
+They are going to the movies tonight. => 3726,403,1469,281,253,11321,11608,15
+The flowers are blooming in the garden. => 510,12405,403,30601,272,275,253,10329,15
+I enjoy listening to classical music. => 42,4264,11298,281,8946,3440,15
+We need to buy groceries for the week. => 1231,878,281,4489,45160,447,323,253,2129,15
+The dog is chasing its tail in circles. => 510,4370,310,31702,697,8105,275,14240,15
+She is wearing a beautiful red dress. => 2993,310,9398,247,5389,2502,7619,15
+He is a talented actor in Hollywood. => 1328,310,247,21220,12353,275,14759,15
+The children are playing in the playground. => 510,2151,403,4882,275,253,41008,15
+I'm going to visit my grandparents this weekend. => 42,1353,1469,281,4143,619,37186,436,8849,15
+The coffee tastes bitter without sugar. => 510,8574,27491,17123,1293,8618,15
+They are planning a surprise party for her. => 3726,403,7219,247,9326,3128,323,617,15
+She sings like an angel on stage. => 2993,44718,751,271,23087,327,3924,15
+We should take a vacation to relax. => 1231,943,1379,247,18125,281,7921,15
+He is studying medicine at the university. => 1328,310,12392,9921,387,253,9835,15
+The rain is pouring heavily outside. => 510,9313,310,31226,11306,3345,15
+I enjoy watching romantic movies. => 42,4264,7487,18109,11321,15
+They are celebrating their anniversary today. => 3726,403,28765,616,19054,3063,15
+She dances gracefully to the music. => 2993,47078,14426,2920,281,253,3440,15
+He is an excellent basketball player. => 1328,310,271,7126,14648,4760,15
+The baby is sleeping soundly in the crib. => 510,6858,310,14343,3590,314,275,253,260,725,15
+I need to finish my homework before dinner. => 42,878,281,8416,619,32110,1078,8955,15
+They are organizing a charity event next month. => 3726,403,26169,247,19489,2362,1735,1770,15
+She is cooking a delicious meal for us. => 2993,310,12398,247,17319,11484,323,441,15
+We should go hiking in the mountains. => 1231,943,564,33061,275,253,14700,15
+The car broke down on the way to work. => 510,1113,9377,1066,327,253,1039,281,789,15
+He loves playing video games in his free time. => 1328,14528,4882,3492,3958,275,521,1959,673,15
+The birds are chirping in the trees. => 510,11260,403,36494,14650,275,253,7139,15
+I want to learn how to play the piano. => 42,971,281,3037,849,281,1132,253,18542,15
+They are building a new shopping mall in the city. => 3726,403,3652,247,747,12701,28974,275,253,2846,15
+She is writing a novel in her spare time. => 2993,310,4028,247,4460,275,617,18345,673,15
+We are going to the zoo this Saturday. => 1231,403,1469,281,253,41089,436,7814,15
+The cake looks delicious with chocolate frosting. => 510,15221,4453,17319,342,14354,34724,272,15
+He is a talented painter who sells his artwork. => 1328,310,247,21220,27343,665,27924,521,28227,15
+The students are studying for their exams. => 510,3484,403,12392,323,616,34666,15
+I enjoy swimming in the ocean. => 42,4264,17120,275,253,12927,15
+They are renovating their house. => 3726,403,30074,839,616,2419,15
+She is practicing yoga to stay healthy. => 2993,310,25815,25551,281,3297,5875,15
+We should plant flowers in the garden. => 1231,943,4444,12405,275,253,10329,15
+The traffic is heavy during rush hour. => 510,7137,310,5536,1309,16949,4964,15
+He is a skilled chef who creates amazing dishes. => 1328,310,247,18024,26540,665,10513,8644,17114,15
+The baby is crawling on the floor. => 510,6858,310,44922,327,253,5254,15
+I need to buy a new pair of shoes. => 42,878,281,4489,247,747,4667,273,12682,15
+They are going on a road trip across the country. => 3726,403,1469,327,247,3971,7408,2439,253,2586,15
+She is playing the piano beautifully. => 2993,310,4882,253,18542,27839,15
+We are going to a concert tomorrow night. => 1231,403,1469,281,247,12699,10873,2360,15
+The cake tastes delicious with vanilla frosting. => 510,15221,27491,17319,342,26724,34724,272,15
+He is a dedicated teacher who inspires his students. => 1328,310,247,9940,9732,665,6381,2731,521,3484,15
+The students are participating in a science fair. => 510,3484,403,15299,275,247,5859,4344,15
+I enjoy hiking in the mountains. => 42,4264,33061,275,253,14700,15
+They are organizing a beach cleanup next weekend. => 3726,403,26169,247,11600,34709,1735,8849,15
+She is taking photographs of nature. => 2993,310,3192,15928,273,3753,15
+We should try a new restaurant in town. => 1231,943,1611,247,747,10301,275,3874,15
+The traffic is moving slowly on the highway. => 510,7137,310,4886,7808,327,253,17657,15
+He is a talented singer with a beautiful voice. => 1328,310,247,21220,16057,342,247,5389,4318,15
+The baby is laughing and giggling. => 510,6858,310,17053,285,41542,1981,15
+I need to do laundry and wash my clothes. => 42,878,281,513,29023,285,14841,619,10015,15
+They are planning a trip to Europe. => 3726,403,7219,247,7408,281,3060,15
+She is learning how to play the guitar. => 2993,310,4715,849,281,1132,253,12609,15
+We are going to a museum this Sunday. => 1231,403,1469,281,247,16064,436,6926,15
+The coffee smells amazing in the morning. => 510,8574,34247,8644,275,253,4131,15
+He is a hardworking farmer who grows crops. => 1328,310,247,1892,21107,24718,665,17202,19492,15
+The students are presenting their research projects. => 510,3484,403,15250,616,2561,6493,15
+I enjoy playing soccer with my friends. => 42,4264,4882,20391,342,619,3858,15
+They are volunteering at a local shelter. => 3726,403,10057,2158,387,247,1980,17824,15
+She is practicing martial arts for self-defense. => 2993,310,25815,29731,14635,323,1881,14,29337,15
+We should try a new recipe for dinner. => 1231,943,1611,247,747,13612,323,8955,15
+The traffic is congest => 510,7137,310,25801
+The sun is shining brightly today. => 510,5101,310,28115,43925,3063,15
+I enjoy reading books in my free time. => 42,4264,4361,5098,275,619,1959,673,15
+She plays the piano beautifully. => 2993,7120,253,18542,27839,15
+The cat chased the mouse around the room. => 510,5798,40754,253,6521,1475,253,2316,15
+I love eating pizza with extra cheese. => 42,2389,9123,22534,342,4465,12173,15
+He always wears a hat wherever he goes. => 1328,1900,31394,247,7856,20312,344,4566,15
+The flowers in the garden are blooming. => 510,12405,275,253,10329,403,30601,272,15
+She danced gracefully on the stage. => 2993,39860,14426,2920,327,253,3924,15
+The dog barked loudly in the park. => 510,4370,21939,264,31311,275,253,5603,15
+We went swimming in the ocean yesterday. => 1231,2427,17120,275,253,12927,11066,15
+He speaks fluent French and Spanish. => 1328,16544,2938,290,5112,285,9883,15
+The train arrived at the station on time. => 510,6194,7244,387,253,4660,327,673,15
+She cooked a delicious meal for her family. => 2993,18621,247,17319,11484,323,617,2021,15
--- /dev/null
+이것은 테스트 이다. => 12271,296,6474,28037,17
+걱정할 필요 없다. => 18311,482,1062,550,267,17
+버그는 언젠가 고쳐진다. => 6904,272,8575,10381,1765,17
--- /dev/null
+Hello World! => 6466,147,2317,350
+I can't believe it's already Friday!" => 286,512,172,185,13392,393,172,155,3239,147,29249,8537
+The URL for the website is https://www.example.com." => 505,5635,250,170,11745,235,147,303,262,552,148,811,148,241,148,161
+"She said, 'I love to travel.'" => 161,10386,4089,150,206,286,8440,194,147,12363,148,172,161
+'The temperature is 25.5°C.' => 172,505,147,9502,235,147,20022,8516,228,148,172
+"Let's meet at 2:30 p.m. in the park." => 161,8997,172,155,17120,536,147,162,5245,147,207,148,204,148,219,170,147,17664,148,161
+The book costs $19.99 => 505,147,2277,17494,236,166,11824
+"John's favorite color is blue." => 161,7475,172,155,147,11105,147,349,235,17046,148,161
+Th@nk y0u f0r y0ur h3lp! => 6309,240,9019,147,237,159,247,147,202,159,223,147,237,159,2458,147,226,171,3899,350
+C@n I g3t a c0ffee, pl3@se? => 228,240,211,398,147,267,171,185,216,147,196,159,13360,163,150,147,1287,171,240,155,163,272
+W0w! Th@t's @m@zing! => 450,159,274,350,147,6309,240,185,172,155,268,204,240,301,248,350
+H0w 4re y0u t0d@y? => 304,159,274,320,440,147,237,159,247,147,185,159,182,240,237,272
+I l0ve t0 tr@vel @r0und the w0rld. => 286,997,159,1290,147,185,159,147,490,240,3893,268,223,159,3981,170,147,274,159,223,2833,148
+Wh@t's y0ur f@v0rite m0vie? => 450,226,240,185,172,155,147,237,159,2458,147,202,240,252,159,5961,163,147,204,159,24373,272
+The cat is sleeping on the mat. => 505,147,1604,235,147,3987,248,347,170,147,1297,148
+I need to buy some groceries for dinner. => 286,1645,194,147,8068,1499,147,10022,1037,10023,250,147,182,2749,148
+The sun is shining brightly in the sky. => 505,147,5852,235,147,7304,2967,147,215,649,391,219,170,147,7310,148
+She is reading a book in the park. => 10386,235,9838,216,147,2277,219,170,147,17664,148
+We went for a walk on the beach yesterday. => 3250,10825,250,216,147,8156,347,170,294,5371,147,28830,148
+He plays the guitar like a pro. => 5301,7084,155,170,147,4604,2214,1425,216,3474,148
+They are going to the movies tonight. => 18815,429,6552,194,170,147,15877,194,7907,148
+The flowers are blooming in the garden. => 505,147,22953,155,429,147,10411,2799,248,219,170,147,22140,148
+I enjoy listening to classical music. => 286,23162,15876,248,194,239,4251,147,7395,148
+We need to buy groceries for the week. => 3250,1645,194,147,8068,147,10022,1037,10023,250,170,9238,148
+The dog is chasing its tail in circles. => 505,147,6540,235,147,196,916,248,1602,147,5129,219,147,4095,155,148
+She is wearing a beautiful red dress. => 10386,235,147,16427,248,216,147,23447,147,1160,147,14592,148
+He is a talented actor in Hollywood. => 5301,235,216,147,29750,246,147,5112,219,147,16924,391,10477,148
+The children are playing in the playground. => 505,7934,429,7084,248,219,170,7084,12055,148
+I'm going to visit my grandparents this weekend. => 286,172,204,6552,194,9939,1247,147,11806,12019,291,9238,314,148
+The coffee tastes bitter without sugar. => 505,147,21526,147,20931,155,5145,1430,1988,147,28759,148
+They are planning a surprise party for her. => 18815,429,147,23661,216,147,29240,147,7344,250,1869,148
+She sings like an angel on stage. => 10386,147,155,6502,1425,426,147,26028,347,12685,148
+We should take a vacation to relax. => 3250,936,4654,216,147,15388,946,194,1998,2744,148
+He is studying medicine at the university. => 5301,235,7959,248,147,20742,1668,536,170,147,8025,148
+The rain is pouring heavily outside. => 505,147,6885,235,5306,248,1189,5451,391,8096,148
+I enjoy watching romantic movies. => 286,23162,147,3355,248,147,26080,4140,147,15877,148
+They are celebrating their anniversary today. => 18815,429,147,30000,5841,1669,147,24734,5464,1770,13386,148
+She dances gracefully to the music. => 10386,147,182,1626,155,147,267,8771,8001,194,170,147,7395,148
+He is an excellent basketball player. => 5301,235,426,147,12300,675,185,147,26646,5132,6294,148
+The baby is sleeping soundly in the crib. => 505,147,23597,235,147,3987,248,12642,391,219,170,147,7696,215,148
+I need to finish my homework before dinner. => 286,1645,194,147,6717,1247,147,1071,2722,2643,147,182,2749,148
+They are organizing a charity event next month. => 18815,429,147,16442,248,216,1054,1511,1663,2399,12821,148
+She is cooking a delicious meal for us. => 10386,235,147,20453,248,216,3936,23455,147,26658,250,147,539,148
+We should go hiking in the mountains. => 3250,936,4242,147,2254,5357,219,170,147,204,18028,155,148
+The car broke down on the way to work. => 505,7553,147,510,10036,4288,347,170,3699,194,1916,148
+He loves playing video games in his free time. => 5301,8440,155,7084,248,8722,147,11281,219,1439,4002,801,148
+The birds are chirping in the trees. => 505,147,13043,155,429,147,3904,223,4639,219,170,5311,155,148
+I want to learn how to play the piano. => 286,1857,194,14167,2496,194,7084,170,147,207,23635,148
+They are building a new shopping mall in the city. => 18815,429,11038,216,277,147,22184,147,204,609,219,170,147,2416,148
+She is writing a novel in her spare time. => 10386,235,3242,216,147,25814,219,1869,6772,2382,801,148
+We are going to the zoo this Saturday. => 3250,429,6552,194,170,147,25101,291,147,31426,148
+The cake looks delicious with chocolate frosting. => 505,147,24422,16303,3936,23455,312,147,5619,533,2239,147,202,3973,3431,148
+He is a talented painter who sells his artwork. => 5301,235,216,147,29750,246,147,9226,279,2888,13004,155,1439,12234,2722,148
+The students are studying for their exams. => 505,15707,429,7959,248,250,1669,147,12398,155,148
+I enjoy swimming in the ocean. => 286,23162,147,4729,8528,248,219,170,147,26193,148
+They are renovating their house. => 18815,429,991,10724,3643,1669,13788,148
+She is practicing yoga to stay healthy. => 10386,235,147,18453,248,147,5063,1186,194,15344,147,28550,148
+We should plant flowers in the garden. => 3250,936,147,9212,147,22953,155,219,170,147,22140,148
+The traffic is heavy during rush hour. => 505,147,11097,235,147,22232,4340,147,22319,147,5686,148
+He is a skilled chef who creates amazing dishes. => 5301,235,216,147,8891,246,9784,202,2888,13720,147,28880,147,23852,383,148
+The baby is crawling on the floor. => 505,147,23597,235,147,22120,248,347,170,147,5895,148
+I need to buy a new pair of shoes. => 286,1645,194,147,8068,216,277,12632,210,147,155,21953,155,148
+They are going on a road trip across the country. => 18815,429,6552,347,216,147,6362,147,11395,9762,170,11305,148
+She is playing the piano beautifully. => 10386,235,7084,248,170,147,207,23635,147,23447,391,148
+We are going to a concert tomorrow night. => 3250,429,6552,194,216,1710,4391,29524,12716,148
+The cake tastes delicious with vanilla frosting. => 505,147,24422,147,20931,155,3936,23455,312,5535,7476,147,202,3973,3431,148
+He is a dedicated teacher who inspires his students. => 5301,235,216,326,8298,3460,147,9675,2888,147,28801,155,1439,15707,148
+The students are participating in a science fair. => 505,15707,429,147,30961,3643,219,216,147,10587,147,7636,148
+I enjoy hiking in the mountains. => 286,23162,147,2254,5357,219,170,147,204,18028,155,148
+They are organizing a beach cleanup next weekend. => 18815,429,147,16442,248,216,294,5371,147,10401,2399,9238,314,148
+She is taking photographs of nature. => 10386,235,147,12345,147,4709,1547,155,210,147,211,8603,148
+We should try a new restaurant in town. => 3250,936,147,746,216,277,147,11007,219,147,10200,148
+The traffic is moving slowly on the highway. => 505,147,11097,235,147,8601,147,9880,391,347,170,5976,3330,148
+He is a talented singer with a beautiful voice. => 5301,235,216,147,29750,246,147,155,248,279,312,216,147,23447,147,9316,148
+The baby is laughing and giggling. => 505,147,23597,235,147,23066,248,221,147,2341,3631,2869,148
+I need to do laundry and wash my clothes. => 286,1645,194,543,960,3981,2154,221,147,27589,1247,147,22141,383,148
+They are planning a trip to Europe. => 18815,429,147,23661,216,147,11395,194,13131,148
+She is learning how to play the guitar. => 10386,235,11754,2496,194,7084,170,147,4604,2214,148
+We are going to a museum this Sunday. => 3250,429,6552,194,216,147,204,433,1177,291,147,29111,148
+The coffee smells amazing in the morning. => 505,147,21526,31454,155,147,28880,219,170,20701,148
+He is a hardworking farmer who grows crops. => 5301,235,216,8524,14992,147,16679,279,2888,147,6044,155,147,8650,155,148
+The students are presenting their research projects. => 505,15707,429,5130,248,1669,13217,14235,148
+I enjoy playing soccer with my friends. => 286,23162,7084,248,147,9351,5318,312,1247,147,5347,155,148
+They are volunteering at a local shelter. => 18815,429,147,5238,7478,163,12798,536,216,2491,2905,1359,279,148
+She is practicing martial arts for self-defense. => 10386,235,147,18453,248,147,3261,185,4381,12234,155,250,623,153,29896,148
+We should try a new recipe for dinner. => 3250,936,147,746,216,277,147,9851,250,147,182,2749,148
+The traffic is congest => 505,147,11097,235,1710,14169
+The sun is shining brightly today. => 505,147,5852,235,147,7304,2967,147,215,649,391,13386,148
+I enjoy reading books in my free time. => 286,23162,9838,147,9670,219,1247,4002,801,148
+She plays the piano beautifully. => 10386,7084,155,170,147,207,23635,147,23447,391,148
+The cat chased the mouse around the room. => 505,147,1604,147,196,916,246,170,12551,6890,170,9654,148
+I love eating pizza with extra cheese. => 286,8440,147,163,3643,147,207,8403,312,8230,9784,383,163,148
+He always wears a hat wherever he goes. => 5301,5418,147,16427,155,216,147,4879,2171,2433,1189,16177,148
+The flowers in the garden are blooming. => 505,147,22953,155,219,170,147,22140,429,147,10411,2799,248,148
+She danced gracefully on the stage. => 10386,13378,12408,147,267,8771,8001,347,170,12685,148
+The dog barked loudly in the park. => 505,147,6540,147,973,293,246,147,30182,391,219,170,147,17664,148
+We went swimming in the ocean yesterday. => 3250,10825,147,4729,8528,248,219,170,147,26193,147,28830,148
+He speaks fluent French and Spanish. => 5301,147,13285,155,147,21677,147,254,17590,221,147,31519,148
+The train arrived at the station on time. => 505,147,872,147,20712,182,536,170,147,7184,347,801,148
+She cooked a delicious meal for her family. => 10386,147,20453,246,216,3936,23455,147,26658,250,1869,147,2002,148
--- /dev/null
+Hello World! => 8279,10896,19
+I can't believe it's already Friday!" => 59,883,1330,13710,561,1182,3425,506,25674,11555
+The URL for the website is https://www.example.com." => 1318,3834,436,322,9575,438,1678,555,1499,32,2763,32,508,3107
+"She said, 'I love to travel.'" => 20,25387,9884,30,330,59,14290,372,25283,29329
+'The temperature is 25.5°C.' => 25,1318,13587,438,225,36,39,32,39,23767,53,4564
+"Let's meet at 2:30 p.m. in the park." => 20,9809,1182,18450,821,225,36,44,37,34,298,32,95,32,328,322,880,93,3107
+The book costs $19.99 => 1318,7618,25950,398,35,43,32,43,43
+"John's favorite color is blue." => 20,19693,1182,27448,1963,438,10087,3107
+Th@nk y0u f0r y0ur h3lp! => 1027,50,19877,533,34,103,296,34,100,533,34,305,420,37,1915,19
+C@n I g3t a c0ffee, pl3@se? => 53,50,96,439,485,37,102,312,281,34,21298,30,1278,37,50,277,49
+W0w! Th@t's @m@zing! => 73,34,105,19,947,50,102,1182,477,95,50,26768,19
+H0w 4re y0u t0d@y? => 58,34,105,225,38,268,533,34,103,273,34,86,50,107,49
+I l0ve t0 tr@vel @r0und the w0rld. => 59,456,34,587,273,34,554,50,1203,477,100,34,642,322,341,34,100,1381,32
+Wh@t's y0ur f@v0rite m0vie? => 2444,50,102,1182,533,34,305,296,50,104,34,1049,345,34,104,1075,49
+The cat is sleeping on the mat. => 1318,10501,438,9368,299,544,322,2491,32
+I need to buy some groceries for dinner. => 59,1849,372,16968,1629,20234,85,6958,436,343,3369,32
+The sun is shining brightly in the sky. => 1318,15323,438,787,19068,38231,631,328,322,26718,32
+She is reading a book in the park. => 25387,438,9175,312,7618,328,322,880,93,32
+We went for a walk on the beach yesterday. => 3122,14236,436,312,13503,544,322,526,867,39485,32
+He plays the guitar like a pro. => 1331,41271,322,3932,19931,2124,312,534,32
+They are going to the movies tonight. => 31805,884,6783,372,322,27889,26076,694,32
+The flowers are blooming in the garden. => 1318,7290,483,884,323,18466,299,328,322,485,22461,32
+I enjoy listening to classical music. => 59,31567,20498,372,443,1578,17522,32
+We need to buy groceries for the week. => 3122,1849,372,16968,20234,85,6958,436,322,8209,32
+The dog is chasing its tail in circles. => 1318,27435,438,663,9949,2819,13203,328,46428,32
+She is wearing a beautiful red dress. => 25387,438,996,6992,312,36493,3346,343,714,32
+He is a talented actor in Hollywood. => 1331,438,312,273,9556,318,16038,328,48228,631,21118,32
+The children are playing in the playground. => 1318,5713,884,19788,328,322,4654,1749,32
+I'm going to visit my grandparents this weekend. => 59,3464,6783,372,7725,1672,33162,19277,458,40618,32
+The coffee tastes bitter without sugar. => 1318,36917,273,633,307,3493,391,2876,309,18628,32
+They are planning a surprise party for her. => 31805,884,26116,312,6178,9251,15270,436,7791,32
+She sings like an angel on stage. => 25387,309,2052,2124,600,600,17691,544,10019,32
+We should take a vacation to relax. => 3122,1395,4818,312,29164,367,372,41972,32
+He is studying medicine at the university. => 1331,438,14866,299,32388,482,821,322,707,9190,32
+The rain is pouring heavily outside. => 1318,36987,438,9202,299,46003,2801,11127,32
+I enjoy watching romantic movies. => 59,31567,37652,26045,7268,27889,32
+They are celebrating their anniversary today. => 31805,884,48278,839,1741,3623,23921,5810,672,11610,32
+She dances gracefully to the music. => 25387,343,3151,31376,4938,372,322,17522,32
+He is an excellent basketball player. => 1331,438,600,39203,48400,11653,4362,32
+The baby is sleeping soundly in the crib. => 1318,323,17156,438,9368,299,9934,631,328,322,281,7972,32
+I need to finish my homework before dinner. => 59,1849,372,11361,1672,6765,1007,2670,343,3369,32
+They are organizing a charity event next month. => 31805,884,10558,6183,312,1351,543,1692,2354,6811,32
+She is cooking a delicious meal for us. => 25387,438,23682,299,312,409,406,2406,597,279,436,1770,32
+We should go hiking in the mountains. => 3122,1395,1983,420,1546,299,328,322,10874,1907,32
+The car broke down on the way to work. => 1318,6346,43289,2835,544,322,3352,372,1389,32
+He loves playing video games in his free time. => 1331,598,4954,19788,6027,19705,328,6697,3741,1133,32
+The birds are chirping in the trees. => 1318,8424,3210,884,663,476,7075,328,322,23453,32
+I want to learn how to play the piano. => 59,2637,372,7350,2624,372,4654,322,298,25757,32
+They are building a new shopping mall in the city. => 31805,884,9038,312,537,40692,345,464,328,322,11297,32
+She is writing a novel in her spare time. => 25387,438,4127,312,32913,328,7791,1869,586,1133,32
+We are going to the zoo this Saturday. => 3122,884,6783,372,322,1288,604,458,358,30288,32
+The cake looks delicious with chocolate frosting. => 1318,281,1062,7780,409,406,2406,623,10408,27589,296,20932,299,32
+He is a talented painter who sells his artwork. => 1331,438,312,273,9556,318,42300,6560,10800,101,6697,5549,1007,32
+The students are studying for their exams. => 1318,16512,884,14866,299,436,3623,538,1462,32
+I enjoy swimming in the ocean. => 59,31567,2535,449,6714,328,322,337,18857,32
+They are renovating their house. => 31805,884,316,15007,1741,3623,17075,32
+She is practicing yoga to stay healthy. => 25387,438,11808,11636,533,40067,372,20005,44538,32
+We should plant flowers in the garden. => 3122,1395,26795,7290,483,328,322,485,22461,32
+The traffic is heavy during rush hour. => 1318,16391,438,32389,5929,540,1372,12021,32
+He is a skilled chef who creates amazing dishes. => 1331,438,312,3001,12088,44051,6560,9585,36986,1214,4279,32
+The baby is crawling on the floor. => 1318,323,17156,438,281,1294,2920,544,322,17648,32
+I need to buy a new pair of shoes. => 59,1849,372,16968,312,537,6092,432,787,37764,32
+They are going on a road trip across the country. => 31805,884,6783,544,312,24122,19337,10160,322,10769,32
+She is playing the piano beautifully. => 25387,438,19788,322,298,25757,526,4846,325,514,107,32
+We are going to a concert tomorrow night. => 3122,884,6783,372,312,457,6989,31841,19212,32
+The cake tastes delicious with vanilla frosting. => 1318,281,1062,273,633,307,409,406,2406,623,44653,296,20932,299,32
+He is a dedicated teacher who inspires his students. => 1331,438,312,23112,30877,6560,26194,8017,6697,16512,32
+The students are participating in a science fair. => 1318,16512,884,24623,1741,328,312,27536,19375,32
+I enjoy hiking in the mountains. => 59,31567,420,1546,299,328,322,10874,1907,32
+They are organizing a beach cleanup next weekend. => 31805,884,10558,6183,312,526,867,13144,2354,40618,32
+She is taking photographs of nature. => 25387,438,15137,15110,23626,432,24406,32
+We should try a new restaurant in town. => 3122,1395,1596,312,537,43719,328,38212,32
+The traffic is moving slowly on the highway. => 1318,16391,438,14089,12899,631,544,322,3857,3073,32
+He is a talented singer with a beautiful voice. => 1331,438,312,273,9556,318,309,10118,623,312,36493,20309,32
+The baby is laughing and giggling. => 1318,323,17156,438,2317,2943,299,461,485,365,36088,32
+I need to do laundry and wash my clothes. => 59,1849,372,745,2317,642,994,461,341,917,1672,7375,46948,32
+They are planning a trip to Europe. => 31805,884,26116,312,19337,372,27268,32
+She is learning how to play the guitar. => 25387,438,9608,2624,372,4654,322,3932,19931,32
+We are going to a museum this Sunday. => 3122,884,6783,372,312,345,539,378,458,358,28036,32
+The coffee smells amazing in the morning. => 1318,36917,309,42153,101,36986,328,322,33768,32
+He is a hardworking farmer who grows crops. => 1331,438,312,6784,13578,9019,2302,6560,485,2138,25170,1069,32
+The students are presenting their research projects. => 1318,16512,884,5024,299,3623,13234,8528,32
+I enjoy playing soccer with my friends. => 59,31567,19788,22682,10035,623,1672,22523,32
+They are volunteering at a local shelter. => 31805,884,3920,45585,8637,821,312,2196,309,2542,391,32
+She is practicing martial arts for self-defense. => 25387,438,11808,11636,345,502,564,5549,101,436,630,31,43694,32
+We should try a new recipe for dinner. => 3122,1395,1596,312,537,15233,436,343,3369,32
+The traffic is congest => 1318,16391,438,457,2776
+The sun is shining brightly today. => 1318,15323,438,787,19068,38231,631,11610,32
+I enjoy reading books in my free time. => 59,31567,9175,21739,328,1672,3741,1133,32
+She plays the piano beautifully. => 25387,41271,322,298,25757,526,4846,325,514,107,32
+The cat chased the mouse around the room. => 1318,10501,663,16109,322,8459,6835,322,8355,32
+I love eating pizza with extra cheese. => 59,14290,484,1741,47630,623,6717,8277,30315,32
+He always wears a hat wherever he goes. => 1331,5182,996,4177,312,25793,2154,424,938,13107,32
+The flowers in the garden are blooming. => 1318,7290,483,328,322,485,22461,884,323,18466,299,32
+She danced gracefully on the stage. => 25387,343,6087,31376,4938,544,322,10019,32
+The dog barked loudly in the park. => 1318,27435,323,1087,318,598,836,631,328,322,880,93,32
+We went swimming in the ocean yesterday. => 3122,14236,2535,449,6714,328,322,337,18857,39485,32
+He speaks fluent French and Spanish. => 1331,24498,101,38055,43652,461,14911,1708,32
+The train arrived at the station on time. => 1318,5683,2099,32114,821,322,18662,544,1133,32
+She cooked a delicious meal for her family. => 25387,23682,318,312,409,406,2406,597,279,436,7791,13872,32
--- /dev/null
+# test case format
+# <language>: <sentence>
+
+English: Hello World!
+English: I can't believe it's already Friday!"
+English: The URL for the website is https://www.example.com."
+English: "She said, 'I love to travel.'"
+English: 'The temperature is 25.5°C.'
+English: "Let's meet at 2:30 p.m. in the park."
+English: The book costs $19.99
+English: "John's favorite color is blue."
+English: Th@nk y0u f0r y0ur h3lp!
+English: C@n I g3t a c0ffee, pl3@se?
+English: W0w! Th@t's @m@zing!
+English: H0w 4re y0u t0d@y?
+English: I l0ve t0 tr@vel @r0und the w0rld.
+English: Wh@t's y0ur f@v0rite m0vie?
+English: The cat is sleeping on the mat.
+English: I need to buy some groceries for dinner.
+English: The sun is shining brightly in the sky.
+English: She is reading a book in the park.
+English: We went for a walk on the beach yesterday.
+English: He plays the guitar like a pro.
+English: They are going to the movies tonight.
+English: The flowers are blooming in the garden.
+English: I enjoy listening to classical music.
+English: We need to buy groceries for the week.
+English: The dog is chasing its tail in circles.
+English: She is wearing a beautiful red dress.
+English: He is a talented actor in Hollywood.
+English: The children are playing in the playground.
+English: I'm going to visit my grandparents this weekend.
+English: The coffee tastes bitter without sugar.
+English: They are planning a surprise party for her.
+English: She sings like an angel on stage.
+English: We should take a vacation to relax.
+English: He is studying medicine at the university.
+English: The rain is pouring heavily outside.
+English: I enjoy watching romantic movies.
+English: They are celebrating their anniversary today.
+English: She dances gracefully to the music.
+English: He is an excellent basketball player.
+English: The baby is sleeping soundly in the crib.
+English: I need to finish my homework before dinner.
+English: They are organizing a charity event next month.
+English: She is cooking a delicious meal for us.
+English: We should go hiking in the mountains.
+English: The car broke down on the way to work.
+English: He loves playing video games in his free time.
+English: The birds are chirping in the trees.
+English: I want to learn how to play the piano.
+English: They are building a new shopping mall in the city.
+English: She is writing a novel in her spare time.
+English: We are going to the zoo this Saturday.
+English: The cake looks delicious with chocolate frosting.
+English: He is a talented painter who sells his artwork.
+English: The students are studying for their exams.
+English: I enjoy swimming in the ocean.
+English: They are renovating their house.
+English: She is practicing yoga to stay healthy.
+English: We should plant flowers in the garden.
+English: The traffic is heavy during rush hour.
+English: He is a skilled chef who creates amazing dishes.
+English: The baby is crawling on the floor.
+English: I need to buy a new pair of shoes.
+English: They are going on a road trip across the country.
+English: She is playing the piano beautifully.
+English: We are going to a concert tomorrow night.
+English: The cake tastes delicious with vanilla frosting.
+English: He is a dedicated teacher who inspires his students.
+English: The students are participating in a science fair.
+English: I enjoy hiking in the mountains.
+English: They are organizing a beach cleanup next weekend.
+English: She is taking photographs of nature.
+English: We should try a new restaurant in town.
+English: The traffic is moving slowly on the highway.
+English: He is a talented singer with a beautiful voice.
+English: The baby is laughing and giggling.
+English: I need to do laundry and wash my clothes.
+English: They are planning a trip to Europe.
+English: She is learning how to play the guitar.
+English: We are going to a museum this Sunday.
+English: The coffee smells amazing in the morning.
+English: He is a hardworking farmer who grows crops.
+English: The students are presenting their research projects.
+English: I enjoy playing soccer with my friends.
+English: They are volunteering at a local shelter.
+English: She is practicing martial arts for self-defense.
+English: We should try a new recipe for dinner.
+English: The traffic is congest
+English: The sun is shining brightly today.
+English: I enjoy reading books in my free time.
+English: She plays the piano beautifully.
+English: The cat chased the mouse around the room.
+English: I love eating pizza with extra cheese.
+English: He always wears a hat wherever he goes.
+English: The flowers in the garden are blooming.
+English: She danced gracefully on the stage.
+English: The dog barked loudly in the park.
+English: We went swimming in the ocean yesterday.
+English: He speaks fluent French and Spanish.
+English: The train arrived at the station on time.
+English: She cooked a delicious meal for her family.
+Korean: 이것은 테스트 이다.
+Korean: 걱정할 필요 없다.
+Korean: 버그는 언젠가 고쳐진다.
+Japanese: 明日の天気はどうですか。
+Chinese: 请问洗手间在哪里?
+Emoji: I'm feeling 😄 today!
+Unicode: ◑ ▢ ▣ ◱
\ No newline at end of file
--- /dev/null
+import os
+from transformers import AutoTokenizer
+
+os.environ['TOKENIZERS_PARALLELISM'] = "false"
+
+list_repo_hf = ["databricks/dolly-v2-3b", # dolly-v2 (3b, 7b, 12b models share the same tokenizer)
+ "gpt2", # gpt-2 (gpt2-xl, gpt2-large share the same tokenizer)
+ "uer/gpt2-chinese-cluecorpussmall", # gpt-2-chinese
+ "EleutherAI/gpt-j-6b", # gpt-j
+ "EleutherAI/gpt-neox-20b", # gpt-neox
+ "EleutherAI/polyglot-ko-1.3b", # gpt-neox (polyglot-ko 5.8b and 12.8b share the same tokenizer")
+ "rinna/japanese-gpt-neox-3.6b", # gpt-neox
+ # mpt-7b (uses gpt-neox-20b tokenizer)
+ "replit/replit-code-v1-3b", # replit
+ "bigcode/starcoder", # starcoder (huggingface-cli login required)
+ "openai/whisper-tiny" # whisper (base, large, large-v2 share the same tokenizer)
+ ]
+
+repo2ggml = {"databricks/dolly-v2-3b" : "dolly-v2",
+ "gpt2" : "gpt-2",
+ "uer/gpt2-chinese-cluecorpussmall" : "gpt-2-chinese",
+ "EleutherAI/gpt-j-6b" : "gpt-j",
+ "EleutherAI/gpt-neox-20b" : "gpt-neox",
+ "EleutherAI/polyglot-ko-1.3b" : "polyglot-ko",
+ "rinna/japanese-gpt-neox-3.6b" : "gpt-neox-japanese",
+ "replit/replit-code-v1-3b" : "replit",
+ "bigcode/starcoder" : "starcoder",
+ "openai/whisper-tiny" : "whisper"}
+
+repo2language = {"databricks/dolly-v2-3b" : "english",
+ "gpt2" : "english",
+ "uer/gpt2-chinese-cluecorpussmall" : "chinese",
+ "EleutherAI/gpt-j-6b" : "english",
+ "EleutherAI/gpt-neox-20b" : "english",
+ "EleutherAI/polyglot-ko-1.3b" : "korean",
+ "rinna/japanese-gpt-neox-3.6b" : "japanese",
+ "replit/replit-code-v1-3b" : "english",
+ "bigcode/starcoder" : "english",
+ "openai/whisper-tiny" : "english"}
+
+delimeter = ": "
+test_sentences = []
+with open("test-cases.txt", "r") as f:
+ lines = [l.rstrip() for l in f.readlines()]
+ for l in lines:
+ if delimeter in l:
+ language = l[:l.index(delimeter)]
+ sentence = l[l.index(delimeter) + len(delimeter):]
+ test_sentences.append((language.lower(), sentence))
+
+for repo in list_repo_hf:
+
+ target_language = repo2language[repo]
+
+ tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
+
+ tokens_hf = []
+ for language, sentence in test_sentences:
+ if language == target_language:
+ tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentence))
+ tokens_hf.append((sentence, tokens))
+
+ save_txt = repo2ggml[repo] + ".txt"
+ with open(save_txt, "w") as f:
+ f.writelines([sentence + " => " + ",".join(str(t) for t in tokens) + "\n" for sentence, tokens in tokens_hf])
--- /dev/null
+Hello World! => 15947,3937,0
+I can't believe it's already Friday!" => 40,393,380,1697,309,311,1217,6984,2963
+The URL for the website is https://www.example.com." => 2278,12905,337,220,3322,3144,307,34426,21492,17919,13,3121,335,781,13,1112,889
+"She said, 'I love to travel.'" => 1,9526,848,11,922,40,959,220,1353,220,17227,779,28763
+'The temperature is 25.5°C.' => 6,2278,220,18275,610,1503,307,3552,13,20,11782,34,4443
+"Let's meet at 2:30 p.m. in the park." => 1,8373,311,1677,412,568,25,3446,280,13,76,13,294,220,3322,3884,889
+The book costs $19.99 => 2278,1446,5497,1848,3405,13,8494
+"John's favorite color is blue." => 1,16938,311,2954,2017,307,3344,889
+Th@nk y0u f0r y0ur h3lp! => 2434,31,77,74,288,15,84,283,15,81,288,15,374,276,18,75,79,0
+C@n I g3t a c0ffee, pl3@se? => 34,31,77,286,290,18,83,257,269,15,4617,11,499,18,31,405,30
+W0w! Th@t's @m@zing! => 54,15,86,0,334,31,83,311,10428,76,31,8781,0
+H0w 4re y0u t0d@y? => 39,15,86,1017,265,288,15,84,220,83,15,67,31,88,30
+I l0ve t0 tr@vel @r0und the w0rld. => 40,287,15,303,220,83,15,220,6903,31,779,10428,81,15,997,220,3322,261,15,81,348,13
+Wh@t's y0ur f@v0rite m0vie? => 2471,31,83,311,288,15,374,283,31,85,15,35002,275,15,12702,30
+The cat is sleeping on the mat. => 2278,3857,307,8296,322,220,3322,3803,13
+I need to buy some groceries for dinner. => 40,643,220,1353,2256,512,31391,337,6148,13
+The sun is shining brightly in the sky. => 2278,3295,307,18269,47418,294,220,3322,5443,13
+She is reading a book in the park. => 9526,307,3760,257,1446,294,220,3322,3884,13
+We went for a walk on the beach yesterday. => 4360,1437,337,257,1792,322,220,3322,7534,5186,13
+He plays the guitar like a pro. => 5205,5749,220,3322,7531,411,257,447,13
+They are going to the movies tonight. => 8829,366,516,220,1353,220,3322,6233,220,1756,397,13
+The flowers are blooming in the garden. => 2278,8085,366,45294,294,220,3322,7431,13
+I enjoy listening to classical music. => 40,2103,4764,220,1353,13735,1318,13
+We need to buy groceries for the week. => 4360,643,220,1353,2256,31391,337,220,3322,1243,13
+The dog is chasing its tail in circles. => 2278,3000,307,17876,1080,220,14430,294,13040,13
+She is wearing a beautiful red dress. => 9526,307,4769,257,2238,2182,5231,13
+He is a talented actor in Hollywood. => 5205,307,257,220,32831,6003,8747,294,11628,13
+The children are playing in the playground. => 2278,2227,366,2433,294,220,3322,24646,13
+I'm going to visit my grandparents this weekend. => 40,478,516,220,1353,3441,452,21876,220,11176,6711,13
+The coffee tastes bitter without sugar. => 2278,4982,220,83,40246,13871,1553,5076,13
+They are planning a surprise party for her. => 8829,366,5038,257,6365,3595,337,720,13
+She sings like an angel on stage. => 9526,23250,411,364,14250,322,3233,13
+We should take a vacation to relax. => 4360,820,220,27612,257,12830,220,1353,5789,13
+He is studying medicine at the university. => 5205,307,7601,7195,412,220,3322,5454,13
+The rain is pouring heavily outside. => 2278,4830,307,20450,10950,2380,13
+I enjoy watching romantic movies. => 40,2103,1976,13590,6233,13
+They are celebrating their anniversary today. => 8829,366,15252,220,3322,347,12962,220,83,378,320,13
+She dances gracefully to the music. => 9526,28322,10042,2277,220,1353,220,3322,1318,13
+He is an excellent basketball player. => 5205,307,364,7103,11767,4256,13
+The baby is sleeping soundly in the crib. => 2278,3186,307,8296,1626,356,294,220,3322,47163,13
+I need to finish my homework before dinner. => 40,643,220,1353,2413,452,14578,949,6148,13
+They are organizing a charity event next month. => 8829,366,17608,257,16863,2280,958,1618,13
+She is cooking a delicious meal for us. => 9526,307,6361,257,4809,6791,337,505,13
+We should go hiking in the mountains. => 4360,820,352,23784,294,220,3322,10233,13
+The car broke down on the way to work. => 2278,1032,6902,760,322,220,3322,636,220,1353,589,13
+He loves playing video games in his free time. => 5205,6752,2433,960,2813,294,702,1737,220,3766,13
+The birds are chirping in the trees. => 2278,9009,366,36682,294,220,3322,220,3599,279,13
+I want to learn how to play the piano. => 40,528,220,1353,1466,577,220,1353,862,220,3322,9211,13
+They are building a new shopping mall in the city. => 8829,366,2390,257,777,8688,16026,294,220,3322,2307,13
+She is writing a novel in her spare time. => 9526,307,3579,257,7613,294,720,13798,220,3766,13
+We are going to the zoo this Saturday. => 4360,366,516,220,1353,220,3322,25347,220,11176,8803,13
+The cake looks delicious with chocolate frosting. => 2278,5908,1542,4809,365,6215,37048,13
+He is a talented painter who sells his artwork. => 5205,307,257,220,32831,6003,26619,567,20897,702,15829,13
+The students are studying for their exams. => 2278,1731,366,7601,337,220,3322,347,20514,13
+I enjoy swimming in the ocean. => 40,2103,11989,294,220,3322,7810,13
+They are renovating their house. => 8829,366,18845,990,220,3322,347,1782,13
+She is practicing yoga to stay healthy. => 9526,307,11350,15128,220,1353,1754,4627,13
+We should plant flowers in the garden. => 4360,820,3709,8085,294,220,3322,7431,13
+The traffic is heavy during rush hour. => 2278,220,17227,3341,307,4676,1830,9300,1773,13
+He is a skilled chef who creates amazing dishes. => 5205,307,257,19690,10530,567,7829,2243,10814,13
+The baby is crawling on the floor. => 2278,3186,307,32979,322,220,3322,4123,13
+I need to buy a new pair of shoes. => 40,643,220,1353,2256,257,777,6119,295,6654,13
+They are going on a road trip across the country. => 8829,366,516,322,257,3060,220,83,8400,2108,220,3322,1941,13
+She is playing the piano beautifully. => 9526,307,2433,220,3322,9211,16525,13
+We are going to a concert tomorrow night. => 4360,366,516,220,1353,257,8543,220,83,298,3162,1818,13
+The cake tastes delicious with vanilla frosting. => 2278,5908,220,83,40246,4809,365,17528,37048,13
+He is a dedicated teacher who inspires his students. => 5205,307,257,8374,220,975,4062,567,32566,702,1731,13
+The students are participating in a science fair. => 2278,1731,366,13950,294,257,3497,3143,13
+I enjoy hiking in the mountains. => 40,2103,23784,294,220,3322,10233,13
+They are organizing a beach cleanup next weekend. => 8829,366,17608,257,7534,40991,958,6711,13
+She is taking photographs of nature. => 9526,307,220,48625,17649,295,3687,13
+We should try a new restaurant in town. => 4360,820,220,83,627,257,777,6383,294,220,30401,13
+The traffic is moving slowly on the highway. => 2278,220,17227,3341,307,2684,5692,322,220,3322,17205,13
+He is a talented singer with a beautiful voice. => 5205,307,257,220,32831,6003,11564,365,257,2238,3177,13
+The baby is laughing and giggling. => 2278,3186,307,5059,293,290,24542,13
+I need to do laundry and wash my clothes. => 40,643,220,1353,360,19811,293,5675,452,5534,13
+They are planning a trip to Europe. => 8829,366,5038,257,220,83,8400,220,1353,3315,13
+She is learning how to play the guitar. => 9526,307,2539,577,220,1353,862,220,3322,7531,13
+We are going to a museum this Sunday. => 4360,366,516,220,1353,257,8441,220,11176,7776,13
+The coffee smells amazing in the morning. => 2278,4982,10036,2243,294,220,3322,2446,13
+He is a hardworking farmer who grows crops. => 5205,307,257,1152,22475,17891,567,13156,16829,13
+The students are presenting their research projects. => 2278,1731,366,15578,220,3322,347,2132,4455,13
+I enjoy playing soccer with my friends. => 40,2103,2433,15469,365,452,1855,13
+They are volunteering at a local shelter. => 8829,366,33237,412,257,2654,13341,13
+She is practicing martial arts for self-defense. => 9526,307,11350,20755,8609,337,2698,12,49268,13
+We should try a new recipe for dinner. => 4360,820,220,83,627,257,777,6782,337,6148,13
+The traffic is congest => 2278,220,17227,3341,307,31871
+The sun is shining brightly today. => 2278,3295,307,18269,47418,220,83,378,320,13
+I enjoy reading books in my free time. => 40,2103,3760,3642,294,452,1737,220,3766,13
+She plays the piano beautifully. => 9526,5749,220,3322,9211,16525,13
+The cat chased the mouse around the room. => 2278,3857,33091,220,3322,9719,926,220,3322,1808,13
+I love eating pizza with extra cheese. => 40,959,3936,8298,365,2857,5399,13
+He always wears a hat wherever he goes. => 5205,1009,20877,257,2385,8660,415,1709,13
+The flowers in the garden are blooming. => 2278,8085,294,220,3322,7431,366,45294,13
+She danced gracefully on the stage. => 9526,32909,10042,2277,322,220,3322,3233,13
+The dog barked loudly in the park. => 2278,3000,16202,292,22958,294,220,3322,3884,13
+We went swimming in the ocean yesterday. => 4360,1437,11989,294,220,3322,7810,5186,13
+He speaks fluent French and Spanish. => 5205,10789,40799,5522,293,8058,13
+The train arrived at the station on time. => 2278,220,83,7146,6678,412,220,3322,5214,322,220,3766,13
+She cooked a delicious meal for her family. => 9526,9267,257,4809,6791,337,720,1605,13
ggml_free(model.ctx);
return 0;
-}
+}
\ No newline at end of file
}
t_load_us = ggml_time_us() - t_start_us;
+
+ test_gpt_tokenizer(vocab, params.token_test);
}
int n_past = 0;