1. [Markdown](#markdown)
2. [CSV](#csv)
3. [JSON](#json)
- 4. [SQL](#sql)
+ 4. [JSONL](#jsonl)
+ 5. [SQL](#sql)
## Syntax
options:
-h, --help
- -m, --model <filename> (default: models/7B/ggml-model-q4_0.gguf)
- -p, --n-prompt <n> (default: 512)
- -n, --n-gen <n> (default: 128)
- -pg <pp,tg> (default: 512,128)
- -b, --batch-size <n> (default: 2048)
- -ub, --ubatch-size <n> (default: 512)
- -ctk, --cache-type-k <t> (default: f16)
- -ctv, --cache-type-v <t> (default: f16)
- -t, --threads <n> (default: 16)
- -ngl, --n-gpu-layers <n> (default: 99)
- -sm, --split-mode <none|layer|row> (default: layer)
- -mg, --main-gpu <i> (default: 0)
- -nkvo, --no-kv-offload <0|1> (default: 0)
- -fa, --flash-attn <0|1> (default: 0)
- -mmp, --mmap <0|1> (default: 1)
- --numa <distribute|isolate|numactl> (default: disabled)
- -embd, --embeddings <0|1> (default: 0)
- -ts, --tensor-split <ts0/ts1/..> (default: 0)
- -r, --repetitions <n> (default: 5)
- -o, --output <csv|json|md|sql> (default: md)
- -v, --verbose (default: 0)
+ -m, --model <filename> (default: models/7B/ggml-model-q4_0.gguf)
+ -p, --n-prompt <n> (default: 512)
+ -n, --n-gen <n> (default: 128)
+ -pg <pp,tg> (default: )
+ -b, --batch-size <n> (default: 2048)
+ -ub, --ubatch-size <n> (default: 512)
+ -ctk, --cache-type-k <t> (default: f16)
+ -ctv, --cache-type-v <t> (default: f16)
+ -t, --threads <n> (default: 8)
+ -C, --cpu-mask <hex,hex> (default: 0x0)
+ --cpu-strict <0|1> (default: 0)
+ --poll <0...100> (default: 50)
+ -ngl, --n-gpu-layers <n> (default: 99)
+ -rpc, --rpc <rpc_servers> (default: )
+ -sm, --split-mode <none|layer|row> (default: layer)
+ -mg, --main-gpu <i> (default: 0)
+ -nkvo, --no-kv-offload <0|1> (default: 0)
+ -fa, --flash-attn <0|1> (default: 0)
+ -mmp, --mmap <0|1> (default: 1)
+ --numa <distribute|isolate|numactl> (default: disabled)
+ -embd, --embeddings <0|1> (default: 0)
+ -ts, --tensor-split <ts0/ts1/..> (default: 0)
+ -r, --repetitions <n> (default: 5)
+ --prio <0|1|2|3> (default: 0)
+ --delay <0...N> (seconds) (default: 0)
+ -o, --output <csv|json|jsonl|md|sql> (default: md)
+ -oe, --output-err <csv|json|jsonl|md|sql> (default: none)
+ -v, --verbose (default: 0)
Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.
```
]
```
+
+### JSONL
+
+```sh
+$ ./llama-bench -o jsonl
+```
+
+```json lines
+{"build_commit":"3469684","build_number":1275,"cuda":true,"metal":false,"gpu_blas":true,"blas":true,"cpu_info":"13th Gen Intel(R) Core(TM) i9-13900K","gpu_info":"NVIDIA GeForce RTX 3090 Ti","model_filename":"models/7B/ggml-model-q4_0.gguf","model_type":"llama 7B mostly Q4_0","model_size":3825065984,"model_n_params":6738415616,"n_batch":512,"n_threads":16,"f16_kv":true,"n_gpu_layers":99,"main_gpu":0,"mul_mat_q":true,"tensor_split":"0.00","n_prompt":512,"n_gen":0,"test_time":"2023-09-23T12:09:57Z","avg_ns":212365953,"stddev_ns":985423,"avg_ts":2410.974041,"stddev_ts":11.163766,"samples_ns":[213837238,211635853,212328053,211329715,212698907],"samples_ts":[2394.34,2419.25,2411.36,2422.75,2407.16]}
+{"build_commit":"3469684","build_number":1275,"cuda":true,"metal":false,"gpu_blas":true,"blas":true,"cpu_info":"13th Gen Intel(R) Core(TM) i9-13900K","gpu_info":"NVIDIA GeForce RTX 3090 Ti","model_filename":"models/7B/ggml-model-q4_0.gguf","model_type":"llama 7B mostly Q4_0","model_size":3825065984,"model_n_params":6738415616,"n_batch":512,"n_threads":16,"f16_kv":true,"n_gpu_layers":99,"main_gpu":0,"mul_mat_q":true,"tensor_split":"0.00","n_prompt":0,"n_gen":128,"test_time":"2023-09-23T12:09:59Z","avg_ns":977425219,"stddev_ns":9268593,"avg_ts":130.965708,"stddev_ts":1.238924,"samples_ns":[984472709,974901233,989474741,970729355,967548060],"samples_ts":[130.019,131.295,129.362,131.86,132.293]}
+```
+
+
### SQL
SQL output is suitable for importing into a SQLite database. The output can be piped into the `sqlite3` command line tool to add the results to a database.
}
// command line params
-enum output_formats {NONE, CSV, JSON, MARKDOWN, SQL};
+enum output_formats {NONE, CSV, JSON, JSONL, MARKDOWN, SQL};
static const char * output_format_str(output_formats format) {
switch (format) {
case NONE: return "none";
case CSV: return "csv";
case JSON: return "json";
+ case JSONL: return "jsonl";
case MARKDOWN: return "md";
case SQL: return "sql";
default: GGML_ABORT("invalid output format");
format = CSV;
} else if (s == "json") {
format = JSON;
+ } else if (s == "jsonl") {
+ format = JSONL;
} else if (s == "md") {
format = MARKDOWN;
} else if (s == "sql") {
printf("\n");
printf("options:\n");
printf(" -h, --help\n");
- printf(" -m, --model <filename> (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
- printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
- printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
- printf(" -pg <pp,tg> (default: %s)\n", join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str());
- printf(" -b, --batch-size <n> (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str());
- printf(" -ub, --ubatch-size <n> (default: %s)\n", join(cmd_params_defaults.n_ubatch, ",").c_str());
- printf(" -ctk, --cache-type-k <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str());
- printf(" -ctv, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
- printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
- printf(" -C, --cpu-mask <hex,hex> (default: %s)\n", join(cmd_params_defaults.cpu_mask, ",").c_str());
- printf(" --cpu-strict <0|1> (default: %s)\n", join(cmd_params_defaults.cpu_strict, ",").c_str());
- printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str());
- printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
- printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
- printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
- printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
- printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
- printf(" -fa, --flash-attn <0|1> (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str());
- printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
- printf(" --numa <distribute|isolate|numactl> (default: disabled)\n");
- printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str());
- printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
- printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
- printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
- printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay);
- printf(" -o, --output <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
- printf(" -oe, --output-err <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
- printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
+ printf(" -m, --model <filename> (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
+ printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
+ printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
+ printf(" -pg <pp,tg> (default: %s)\n", join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str());
+ printf(" -b, --batch-size <n> (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str());
+ printf(" -ub, --ubatch-size <n> (default: %s)\n", join(cmd_params_defaults.n_ubatch, ",").c_str());
+ printf(" -ctk, --cache-type-k <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str());
+ printf(" -ctv, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
+ printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
+ printf(" -C, --cpu-mask <hex,hex> (default: %s)\n", join(cmd_params_defaults.cpu_mask, ",").c_str());
+ printf(" --cpu-strict <0|1> (default: %s)\n", join(cmd_params_defaults.cpu_strict, ",").c_str());
+ printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str());
+ printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
+ printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
+ printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
+ printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
+ printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
+ printf(" -fa, --flash-attn <0|1> (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str());
+ printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
+ printf(" --numa <distribute|isolate|numactl> (default: disabled)\n");
+ printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str());
+ printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
+ printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
+ printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
+ printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay);
+ printf(" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
+ printf(" -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
+ printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
printf("\n");
printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
}
}
};
-struct json_printer : public printer {
- bool first = true;
- static std::string escape_json(const std::string & value) {
- std::string escaped;
- for (auto c : value) {
- if (c == '"') {
- escaped += "\\\"";
- } else if (c == '\\') {
- escaped += "\\\\";
- } else if (c <= 0x1f) {
- char buf[8];
- snprintf(buf, sizeof(buf), "\\u%04x", c);
- escaped += buf;
- } else {
- escaped += c;
- }
+// Escape a string for embedding inside a JSON string literal: double quotes and
+// backslashes are backslash-escaped, control characters (0x00-0x1f) become
+// \u00XX sequences, and every other byte is copied through unchanged.
+static std::string escape_json(const std::string & value) {
+    std::string escaped;
+    for (auto c : value) {
+        if (c == '"') {
+            escaped += "\\\"";
+        } else if (c == '\\') {
+            escaped += "\\\\";
+        } else if (static_cast<unsigned char>(c) <= 0x1f) {
+            // Compare and format as unsigned char: where plain char is signed, bytes >= 0x80
+            // (e.g. UTF-8 multi-byte sequences) are negative, so they would be mistaken for
+            // control characters and printed sign-extended, which snprintf then truncates
+            // into a bogus escape sequence.
+            char buf[8];
+            snprintf(buf, sizeof(buf), "\\u%04x", static_cast<unsigned>(static_cast<unsigned char>(c)));
+            escaped += buf;
+        } else {
+            escaped += c;
         }
-        return escaped;
     }
+    return escaped;
+}
- static std::string format_value(const std::string & field, const std::string & value) {
- switch (test::get_field_type(field)) {
- case test::STRING:
- return "\"" + escape_json(value) + "\"";
- case test::BOOL:
- return value == "0" ? "false" : "true";
- default:
- return value;
- }
+// Convert a field's raw string value into its JSON representation, chosen by the
+// field type reported by test::get_field_type(): STRING values are escaped and
+// quoted, BOOL values map "0" to false and anything else to true, and all other
+// types are emitted verbatim.
+static std::string format_json_value(const std::string & field, const std::string & value) {
+    const auto kind = test::get_field_type(field);
+    if (kind == test::STRING) {
+        return "\"" + escape_json(value) + "\"";
+    }
+    if (kind == test::BOOL) {
+        return value == "0" ? "false" : "true";
     }
+    return value;
+}
+
+struct json_printer : public printer {
+ bool first = true;
void print_header(const cmd_params & params) override {
fprintf(fout, "[\n");
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
assert(fields.size() == values.size());
for (size_t i = 0; i < fields.size(); i++) {
- fprintf(fout, " \"%s\": %s,\n", fields.at(i).c_str(), format_value(fields.at(i), values.at(i)).c_str());
+ fprintf(fout, " \"%s\": %s,\n", fields.at(i).c_str(), format_json_value(fields.at(i), values.at(i)).c_str());
}
}
}
};
+
+// Printer that writes each test as one JSON object on a single line.
+struct jsonl_printer : public printer {
+    // Write every field as `"name": value, ` (including the trailing separator) to fout.
+    void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
+        assert(fields.size() == values.size());
+        size_t idx = 0;
+        for (const std::string & name : fields) {
+            const std::string rendered = format_json_value(name, values.at(idx));
+            fprintf(fout, "\"%s\": %s, ", name.c_str(), rendered.c_str());
+            ++idx;
+        }
+    }
+
+    // Emit one test: the regular fields followed by the raw sample arrays,
+    // wrapped in braces and terminated by a newline.
+    void print_test(const test & t) override {
+        fprintf(fout, "{");
+        print_fields(test::get_fields(), t.get_values());
+
+        const std::string ns_list = join(t.samples_ns, ", ");
+        fprintf(fout, "\"samples_ns\": [ %s ],", ns_list.c_str());
+
+        const std::string ts_list = join(t.get_ts(), ", ");
+        fprintf(fout, "\"samples_ts\": [ %s ]", ts_list.c_str());
+
+        fprintf(fout, "}\n");
+        // Flush after every record so each completed line is pushed to the stream right away.
+        fflush(fout);
+    }
+};
+
struct markdown_printer : public printer {
std::vector<std::string> fields;
return std::unique_ptr<printer>(new csv_printer());
case JSON:
return std::unique_ptr<printer>(new json_printer());
+ case JSONL:
+ return std::unique_ptr<printer>(new jsonl_printer());
case MARKDOWN:
return std::unique_ptr<printer>(new markdown_printer());
case SQL: