#include <CL/cl.h>
+#include <inttypes.h>
#include <string.h>
#include <cstddef>
#include <cstdint>
-#include <atomic>
#include <fstream>
-#include <limits>
#include <vector>
#include <string>
#include <cmath>
}
// Dump a csv
- float total_kernel_time = 0;
- fprintf(fperf, "op name, kernel name, queued duration (ms), submit duration(ms), exec duration (ms), complete duration (ms), total duration (ms), global size, local size, output size\n");
+ fprintf(fperf, "op name, kernel name, exec duration (ms), global size, local size, output size\n");
for (const ProfilingInfo & info : profiling_info) {
- total_kernel_time += info.cmd_duration_ns/1.e6f;
- fprintf(fperf, "%s,%s,%f,%f,%f,%f,%f,%zux%zux%zu,%zux%zux%zu,%zux%zux%zux%zu\n",
+ fprintf(fperf, "%s,%s,%f,%zux%zux%zu,%zux%zux%zu,%zux%zux%zux%zu\n",
info.op_name.c_str(), info.kernel_name.c_str(),
- info.cmd_queued_duration_ns/1.e6f,
- info.cmd_submit_duration_ns/1.e6f,
info.cmd_duration_ns/1.e6f,
- info.cmd_complete_duration_ns/1.e6f,
- info.cmd_total_duration_ns/1.e6f,
info.global_size[0], info.global_size[1], info.global_size[2],
info.local_size[0], info.local_size[1], info.local_size[2],
info.output_size[0], info.output_size[1], info.output_size[2], info.output_size[3]);
}
fclose(fperf);
- GGML_LOG_INFO("ggml_opencl: total kernel time: %f\n", total_kernel_time);
-
// Dump a simple chrome trace
FILE* ftrace = fopen("cl_trace.json", "w");
if (!ftrace) {
fprintf(ftrace, "[\n");
for (const ProfilingInfo & info : profiling_info) {
- fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Host\"},\n",
+ fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Host\"},\n",
info.kernel_name.c_str(), info.cmd_queued/1000);
- fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Host\"},\n",
+ fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Host\"},\n",
info.kernel_name.c_str(), info.cmd_submit/1000);
- fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Device\"},\n",
+ fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Device\"},\n",
info.kernel_name.c_str(), info.cmd_start/1000);
- fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %llu, \"pid\": \"\", \"tid\": \"Device\"},\n",
+ fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Device\"},\n",
info.kernel_name.c_str(), info.cmd_end/1000);
}
fclose(ftrace);
const cl_ulong nb21 = src2->nb[1];
const cl_ulong nb20 = src2->nb[0];
+ UNUSED(nb20);
+
const int ne0 = dst->ne[0];
const int ne1 = dst->ne[1];