ggml : better PERF prints + support "LLAMA_PERF=1 make"

author Georgi Gerganov <redacted>

Sun, 23 Apr 2023 15:15:39 +0000 (18:15 +0300)

committer Georgi Gerganov <redacted>

Sun, 23 Apr 2023 15:15:39 +0000 (18:15 +0300)
author Georgi Gerganov <redacted>
Sun, 23 Apr 2023 15:15:39 +0000 (18:15 +0300)
committer Georgi Gerganov <redacted>
Sun, 23 Apr 2023 15:15:39 +0000 (18:15 +0300)
diff --git a/Makefile b/Makefile

index b297959c937da30627a994b2bdbc533d968714d7..0c7b6548dda1e0924e0070c02157e0fec656406a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -117,6 +117,10 @@ ifdef LLAMA_GPROF
         CFLAGS   += -pg
         CXXFLAGS += -pg
  endif
+ifdef LLAMA_PERF
+       CFLAGS   += -DGGML_PERF
+       CXXFLAGS += -DGGML_PERF
+endif
  ifneq ($(filter aarch64%,$(UNAME_M)),)
         CFLAGS   += -mcpu=native
         CXXFLAGS += -mcpu=native
diff --git a/ggml.c b/ggml.c

index 3ee2d0814ec04c07c1c484aa2dd74ff1fced5a8d..23dae2d9b767912d3ca819d9393cbbabe8d19a73 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -11239,7 +11239,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
  
          perf_total_per_op_us[node->op] += node->perf_time_us;
  
-        GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 ", %" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
+        GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
                  i,
                  node->ne[0], node->ne[1], node->ne[2],
                  GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
@@ -11253,7 +11253,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
      for (int i = 0; i < cgraph->n_leafs; i++) {
          struct ggml_tensor * node = cgraph->leafs[i];
  
-        GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 "] %8s\n",
+        GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
                  i,
                  node->ne[0], node->ne[1],
                  GGML_OP_LABEL[node->op]);
diff --git a/llama.cpp b/llama.cpp

index 34327ecfab4fe22a3ceabf28754465c5b2dccb21..8c1d65778be8bb844588cd4e54761dc4bd121cd3 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -1250,9 +1250,11 @@ static bool llama_eval_internal(
      ggml_build_forward_expand(&gf, inpL);
      ggml_graph_compute       (ctx0, &gf);
  
+#ifdef GGML_PERF
      // print timing information per ggml operation (for debugging purposes)
      // requires GGML_PERF to be defined
-    //ggml_graph_print(&gf);
+    ggml_graph_print(&gf);
+#endif
  
      // plot the computation graph in dot format (for debugging purposes)
      //if (n_past%100 == 0) {
author	Georgi Gerganov <redacted>
	Sun, 23 Apr 2023 15:15:39 +0000 (18:15 +0300)
committer	Georgi Gerganov <redacted>
	Sun, 23 Apr 2023 15:15:39 +0000 (18:15 +0300)
Makefile		patch | blob | history
ggml.c		patch | blob | history
llama.cpp		patch | blob | history