bench : multi-thread memcpy (#1534)

author Georgi Gerganov <redacted>

Tue, 21 Nov 2023 20:07:30 +0000 (22:07 +0200)

committer GitHub <redacted>

Tue, 21 Nov 2023 20:07:30 +0000 (22:07 +0200)
author Georgi Gerganov <redacted>
Tue, 21 Nov 2023 20:07:30 +0000 (22:07 +0200)
committer GitHub <redacted>
Tue, 21 Nov 2023 20:07:30 +0000 (22:07 +0200)
diff --git a/whisper.cpp b/whisper.cpp

index 8f457239e64e1589e51e3838b634b36bee36ec9d..03001902b07b8a37dbec8cd43e602ab4f93c57e1 100644 (file)
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -6064,6 +6064,43 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
      // 1GB array
      const size_t size = arr*1e6;
  
+    double sum  = 0.0;
+
+    // heat-up
+    {
+        char * src = (char *) malloc(size);
+        char * dst = (char *) malloc(size);
+
+        for (size_t i = 0; i < size; i++) src[i] = i;
+
+        memcpy(dst, src, size); // heat-up
+
+        double tsum = 0.0;
+
+        for (size_t i = 0; i < n; i++) {
+            const int64_t t0 = ggml_time_us();
+
+            memcpy(dst, src, size);
+
+            const int64_t t1 = ggml_time_us();
+
+            tsum += (t1 - t0)*1e-6;
+
+            src[rand() % size] = rand() % 256;
+        }
+
+        snprintf(strbuf, sizeof(strbuf), "memcpy: %7.2f GB/s (heat-up)\n", (double) (n*size)/(tsum*1e9));
+        s += strbuf;
+
+        // needed to prevent the compiler from optimizing the memcpy away
+        {
+            for (size_t i = 0; i < size; i++) sum += dst[i];
+        }
+
+        free(src);
+        free(dst);
+    }
+
      // single-thread
      {
          char * src = (char *) malloc(size);
@@ -6074,7 +6111,6 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
          memcpy(dst, src, size); // heat-up
  
          double tsum = 0.0;
-        double sum  = 0.0;
  
          for (size_t i = 0; i < n; i++) {
              const int64_t t0 = ggml_time_us();
@@ -6088,21 +6124,73 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
              src[rand() % size] = rand() % 256;
          }
  
-        snprintf(strbuf, sizeof(strbuf), "memcpy: %.2f GB/s (1 thread)\n", (double) (n*size)/(tsum*1e9));
+        snprintf(strbuf, sizeof(strbuf), "memcpy: %7.2f GB/s ( 1 thread)\n", (double) (n*size)/(tsum*1e9));
          s += strbuf;
  
          // needed to prevent the compiler from optimizing the memcpy away
          {
              for (size_t i = 0; i < size; i++) sum += dst[i];
+        }
+
+        free(src);
+        free(dst);
+    }
+
+    // multi-thread
+
+    for (uint32_t n_threads = 1; n_threads <= std::thread::hardware_concurrency(); n_threads++) {
+        char * src = (char *) malloc(size);
+        char * dst = (char *) malloc(size);
+
+        for (size_t i = 0; i < size; i++) src[i] = i;
+
+        memcpy(dst, src, size); // heat-up
+
+        double tsum = 0.0;
+
+        auto helper = [&](int th) {
+            const int64_t i0 = (th + 0)*size/n_threads;
+            const int64_t i1 = (th + 1)*size/n_threads;
+
+            for (size_t i = 0; i < n; i++) {
+                memcpy(dst + i0, src + i0, i1 - i0);
  
-            snprintf(strbuf, sizeof(strbuf), "sum:    %f\n", sum);
-            s += strbuf;
+                src[i0 + rand() % (i1 - i0)] = rand() % 256;
+            };
+        };
+
+        const int64_t t0 = ggml_time_us();
+
+        std::vector<std::thread> threads(n_threads - 1);
+        for (uint32_t th = 0; th < n_threads - 1; ++th) {
+            threads[th] = std::thread(helper, th);
+        }
+
+        helper(n_threads - 1);
+
+        for (uint32_t th = 0; th < n_threads - 1; ++th) {
+            threads[th].join();
+        }
+
+        const int64_t t1 = ggml_time_us();
+
+        tsum += (t1 - t0)*1e-6;
+
+        snprintf(strbuf, sizeof(strbuf), "memcpy: %7.2f GB/s (%2d thread)\n", (double) (n*size)/(tsum*1e9), n_threads);
+        s += strbuf;
+
+        // needed to prevent the compiler from optimizing the memcpy away
+        {
+            for (size_t i = 0; i < size; i++) sum += dst[i];
          }
  
          free(src);
          free(dst);
      }
  
+    snprintf(strbuf, sizeof(strbuf), "sum:    %f\n", sum);
+    s += strbuf;
+
      return s.c_str();
  }
author	Georgi Gerganov <redacted>
	Tue, 21 Nov 2023 20:07:30 +0000 (22:07 +0200)
committer	GitHub <redacted>
	Tue, 21 Nov 2023 20:07:30 +0000 (22:07 +0200)