llama : fix platforms without mmap (#4578)

author slaren <redacted>

Fri, 22 Dec 2023 11:12:53 +0000 (12:12 +0100)

committer GitHub <redacted>

Fri, 22 Dec 2023 11:12:53 +0000 (13:12 +0200)
author slaren <redacted>
Fri, 22 Dec 2023 11:12:53 +0000 (12:12 +0100)
committer GitHub <redacted>
Fri, 22 Dec 2023 11:12:53 +0000 (13:12 +0200)
diff --git a/ggml-cuda.cu b/ggml-cuda.cu

index ac91ee12e3428aea29022330cb7305c965fb493a..37d7f27925009ab6d4b83aca787d72e5dc987fea 100644 (file)
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -7702,7 +7702,8 @@ inline void ggml_cuda_op_scale(
      GGML_ASSERT(src0->type == GGML_TYPE_F32);
      GGML_ASSERT( dst->type == GGML_TYPE_F32);
  
-    const float scale = ((float *) dst->op_params)[0];
+    float scale;
+    memcpy(&scale, dst->op_params, sizeof(float));
  
      scale_f32_cuda(src0_dd, dst_dd, scale, ggml_nelements(src0), main_stream);
      CUDA_CHECK(cudaGetLastError());
diff --git a/ggml.c b/ggml.c

index 15e1984d1d2a17bbd65289cf1af4005bf759ce8e..3656422d73767396c3f29c370464c84d5737f1dd 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -10335,7 +10335,8 @@ static void ggml_compute_forward_scale_f32(
      }
  
      // scale factor
-    const float v = *(float *) dst->op_params;
+    float v;
+    memcpy(&v, dst->op_params, sizeof(float));
  
      const int ith = params->ith;
      const int nth = params->nth;
@@ -15152,7 +15153,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
              {
                  // necessary for llama
                  if (src0->grad) {
-                    const float s = ((float *) tensor->op_params)[0];
+                    float s;
+                    memcpy(&s, tensor->op_params, sizeof(float));
  
                      src0->grad =
                          ggml_add_or_set(ctx,
diff --git a/llama.cpp b/llama.cpp

index cb0546c952d20f3e6110205c5a5a004bd4b3119e..4e4495739bbbdd23be2803211b810678aa9c62e0 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -778,7 +778,7 @@ struct llama_file {
              throw std::runtime_error(format("read error: %s", strerror(errno)));
          }
          if (ret != 1) {
-            throw std::runtime_error(std::string("unexpectedly reached end of file"));
+            throw std::runtime_error("unexpectedly reached end of file");
          }
      }
  
@@ -931,29 +931,29 @@ struct llama_mmap {
  #elif defined(_WIN32)
      static constexpr bool SUPPORTED = true;
  
-    llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
-        (void) numa;
+    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1, bool numa = false) {
+        GGML_UNUSED(numa);
  
          size = file->size;
  
          HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
  
          HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
-        DWORD error = GetLastError();
  
          if (hMapping == NULL) {
+            DWORD error = GetLastError();
              throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
          }
  
          addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
-        error = GetLastError();
+        DWORD error = GetLastError();
          CloseHandle(hMapping);
  
          if (addr == NULL) {
              throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
          }
  
-        if (prefetch) {
+        if (prefetch > 0) {
              // PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it
              BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
              HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
@@ -965,9 +965,9 @@ struct llama_mmap {
                  // advise the kernel to preload the mapped memory
                  WIN32_MEMORY_RANGE_ENTRY range;
                  range.VirtualAddress = addr;
-                range.NumberOfBytes = (SIZE_T)size;
+                range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
                  if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
-                    fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n",
+                    LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
                              llama_format_win_err(GetLastError()).c_str());
                  }
              }
@@ -982,26 +982,26 @@ struct llama_mmap {
  
      ~llama_mmap() {
          if (!UnmapViewOfFile(addr)) {
-            fprintf(stderr, "warning: UnmapViewOfFile failed: %s\n",
+            LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
                      llama_format_win_err(GetLastError()).c_str());
          }
      }
  #else
      static constexpr bool SUPPORTED = false;
  
-    llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
-        (void) file;
-        (void) prefetch;
-        (void) numa;
+    llama_mmap(struct llama_file * file, size_t prefetch = -1, bool numa = false) {
+        GGML_UNUSED(file);
+        GGML_UNUSED(prefetch);
+        GGML_UNUSED(numa);
  
-        throw std::runtime_error(std::string("mmap not supported"));
+        throw std::runtime_error("mmap not supported");
      }
  
-    void unmap(size_t offset, size_t len) {
-        (void) offset;
-        (void) len;
+    void unmap_fragment(size_t first, size_t last) {
+        GGML_UNUSED(first);
+        GGML_UNUSED(last);
  
-        throw std::runtime_error(std::string("mmap not supported"));
+        throw std::runtime_error("mmap not supported");
      }
  #endif
  };
author	slaren <redacted>
	Fri, 22 Dec 2023 11:12:53 +0000 (12:12 +0100)
committer	GitHub <redacted>
	Fri, 22 Dec 2023 11:12:53 +0000 (13:12 +0200)
ggml-cuda.cu		patch | blob | history
ggml.c		patch | blob | history
llama.cpp		patch | blob | history