std::unique_ptr<llama_mmap> mapping;
// objects representing data potentially being locked in memory
- llama_mlock mlock_buf;
+ std::vector<std::unique_ptr<llama_mlock>> mlock_bufs;
llama_mlock mlock_mmap;
// for quantize-stats only
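
The member change above swaps the single `mlock_buf` for a vector of `std::unique_ptr<llama_mlock>`, so each host-resident backend buffer can own its own lock. A minimal sketch of the idea, assuming a non-copyable RAII-style wrapper in the spirit of `llama_mlock` (the `locked_region` type and the bare POSIX `mlock`/`munlock` calls below are illustrative stand-ins, not the actual llama.cpp implementation):

```cpp
#include <sys/mman.h>   // mlock/munlock (POSIX); error handling omitted
#include <cstddef>
#include <memory>
#include <vector>

// Illustrative stand-in for a non-copyable lock wrapper.
struct locked_region {
    void * addr = nullptr;
    size_t size = 0;

    locked_region() = default;
    locked_region(const locked_region &) = delete;            // non-copyable
    locked_region & operator=(const locked_region &) = delete;

    void init(void * ptr) { addr = ptr; }

    void grow_to(size_t target) {
        if (addr != nullptr && target > size) {
            // lock only the newly covered tail of the region
            mlock((char *) addr + size, target - size);
            size = target;
        }
    }

    ~locked_region() {
        if (addr != nullptr && size > 0) {
            munlock(addr, size);
        }
    }
};

// One independent lock per host buffer. Holding the objects behind unique_ptr
// keeps their addresses stable across vector reallocations and avoids needing
// copy/move support on the wrapper itself.
std::vector<std::unique_ptr<locked_region>> locked_regions;
```

Because the wrapper deletes its copy operations, storing it directly in a `std::vector` would be awkward; the `unique_ptr` indirection keeps the declaration change minimal while still giving one lock object per buffer. The second hunk below updates the loading path accordingly:
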
else {
buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
if (buf != nullptr && use_mlock && ggml_backend_buffer_is_host(buf)) {
- model.mlock_buf.init (ggml_backend_buffer_get_base(buf));
- model.mlock_buf.grow_to(ggml_backend_buffer_get_size(buf));
+ model.mlock_bufs.emplace_back(new llama_mlock);
+ auto & mlock_buf = model.mlock_bufs.back();
+ mlock_buf->init (ggml_backend_buffer_get_base(buf));
+ mlock_buf->grow_to(ggml_backend_buffer_get_size(buf));
}
}
if (buf == nullptr) {
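
For context, a condensed sketch of how the new path behaves when model tensors end up in several backend buffers: each host buffer gets its own lock, whereas the old single `mlock_buf` would have been re-initialized on every iteration and would only track the last buffer. `buffer_view`, its fields, and `lock_host_buffers` are hypothetical stand-ins for the ggml-backend buffer queries used in the hunk above, and the sketch reuses the `locked_region` type from the previous example:

```cpp
#include <memory>
#include <vector>

// Hypothetical summary of a backend buffer; in llama.cpp this information
// comes from ggml_backend_buffer_is_host/get_base/get_size.
struct buffer_view {
    void * base;
    size_t size;
    bool   is_host;
};

void lock_host_buffers(std::vector<std::unique_ptr<locked_region>> & locks,
                       const std::vector<buffer_view> & bufs,
                       bool use_mlock) {
    for (const auto & b : bufs) {
        if (!use_mlock || !b.is_host) {
            continue; // device memory cannot be mlock'ed from the host side
        }
        locks.emplace_back(new locked_region());
        auto & lk = locks.back();
        lk->init(b.base);
        lk->grow_to(b.size);
    }
}
```

`emplace_back(new locked_region())` mirrors the diff; `locks.push_back(std::make_unique<locked_region>())` would be the more idiomatic modern spelling, but either way the vector ends up owning one lock per buffer.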