Ensure --mlock works properly with mmap() support

author Justine Tunney <redacted>

Thu, 30 Mar 2023 08:53:36 +0000 (01:53 -0700)

committer Justine Tunney <redacted>

Thu, 30 Mar 2023 19:28:25 +0000 (12:28 -0700)
author Justine Tunney <redacted>
Thu, 30 Mar 2023 08:53:36 +0000 (01:53 -0700)
committer Justine Tunney <redacted>
Thu, 30 Mar 2023 19:28:25 +0000 (12:28 -0700)
diff --git a/ggml.c b/ggml.c

index 4ea7159576198fdc575ab94e8cb3133c09b0142a..25fa726320df20bf695e8506895a2bf39650b59a 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -2884,36 +2884,47 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
      return result;
  }
  
+#ifdef __APPLE__
+#define MLOCK_SUGGESTION \
+    "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \
+    "decreasing 'vm.global_no_user_wire_amount'.  Also try increasing RLIMIT_MLOCK (ulimit -l).\n"
+#else
+#define MLOCK_SUGGESTION \
+    "Try increasing RLIMIT_MLOCK ('ulimit -l' as root).\n"
+#endif
+
  bool ggml_mlock_supported(void) {
      return GGML_MLOCK_SUPPORT;
  }
  
+bool ggml_mlock(
+        struct ggml_context * ctx,
+        const void *opt_extra_addr,
+        size_t opt_extra_len,
+        char **err_p) {
+    // TODO: Use SetProcessWorkingSetSize() + VirtualLock() on WIN32
  #if GGML_MLOCK_SUPPORT
-#ifdef __APPLE__
-    #define MLOCK_SUGGESTION "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or\n" \
-                             "decreasing 'vm.global_no_user_wire_amount'.  Also try increasing RLIMIT_MLOCK (ulimit -l)."
-#else
-    #define MLOCK_SUGGESTION "Try increasing RLIMIT_MLOCK (ulimit -l)."
-#endif
-bool ggml_mlock(struct ggml_context * ctx, char ** err_p) {
      if (ctx->mem_buffer_mlocked) {
          return true;
      }
-    if (mlock(ctx->mem_buffer, ctx->mem_size)) {
-        int ret = asprintf(err_p, "failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION,
-                           ctx->mem_size, strerror(errno));
-        GGML_ASSERT(ret >= 0);
+    if (mlock(ctx->mem_buffer, ctx->mem_size) ||
+        (opt_extra_len &&
+         mlock(opt_extra_addr, opt_extra_len))) {
+        if ((*err_p = malloc(1024))) {
+            snprintf(*err_p, 1024,
+                     "failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION,
+                     ctx->mem_size + opt_extra_len,
+                     strerror(errno));
+        }
          return false;
      }
      ctx->mem_buffer_mlocked = true;
      return true;
-}
  #else // GGML_MLOCK_SUPPORT
-bool ggml_mlock(struct ggml_context * ctx, char ** err_p) {
      *err_p = strdup("can't mlock because it's not supported on this system");
      return false;
-}
  #endif // GGML_MLOCK_SUPPORT
+}
  
  ////////////////////////////////////////////////////////////////////////////////
  
diff --git a/ggml.h b/ggml.h

index 058dfe23065165822be883532c92de56ac7adf21..f7791ed11f084d8f5ee8f0c3a2d5720beb7f8c79 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -345,7 +345,11 @@ size_t ggml_used_mem(const struct ggml_context * ctx);
  size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
  
  bool ggml_mlock_supported(void);
-bool ggml_mlock(struct ggml_context * ctx, char ** err_p);
+bool ggml_mlock(
+        struct ggml_context * ctx,
+        const void *opt_extra_addr,
+        size_t opt_extra_len,
+        char **err_p);
  
  struct ggml_tensor * ggml_new_tensor(
          struct ggml_context * ctx,
diff --git a/llama.cpp b/llama.cpp

index b00e065230433ad744e96ee5faa65ffe0b4756d5..28e885cef402a4a11d52aeb0f7d69561056152ef 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -1595,7 +1595,10 @@ struct llama_context * llama_init_from_file(
  
      if (params.use_mlock) {
          char *err;
-        if (!ggml_mlock(ctx->model.ctx, &err)) {
+        if (!ggml_mlock(ctx->model.ctx,
+                        ctx->model.mm_addr,
+                        ctx->model.mm_length,
+                        &err)) {
              fprintf(stderr, "%s\n", err);
              free(err);
              llama_free(ctx);
author	Justine Tunney <redacted>
	Thu, 30 Mar 2023 08:53:36 +0000 (01:53 -0700)
committer	Justine Tunney <redacted>
	Thu, 30 Mar 2023 19:28:25 +0000 (12:28 -0700)
ggml.c		patch | blob | history
ggml.h		patch | blob | history
llama.cpp		patch | blob | history