metal : release buffers when freeing metal context (#2062)

author Aaron Miller <redacted>

Sat, 1 Jul 2023 18:14:59 +0000 (11:14 -0700)

committer GitHub <redacted>

Sat, 1 Jul 2023 18:14:59 +0000 (21:14 +0300)
author Aaron Miller <redacted>
Sat, 1 Jul 2023 18:14:59 +0000 (11:14 -0700)
committer GitHub <redacted>
Sat, 1 Jul 2023 18:14:59 +0000 (21:14 +0300)
diff --git a/ggml-metal.m b/ggml-metal.m

index 7551231b9cf32cf2705de66fce9a15f339943370..fd69c41fe357d6e1636f2640186c7908352450b9 100644 (file)
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -202,7 +202,9 @@ struct ggml_metal_context * ggml_metal_init(void) {
  
  void ggml_metal_free(struct ggml_metal_context * ctx) {
      fprintf(stderr, "%s: deallocating\n", __func__);
-
+    for (int i = 0; i < ctx->n_buffers; ++i) {
+        [ctx->buffers[i].metal release];
+    }
      free(ctx);
  }
  
diff --git a/llama.cpp b/llama.cpp

index 69c2ab01b01e7913dcacba407bd5f05f31484a32..561accf88a4f96944c523427d1db63913555e949 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -253,7 +253,13 @@ struct llama_model {
  
  struct llama_context {
      llama_context(const llama_model & model, const llama_vocab & vocab) : model(model), vocab(vocab), t_load_us(model.t_load_us), t_start_us(model.t_start_us) {}
-
+#ifdef GGML_USE_METAL
+    ~llama_context() {
+        if (ctx_metal) {
+            ggml_metal_free(ctx_metal);
+        }
+    }
+#endif
      std::mt19937 rng;
  
      bool has_evaluated_once = false;
author	Aaron Miller <redacted>
	Sat, 1 Jul 2023 18:14:59 +0000 (11:14 -0700)
committer	GitHub <redacted>
	Sat, 1 Jul 2023 18:14:59 +0000 (21:14 +0300)
ggml-metal.m		patch | blob | history
llama.cpp		patch | blob | history