From: Aaron Miller
Date: Sat, 1 Jul 2023 18:14:59 +0000 (-0700)
Subject: metal : release buffers when freeing metal context (#2062)
X-Git-Tag: gguf-v0.4.0~536
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=2f8cd979ecd1fa582852e7136e92ff8990b98fd8;p=pkg%2Fggml%2Fsources%2Fllama.cpp

metal : release buffers when freeing metal context (#2062)
---

diff --git a/ggml-metal.m b/ggml-metal.m
index 7551231b..fd69c41f 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -202,7 +202,9 @@ struct ggml_metal_context * ggml_metal_init(void) {
 
 void ggml_metal_free(struct ggml_metal_context * ctx) {
     fprintf(stderr, "%s: deallocating\n", __func__);
-
+    for (int i = 0; i < ctx->n_buffers; ++i) {
+        [ctx->buffers[i].metal release];
+    }
     free(ctx);
 }
 
diff --git a/llama.cpp b/llama.cpp
index 69c2ab01..561accf8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -253,7 +253,13 @@ struct llama_model {
 
 struct llama_context {
     llama_context(const llama_model & model, const llama_vocab & vocab) : model(model), vocab(vocab), t_load_us(model.t_load_us), t_start_us(model.t_start_us) {}
-
+#ifdef GGML_USE_METAL
+    ~llama_context() {
+        if (ctx_metal) {
+            ggml_metal_free(ctx_metal);
+        }
+    }
+#endif
     std::mt19937 rng;
 
     bool has_evaluated_once = false;