metal : create autorelease pool during library build (#4970)
author Georgi Gerganov <redacted>
Wed, 17 Jan 2024 16:38:39 +0000 (18:38 +0200)
committer GitHub <redacted>
Wed, 17 Jan 2024 16:38:39 +0000 (18:38 +0200)
* metal : create autorelease pool during library build

ggml-ci

* test : simplify

ggml-ci

.gitignore
Makefile
ci/run.sh
ggml-metal.m
tests/CMakeLists.txt
tests/test-autorelease.cpp [new file with mode: 0644]

index fba207045344c133b6b433d575c998c97c82dad4..5ab81445d98f196d09a45b01487c6eb10ee4c1a9 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -105,3 +105,4 @@ poetry.toml
 /tests/test-tokenizer-1-bpe
 /tests/test-rope
 /tests/test-backend-ops
+/tests/test-autorelease
index 995b89f7adac9637eea7a42df579aa99b2d003ca..a8658a596eee061b57b825a786dbc57d2a0e4673 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ TEST_TARGETS = \
        tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
        tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama          \
        tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope      \
-       tests/test-backend-ops
+       tests/test-backend-ops tests/test-autorelease
 
 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -747,3 +747,6 @@ tests/test-c.o: tests/test-c.c llama.h
 
 tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
        $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+       $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
index 47a254f4cf1e880b2ab7cd683458c0f66ed148b6..86293f0dbdfd6a48632f51efb71db62dbe43f74f 100755 (executable)
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -179,6 +179,8 @@ function gg_run_open_llama_3b_v2 {
 
     wiki_test_60="${path_wiki}/wiki.test-60.raw"
 
+    ./bin/test-autorelease ${model_f16}
+
     ./bin/quantize ${model_f16} ${model_q8_0} q8_0
     ./bin/quantize ${model_f16} ${model_q4_0} q4_0
     ./bin/quantize ${model_f16} ${model_q4_1} q4_1
index 8bb4edd64db2e6afb05cb61e0f8c1a37a267e172..66d4d675eb32fb60e57f4a7fd5cac9795f9c205e 100644 (file)
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -303,22 +303,21 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
                 return NULL;
             }
 
-            // dictionary of preprocessor macros
-            NSMutableDictionary * prep = [NSMutableDictionary dictionary];
+            @autoreleasepool {
+                // dictionary of preprocessor macros
+                NSMutableDictionary * prep = [NSMutableDictionary dictionary];
 
 #ifdef GGML_QKK_64
-            prep[@"QK_K"] = @(64);
+                prep[@"QK_K"] = @(64);
 #endif
 
-            MTLCompileOptions* options = [MTLCompileOptions new];
-            options.preprocessorMacros = prep;
+                MTLCompileOptions* options = [MTLCompileOptions new];
+                options.preprocessorMacros = prep;
 
-            //[options setFastMathEnabled:false];
+                //[options setFastMathEnabled:false];
 
-            ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
-
-            [options release];
-            [prep release];
+                ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
+            }
         }
 
         if (error) {
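
Note on the hunk above: it scopes the Metal library compilation inside an @autoreleasepool block, so the autoreleased temporaries created while building the library (the preprocessor-macro dictionary, the compile options, and the intermediates from newLibraryWithSource:) are released when the block exits, even when ggml_metal_init() is called from a thread that never set up a pool of its own; the explicit [options release]/[prep release] calls are dropped. Below is a minimal standalone sketch of that pattern. It is not part of the commit: the worker() function, the pthread setup, the dictionary contents, and the quoted runtime message are illustrative assumptions, not code or output from llama.cpp.

#import <Foundation/Foundation.h>
#include <pthread.h>

// worker() is a placeholder for work that creates autoreleased temporaries,
// the way ggml_metal_init() does while compiling the Metal library.
static void * worker(void * arg) {
    (void) arg;
    @autoreleasepool {
        // [NSMutableDictionary dictionary] returns an autoreleased temporary;
        // on a thread that never set up a pool it could otherwise be reported
        // as "autoreleased with no pool in place - just leaking".
        NSMutableDictionary * prep = [NSMutableDictionary dictionary];
        prep[@"QK_K"] = @(64);
        NSLog(@"macros: %@", prep);
    } // the pool drains here and releases the temporaries
    return NULL;
}

int main(void) {
    pthread_t tid;
    pthread_create(&tid, NULL, worker, NULL);
    pthread_join(tid, NULL);
    return 0;
}

Compiled with clang -framework Foundation, the sketch shows the same shape as the patched ggml_metal_init(): open a pool, do the library-build work, let the pool drain when the block ends.
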
index 7c932240de82dd4dbd53807a83b7b461085af8b2..d7aaab8430faf1956b44c056c7f8c5d4c624dec8 100644 (file)
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -49,6 +49,7 @@ llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp)
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
 llama_build_and_test_executable(test-backend-ops.cpp)
+llama_build_and_test_executable(test-autorelease.cpp)
 
 llama_build_and_test_executable(test-rope.cpp)
 
diff --git a/tests/test-autorelease.cpp b/tests/test-autorelease.cpp
new file mode 100644 (file)
index 0000000..289c6ba
--- /dev/null
+++ b/tests/test-autorelease.cpp
@@ -0,0 +1,28 @@
+// ref: https://github.com/ggerganov/llama.cpp/issues/4952#issuecomment-1892864763
+
+#include <cstdio>
+#include <string>
+#include <thread>
+
+#include "llama.h"
+
+// This creates a new context inside a pthread and then tries to exit cleanly.
+int main(int argc, char ** argv) {
+    if (argc < 2) {
+        printf("Usage: %s model.gguf\n", argv[0]);
+        return 0; // intentionally return success
+    }
+
+    const std::string fname = argv[1];
+
+    std::thread([&fname]() {
+        llama_backend_init(false);
+        auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params());
+        auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
+        llama_free(ctx);
+        llama_free_model(model);
+        llama_backend_free();
+    }).join();
+
+    return 0;
+}
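
A note on how the fix is exercised (not part of the diff): test-autorelease.cpp loads a model and creates a context on a secondary std::thread, which typically has no autorelease pool of its own, then joins the thread and exits cleanly, reproducing the scenario from the issue referenced at the top of the file that motivated the pool in ggml_metal_init(). In ci/run.sh the test is invoked as ./bin/test-autorelease ${model_f16} right before the quantization steps, and when run without a model path it intentionally returns success so it can be skipped gracefully.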