#endif
// the compute plan that needs to be prepared for ggml_graph_compute()
- // since https://github.com/ggerganov/ggml/issues/287
+ // since https://github.com/ggml-org/ggml/issues/287
struct ggml_cplan {
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
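// A minimal usage sketch, assuming the ggml_graph_plan()/ggml_graph_compute() signatures
// from ggml.h (they may differ between ggml versions); `graph` is a previously built
// ggml_cgraph and the work buffer is owned by the caller (requires <stdlib.h>):
//
//     struct ggml_cplan cplan = ggml_graph_plan(graph, /*n_threads=*/4, /*threadpool=*/NULL);
//     uint8_t * work = cplan.work_size > 0 ? malloc(cplan.work_size) : NULL;
//     cplan.work_data = work;                              // caller allocates the work buffer
//     enum ggml_status status = ggml_graph_compute(graph, &cplan);
//     free(work);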
GGML_DEPRECATED(
GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
- "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
+ "obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
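// A minimal sketch of wiring up the abort callback, assuming the standard ggml_abort_callback
// contract (return true to abort the in-flight graph compute); `backend`, `app_state`, and
// `g_app_state` are placeholder names, not identifiers from this header:
//
//     static bool app_should_abort(void * data) {
//         const struct app_state * st = data;   // hypothetical application state
//         return st->stop_requested;            // true => abort the current compute
//     }
//     ...
//     ggml_backend_metal_set_abort_callback(backend, app_should_abort, &g_app_state);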
#if __FINITE_MATH_ONLY__
#error "some routines in ggml.c require non-finite math arithmetics -- pass -fno-finite-math-only to the compiler to fix"
-#error "ref: https://github.com/ggerganov/llama.cpp/pull/7154#issuecomment-2143844461"
+#error "ref: https://github.com/ggml-org/llama.cpp/pull/7154#issuecomment-2143844461"
#endif
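// -ffast-math implies -ffinite-math-only, which is what trips the check above; adding
// -fno-finite-math-only afterwards re-enables non-finite values. Build invocations are
// project-specific, but the idea is roughly:
//
//     cc -O3 -ffast-math -fno-finite-math-only -c ggml.c
//     cmake -B build -DCMAKE_C_FLAGS="-fno-finite-math-only"   # alternative via CMake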
#if defined(__ARM_NEON) && defined(__aarch64__)
int64_t nchunk1 = (nr1 + chunk_size - 1) / chunk_size;
// If the chunking is poor for the number of threads on this setup, scrap the whole plan. Re-chunk it by thread.
- // Also, chunking by thread was measured to have perform better on NUMA systems. See https://github.com/ggerganov/llama.cpp/pull/6915
+    // Also, chunking by thread was measured to perform better on NUMA systems. See https://github.com/ggml-org/llama.cpp/pull/6915
// In theory, chunking should be just as useful on NUMA and non NUMA systems, but testing disagreed with that.
if (nchunk0 * nchunk1 < nth * 4 || ggml_is_numa()) {
// distribute the thread work across the inner or outer loop based on which one is larger
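        // In sketch form, the fallback collapses to one chunk per thread along whichever
        // dimension is larger (nr0/nr1 being the src0/src1 row counts, cf. nchunk1 above):
        //
        //     nchunk0 = nr0 > nr1 ? nth : 1;   // parallelize by src0 rows
        //     nchunk1 = nr0 > nr1 ? 1 : nth;   // parallelize by src1 rows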
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
// TODO: add ggml_metal_kargs struct
- // TODO: optimize (see https://github.com/ggerganov/llama.cpp/pull/10238/commits/7941b6b9ec29a2866fec6fa6c51612515ca509f6)
+ // TODO: optimize (see https://github.com/ggml-org/llama.cpp/pull/10238/commits/7941b6b9ec29a2866fec6fa6c51612515ca509f6)
[encoder setComputePipelineState:pipeline];
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
if (id_src1) {
}
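// How such a dispatch typically continues, as a sketch (buffer indices, the `args`
// struct, and the threadgroup sizes are illustrative, not taken from this kernel):
//
//     [encoder setBuffer:id_dst offset:offs_dst atIndex:2];
//     [encoder setBytes:&args length:sizeof(args) atIndex:3];
//     [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03)
//             threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];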
// This barrier fixes a failing test
- // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
+ // ref: https://github.com/ggml-org/ggml/pull/621#discussion_r1425156335
threadgroup_barrier(mem_flags::mem_none);
float sum = simd_sum(lsum);
const float lsum = lsum4[0] + lsum4[1] + lsum4[2] + lsum4[3];
// This barrier fixes a failing test
- // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
+ // ref: https://github.com/ggml-org/ggml/pull/621#discussion_r1425156335
threadgroup_barrier(mem_flags::mem_none);
float sum = simd_sum(lsum);
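// Sketch of the surrounding per-threadgroup reduction, for context (names such as
// `psrc`, `tpitg`, and `ntg` are illustrative, not lifted from this kernel): each thread
// accumulates a partial sum, then simd_sum() combines the per-lane values within the
// SIMD group; the barrier before it is the fix referenced in the linked discussion.
//
//     float lsum = 0.0f;
//     for (int i = tpitg; i < n; i += ntg) {
//         lsum += psrc[i];
//     }
//     threadgroup_barrier(mem_flags::mem_none);
//     float sum = simd_sum(lsum);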