ggml : fix cont with transposed tensors when one dimension is 1 (ggml/934)

author Salvatore Mesoraca <redacted>

Wed, 28 Aug 2024 08:23:02 +0000 (10:23 +0200)

committer Georgi Gerganov <redacted>

Mon, 2 Sep 2024 12:24:50 +0000 (15:24 +0300)
author Salvatore Mesoraca <redacted>
Wed, 28 Aug 2024 08:23:02 +0000 (10:23 +0200)
committer Georgi Gerganov <redacted>
Mon, 2 Sep 2024 12:24:50 +0000 (15:24 +0300)
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c

index 9c105fd353de4c12ab189fc7f2601d7e3ae8238d..3b059ab6db07ebcad4b114f5a1cba9270982e378 100644 (file)
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -8120,8 +8120,7 @@ static void ggml_compute_forward_dup_same_cont(
      GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
      GGML_ASSERT(src0->type == dst->type);
  
-    const size_t nb00 = src0->nb[0];
-    const size_t nb0 = dst->nb[0];
+    const size_t nb0 = ggml_type_size(src0->type);
  
      const int ith = params->ith; // thread index
      const int nth = params->nth; // number of threads
@@ -8135,8 +8134,8 @@ static void ggml_compute_forward_dup_same_cont(
      if (ie0 < ie1) {
          memcpy(
              ((char *)  dst->data + ie0*nb0),
-            ((char *) src0->data + ie0*nb00),
-            (ie1 - ie0) * ggml_type_size(src0->type));
+            ((char *) src0->data + ie0*nb0),
+            (ie1 - ie0) * nb0);
      }
  }
  
@@ -8153,11 +8152,6 @@ static void ggml_compute_forward_dup_f16(
      const int ith = params->ith; // thread index
      const int nth = params->nth; // number of threads
  
-    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
-        ggml_compute_forward_dup_same_cont(params, dst);
-        return;
-    }
-
      // parallelize by rows
      const int nr = ne01;
      // number of rows per thread
@@ -8422,11 +8416,6 @@ static void ggml_compute_forward_dup_bf16(
      const int ith = params->ith; // thread index
      const int nth = params->nth; // number of threads
  
-    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
-        ggml_compute_forward_dup_same_cont(params, dst);
-        return;
-    }
-
      // parallelize by rows
      const int nr = ne01;
      // number of rows per thread
@@ -8778,11 +8767,6 @@ static void ggml_compute_forward_dup_f32(
      const int ith = params->ith; // thread index
      const int nth = params->nth; // number of threads
  
-    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
-        ggml_compute_forward_dup_same_cont(params, dst);
-        return;
-    }
-
      // parallelize by rows
      const int nr = ne01;
      // number of rows per thread
@@ -9092,13 +9076,13 @@ static void ggml_compute_forward_dup_bytes(
      GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
      GGML_ASSERT(src0->type == dst->type);
  
+    GGML_TENSOR_UNARY_OP_LOCALS;
+
      if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
          ggml_compute_forward_dup_same_cont(params, dst);
          return;
      }
  
-    GGML_TENSOR_UNARY_OP_LOCALS;
-
      const size_t type_size = ggml_type_size(src0->type);
      const int ith = params->ith; // thread index
      const int nth = params->nth; // number of threads
author	Salvatore Mesoraca <redacted>
	Wed, 28 Aug 2024 08:23:02 +0000 (10:23 +0200)
committer	Georgi Gerganov <redacted>
	Mon, 2 Sep 2024 12:24:50 +0000 (15:24 +0300)