struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
- struct ggml_tensor * c,
int s0,
int s1,
int p0,
int p1,
int d0,
int d1) {
-
struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N*IC, OH, OW, KH*KW]; trailing is_2D/dst-type args assumed from the upstream ggml_im2col API
return;
}
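// Sketch (illustration, not part of this patch): how an im2col-based 2-D
// convolution is typically composed in ggml. ggml_im2col lays out each
// receptive field of the input as one row, so the convolution collapses into
// a single ggml_mul_mat against the flattened kernel, plus reshapes. Shape
// comments use ggml's ne[0..3] order; the helper name and the trailing
// im2col arguments (true = 2-D, GGML_TYPE_F16 output) are assumptions taken
// from the current upstream API, not from this patch.
static struct ggml_tensor * sketch_conv_2d_im2col(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,   // kernel: [KW, KH, IC, OC]
        struct ggml_tensor  * b,   // input:  [IW, IH, IC, N]
        int s0, int s1, int p0, int p1, int d0, int d1) {
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b,
            s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16);               // [IC*KH*KW, OW, OH, N]

    struct ggml_tensor * result = ggml_mul_mat(ctx,
            ggml_reshape_2d(ctx, im2col, im2col->ne[0],
                            im2col->ne[3]*im2col->ne[2]*im2col->ne[1]), // [IC*KH*KW, N*OH*OW]
            ggml_reshape_2d(ctx, a, a->ne[0]*a->ne[1]*a->ne[2],
                            a->ne[3]));                                 // [IC*KH*KW, OC]

    result = ggml_reshape_4d(ctx, result,
            im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]);     // [OW, OH, N, OC]
    return ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2));       // [OW, OH, OC, N]
}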
- const int64_t tgemm0 = ggml_perf_time_us();
+ //const int64_t tgemm0 = ggml_perf_time_us();
for (int64_t i13 = 0; i13 < ne13; i13++) {
for (int64_t i12 = 0; i12 < ne12; i12++) {
const int64_t i03 = i13/r3;
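        // Annotation (not part of this patch): r2 and r3 are the broadcast
        // ratios of src1 over src0, assumed to be computed earlier in this
        // function as in upstream ggml:
        //
        //   const int64_t r2 = ne12/ne02;
        //   const int64_t r3 = ne13/ne03;
        //
        // so i03 = i13/r3 (and the matching i02 = i12/r2) maps every r3
        // consecutive src1 batches onto the same src0 slice. With the
        // hypothetical ne13 = 8, ne03 = 2 (r3 = 4), i13 = 0..3 uses i03 = 0
        // and i13 = 4..7 uses i03 = 1, i.e. each src0 dim-3 slice is reused
        // four times along i13.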
if (ggml_compute_forward_mul_mat_use_blas(node)) {
if (node->src[0]->type != GGML_TYPE_F32) {
// here we need memory for fully dequantized matrix from src0
- cur = ggml_type_size(GGML_TYPE_F32)*ggml_nelements(node->src[0]);
+ // take into account that src0 can be broadcasted into src1[2,3]
+ cur = ggml_type_size(GGML_TYPE_F32)
+ * node->src[0]->ne[0]*node->src[0]->ne[1]
+ * node->src[1]->ne[2]*node->src[1]->ne[3];
}
} else
#endif
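// Sketch (illustration, not part of this patch): what the new work-size
// expression reserves. It budgets one dequantized F32 copy of src0's 2-D
// slice (ne00 x ne01 floats) for every broadcast position taken from src1's
// dims 2 and 3, instead of only ggml_type_size(GGML_TYPE_F32) *
// ggml_nelements(src0). The helper name and the example numbers are
// hypothetical.
static size_t blas_dequant_work_size(const struct ggml_tensor * src0,
                                     const struct ggml_tensor * src1) {
    const size_t slice = ggml_type_size(GGML_TYPE_F32)          // 4 bytes per float
                       * (size_t) src0->ne[0]                   // ne00
                       * (size_t) src0->ne[1];                  // ne01
    return slice * (size_t) src1->ne[2] * (size_t) src1->ne[3]; // one slice per src1[2,3] position
}
//
// e.g. src0 = [4096, 4096, 1, 1] (quantized), src1 = [4096, 32, 8, 1]:
//   before: 4 * 4096*4096         =  64 MiB
//   after:  4 * 4096*4096 * 8 * 1 = 512 MiB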