talk-llama : fix new rope interface

author Georgi Gerganov <redacted>

Mon, 3 Jul 2023 16:24:01 +0000 (19:24 +0300)

committer Georgi Gerganov <redacted>

Mon, 3 Jul 2023 16:24:01 +0000 (19:24 +0300)
author Georgi Gerganov <redacted>
Mon, 3 Jul 2023 16:24:01 +0000 (19:24 +0300)
committer Georgi Gerganov <redacted>
Mon, 3 Jul 2023 16:24:01 +0000 (19:24 +0300)
diff --git a/examples/talk-llama/llama.cpp b/examples/talk-llama/llama.cpp

index 942407b84a3f3234c1f6f175c2c2758fe5a118f6..77550faa43c22b598f2b90e971f0b71185225858 100644 (file)
--- a/examples/talk-llama/llama.cpp
+++ b/examples/talk-llama/llama.cpp
@@ -281,13 +281,6 @@ static T checked_mul(T a, T b) {
      return ret;
  }
  
-static size_t checked_div(size_t a, size_t b) {
-    if (b == 0 || a % b != 0) {
-        throw format("error dividing %zu / %zu", a, b);
-    }
-    return a / b;
-}
-
  static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
      char buf[256];
      snprintf(buf, sizeof(buf), "%5u", ne.at(0));
@@ -1237,8 +1230,8 @@ static bool llama_eval_internal(
          // self-attention
          {
              // compute Q and K and RoPE them
-            struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
-            struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
+            struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
+            struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
              ggml_set_name(Qcur, "Qcur");
              ggml_set_name(Kcur, "Kcur");
author	Georgi Gerganov <redacted>
	Mon, 3 Jul 2023 16:24:01 +0000 (19:24 +0300)
committer	Georgi Gerganov <redacted>
	Mon, 3 Jul 2023 16:24:01 +0000 (19:24 +0300)