train : fix KQ_pos allocation (#3392)

author Georgi Gerganov <redacted>

Fri, 29 Sep 2023 16:05:18 +0000 (19:05 +0300)

committer GitHub <redacted>

Fri, 29 Sep 2023 16:05:18 +0000 (19:05 +0300)
author Georgi Gerganov <redacted>
Fri, 29 Sep 2023 16:05:18 +0000 (19:05 +0300)
committer GitHub <redacted>
Fri, 29 Sep 2023 16:05:18 +0000 (19:05 +0300)
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp

index b61165fb7c6c93bd0e08b338e3a25fd694faaa8b..8ca1874dafc7e98c99c1c536b18b3577468516a1 100644 (file)
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -626,7 +626,8 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
  
      // KQ_pos - contains the positions
      struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N);
-    {
+    ggml_allocr_alloc(alloc, KQ_pos);
+    if (!ggml_allocr_is_measure(alloc)) {
          int * data = (int *) KQ_pos->data;
          for (int i = 0; i < N; ++i) {
              data[i] = n_past + i;
@@ -786,6 +787,8 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
      ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36->grad, one));
      GGML_ASSERT(t36->grad->data == NULL && t36->grad->view_src == NULL);
      ggml_allocr_alloc(alloc, t36->grad);
+    // KQ_pos
+    ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, KQ_pos, one));
  
      // make sure base model tensors data cannot be used in viewable operations
      ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, model->tok_embeddings, one));
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp

index 5043f32d0375d568d431778e9bed2d3b76a3b26c..be693b3ac7a43bc6bdf590b4d089378881f3373f 100644 (file)
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -334,7 +334,8 @@ static struct ggml_tensor * llama_build_train_graphs(
  
      // KQ_pos - contains the positions
      struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N);
-    {
+    ggml_allocr_alloc(alloc, KQ_pos);
+    if (!ggml_allocr_is_measure(alloc)) {
          int * data = (int *) KQ_pos->data;
          for (int i = 0; i < N; ++i) {
              data[i] = n_past + i;
author	Georgi Gerganov <redacted>
	Fri, 29 Sep 2023 16:05:18 +0000 (19:05 +0300)
committer	GitHub <redacted>
	Fri, 29 Sep 2023 16:05:18 +0000 (19:05 +0300)
examples/finetune/finetune.cpp		patch | blob | history
examples/train-text-from-scratch/train-text-from-scratch.cpp		patch | blob | history