ggml : fix rope + llama minor optimizations (#3560)

author Herman Semenov <redacted>

Fri, 20 Oct 2023 10:02:12 +0000 (10:02 +0000)

committer GitHub <redacted>

Fri, 20 Oct 2023 10:02:12 +0000 (13:02 +0300)
author Herman Semenov <redacted>
Fri, 20 Oct 2023 10:02:12 +0000 (10:02 +0000)
committer GitHub <redacted>
Fri, 20 Oct 2023 10:02:12 +0000 (13:02 +0300)
diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp

index 5a545a8076460a55ed83ab4437f387f8aee7d31e..ff51cc8034c8b9247f8fe24872cc529a754b7faa 100644 (file)
--- a/common/grammar-parser.cpp
+++ b/common/grammar-parser.cpp
@@ -399,7 +399,7 @@ namespace grammar_parser {
      void print_grammar(FILE * file, const parse_state & state) {
          try {
              std::map<uint32_t, std::string> symbol_id_names;
-            for (auto kv : state.symbol_ids) {
+            for (const auto & kv : state.symbol_ids) {
                  symbol_id_names[kv.second] = kv.first;
              }
              for (size_t i = 0, end = state.rules.size(); i < end; i++) {
diff --git a/common/train.cpp b/common/train.cpp

index 972eaefe00f05b807cefb8c3c115d513734e38b5..154ca56e5fa8773ceb86366fb339cbd8063eeef5 100644 (file)
--- a/common/train.cpp
+++ b/common/train.cpp
@@ -1425,7 +1425,7 @@ void train_opt_callback(void * vdata, int accum_step, float * sched, bool * canc
  
          int impr_plot = -(int)(1 + (opt->loss_before - opt->loss_after) * 10.0f + 0.5f);
          if (impr_plot > 0) impr_plot = 0;
-        if (std::isnan(opt->loss_before) || std::isnan(opt->loss_before)) impr_plot = 0;
+        if (std::isnan(opt->loss_before) || std::isnan(opt->loss_after)) impr_plot = 0;
          printf("%s: iter=%6d sample=%zu/%zu sched=%f loss=%f",
              __func__, opt->iter, std::min(1+train->shuffle_next_sample, train->shuffle_sample_count), train->shuffle_sample_count,
              *sched, opt->loss_after);
diff --git a/ggml.c b/ggml.c

index 630deb49d0a443b8d15e88c13cc6b7af84082809..ed157aab0993022eb8fc416d58ff9160f5a5ea46 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -13537,7 +13537,7 @@ static void ggml_compute_forward_rope_f16(
                          dst_data[n_dims]     = GGML_FP32_TO_FP16(x2*cos_block_theta - x3*sin_block_theta);
                          dst_data[n_dims/2*3] = GGML_FP32_TO_FP16(x2*sin_block_theta + x3*cos_block_theta);
                      }
-                } if (!is_neox) {
+                } else if (!is_neox) {
                      for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
                          const float cos_theta = cosf(theta);
                          const float sin_theta = sinf(theta);
@@ -19170,6 +19170,7 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
  
                              if (idx == -1) {
                                  fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i);
+                                fclose(fout);
                                  return;
                              }
  
diff --git a/llama.cpp b/llama.cpp

index ed876668220fca5cb8bdda37187a0ffb1df42cc8..ec8ffad33a2eed2fad140306f684ef6d3d791646 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -6324,7 +6324,6 @@ struct llm_tokenizer_bpe {
                  llm_symbol sym;
                  size_t char_len = std::min(word.size() - offset, (size_t) ::utf8_len(word[offset]));
                  sym.text = word.c_str() + offset;
-                sym.n = 1;
                  sym.n = char_len;
                  offset += sym.n;
                  sym.prev = index - 1;
@@ -7054,7 +7053,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
      std::vector<llama_grammar_candidate> rejects;
  
      if (stack.empty()) {
-        for (auto tok : candidates) {
+        for (const auto & tok : candidates) {
              if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
                  rejects.push_back(tok);
              }
@@ -7065,7 +7064,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
      const llama_grammar_element * stack_pos = stack.back();
  
      std::vector<llama_grammar_candidate> next_candidates;
-    for (auto tok : candidates) {
+    for (const auto & tok : candidates) {
          if (*tok.code_points == 0) {
              // reached end of full codepoints in token, reject iff it ended in a partial sequence
              // that cannot satisfy this position in grammar
@@ -7091,7 +7090,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
      llama_grammar_advance_stack(rules, stack_after, next_stacks);
  
      auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
-    for (auto tok : next_rejects) {
+    for (const auto & tok : next_rejects) {
          rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
      }
author	Herman Semenov <redacted>
	Fri, 20 Oct 2023 10:02:12 +0000 (10:02 +0000)
committer	GitHub <redacted>
	Fri, 20 Oct 2023 10:02:12 +0000 (13:02 +0300)
common/grammar-parser.cpp		patch \| blob \| history
common/train.cpp		patch \| blob \| history
ggml.c		patch \| blob \| history
llama.cpp		patch \| blob \| history