llama : consistently catch and throw only exceptions deriving from std::exception...

author mgroeber9110 <redacted>

Mon, 5 Jun 2023 20:24:29 +0000 (22:24 +0200)

committer GitHub <redacted>

Mon, 5 Jun 2023 20:24:29 +0000 (23:24 +0300)
author mgroeber9110 <redacted>
Mon, 5 Jun 2023 20:24:29 +0000 (22:24 +0200)
committer GitHub <redacted>
Mon, 5 Jun 2023 20:24:29 +0000 (23:24 +0300)
diff --git a/llama.cpp b/llama.cpp

index d0e7151f47eceb0e8c6b4616902d2891b36de97b..54545f01d401e9f58de5907d7dc6ed1b153af6c0 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -289,15 +289,15 @@ template <typename T>
  static T checked_mul(T a, T b) {
      T ret = a * b;
      if (a != 0 && ret / a != b) {
-        throw format("overflow multiplying %llu * %llu",
-                     (unsigned long long) a, (unsigned long long) b);
+        throw std::runtime_error(format("overflow multiplying %llu * %llu",
+                     (unsigned long long) a, (unsigned long long) b));
      }
      return ret;
  }
  
  static size_t checked_div(size_t a, size_t b) {
      if (b == 0 || a % b != 0) {
-        throw format("error dividing %zu / %zu", a, b);
+        throw std::runtime_error(format("error dividing %zu / %zu", a, b));
      }
      return a / b;
  }
@@ -361,7 +361,7 @@ struct llama_load_tensor {
          const auto & first_shard = shards.at(0);
          for (const auto & shard : shards) {
              if (shard.type != first_shard.type) {
-                throw format("inconsistent tensor shard type in '%s'", name.c_str());
+                throw std::runtime_error(format("inconsistent tensor shard type in '%s'", name.c_str()));
              }
          }
          type = first_shard.type;
@@ -384,8 +384,8 @@ struct llama_load_tensor {
          const auto & first_shard = shards.at(0);
          for (const auto & shard : shards) {
              if (shard.ne != first_shard.ne) {
-                throw format("inconsistent tensor shard shape in '%s': first was %s, other was %s",
-                             name.c_str(), llama_format_tensor_shape(first_shard.ne).c_str(), llama_format_tensor_shape(shard.ne).c_str());
+                throw std::runtime_error(format("inconsistent tensor shard shape in '%s': first was %s, other was %s",
+                             name.c_str(), llama_format_tensor_shape(first_shard.ne).c_str(), llama_format_tensor_shape(shard.ne).c_str()));
              }
          }
          ne = first_shard.ne;
@@ -463,8 +463,8 @@ struct llama_file_loader {
                  }
          }
  
-        throw format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
-                     magic, version);
+        throw std::runtime_error(format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
+                     magic, version));
      }
      void read_hparams() {
          hparams.n_vocab = file.read_u32();
@@ -504,7 +504,7 @@ struct llama_file_loader {
              file.read_raw(shard.ne.data(), sizeof(shard.ne[0]) * n_dims);
              std::string name = file.read_string(name_len);
              if (n_dims < 1 || n_dims > 2) {
-                throw format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims);
+                throw std::runtime_error(format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims));
              }
              switch (shard.type) {
                  case GGML_TYPE_F32:
@@ -521,7 +521,7 @@ struct llama_file_loader {
                  case GGML_TYPE_Q6_K:
                      break;
                  default: {
-                    throw format("unrecognized tensor type %u\n", shard.type);
+                    throw std::runtime_error(format("unrecognized tensor type %u\n", shard.type));
                  }
              }
  
@@ -630,7 +630,7 @@ struct llama_model_loader {
              auto * ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
              file_loaders.emplace_back(ith_file);
              if (ith_file->hparams != first_file->hparams) {
-                throw format("llama.cpp: hparams inconsistent between files");
+                throw std::runtime_error(format("llama.cpp: hparams inconsistent between files"));
              }
          }
          if (!llama_mmap::SUPPORTED) {
@@ -660,7 +660,7 @@ struct llama_model_loader {
      uint32_t guess_n_parts() const {
          auto it = tensors_map.name_to_idx.find("tok_embeddings.weight");
          if (it == tensors_map.name_to_idx.end()) {
-            throw std::string("missing tok_embeddings.weight");
+            throw std::runtime_error(std::string("missing tok_embeddings.weight"));
          }
          const llama_load_tensor & lt = tensors_map.tensors.at(it->second);
          return file_loaders.at(0)->hparams.n_embd / lt.shards.at(0).ne.at(0);
@@ -677,12 +677,12 @@ struct llama_model_loader {
      struct ggml_tensor * get_tensor(const std::string & name, const std::vector<uint32_t> & ne, ggml_backend backend) {
          auto it = tensors_map.name_to_idx.find(name);
          if (it == tensors_map.name_to_idx.end()) {
-            throw format("llama.cpp: tensor '%s' is missing from model", name.c_str());
+            throw std::runtime_error(format("llama.cpp: tensor '%s' is missing from model", name.c_str()));
          }
          llama_load_tensor & lt = tensors_map.tensors.at(it->second);
          if (lt.ne != ne) {
-            throw format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
-                         name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str());
+            throw std::runtime_error(format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
+                         name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str()));
          }
  
          return get_tensor_for(lt, backend);
@@ -706,7 +706,7 @@ struct llama_model_loader {
  
      void done_getting_tensors() const {
          if (num_ggml_tensors_created != tensors_map.tensors.size()) {
-            throw std::string("llama.cpp: file contained more tensors than expected");
+            throw std::runtime_error(std::string("llama.cpp: file contained more tensors than expected"));
          }
      }
  
@@ -994,7 +994,7 @@ static void llama_model_load_internal(
          if (hparams.ftype != LLAMA_FTYPE_ALL_F32     &&
              hparams.ftype != LLAMA_FTYPE_MOSTLY_F16  &&
              hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
-            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)");
+            throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)"));
          }
      }
  
@@ -1002,7 +1002,7 @@ static void llama_model_load_internal(
          if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
              hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
              hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
-            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)");
+            throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)"));
          }
      }
  
@@ -1033,7 +1033,7 @@ static void llama_model_load_internal(
  
          model.ctx = ggml_init(params);
          if (!model.ctx) {
-            throw format("ggml_init() failed");
+            throw std::runtime_error(format("ggml_init() failed"));
          }
      }
  
@@ -1214,8 +1214,8 @@ static bool llama_model_load(
          llama_model_load_internal(fname, lctx, n_ctx, n_gpu_layers, memory_type, use_mmap, use_mlock,
                                    vocab_only, progress_callback, progress_callback_user_data);
          return true;
-    } catch (const std::string & err) {
-        fprintf(stderr, "error loading model: %s\n", err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "error loading model: %s\n", err.what());
          return false;
      }
  }
@@ -2120,8 +2120,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
          case LLAMA_FTYPE_MOSTLY_Q5_0: quantized_type = GGML_TYPE_Q5_0; break;
          case LLAMA_FTYPE_MOSTLY_Q5_1: quantized_type = GGML_TYPE_Q5_1; break;
          case LLAMA_FTYPE_MOSTLY_Q8_0: quantized_type = GGML_TYPE_Q8_0; break;
+
          // K-quants
-        case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break;
+        case LLAMA_FTYPE_MOSTLY_Q2_K:   quantized_type = GGML_TYPE_Q2_K; break;
          case LLAMA_FTYPE_MOSTLY_Q3_K_S:
          case LLAMA_FTYPE_MOSTLY_Q3_K_M:
          case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break;
@@ -2129,8 +2130,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
          case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break;
          case LLAMA_FTYPE_MOSTLY_Q5_K_S:
          case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break;
-        case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break;
-        default: throw format("invalid output file type %d\n", ftype);
+        case LLAMA_FTYPE_MOSTLY_Q6_K:   quantized_type = GGML_TYPE_Q6_K; break;
+        default: throw std::runtime_error(format("invalid output file type %d\n", ftype));
      }
  
      if (nthread <= 0) {
@@ -2231,7 +2232,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                      f32_data[i] = ggml_fp16_to_fp32(f16_data[i]);
                  }
              } else {
-                throw format("type %s unsupported for integer quantization", ggml_type_name(tensor.type));
+                throw std::runtime_error(format("type %s unsupported for integer quantization", ggml_type_name(tensor.type)));
              }
  
              printf("quantizing .. ");
@@ -2433,8 +2434,8 @@ int llama_model_quantize(
      try {
          llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread);
          return 0;
-    } catch (const std::string & err) {
-        fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.what());
          return 1;
      }
  }
@@ -2687,8 +2688,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
  int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, const char * path_base_model, int n_threads) {
      try {
          return llama_apply_lora_from_file_internal(ctx, path_lora, path_base_model, n_threads);
-    } catch (const std::string & err) {
-        fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, err.what());
          return 1;
      }
  }
author	mgroeber9110 <redacted>
	Mon, 5 Jun 2023 20:24:29 +0000 (22:24 +0200)
committer	GitHub <redacted>
	Mon, 5 Jun 2023 20:24:29 +0000 (23:24 +0300)